-
Notifications
You must be signed in to change notification settings - Fork 63
Integrate TraCR as the runtime/kernel profiler for Simpler #1173
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9f7efd7
6c3e026
fd73895
653785a
643537d
f51b297
d5c73c8
2ca1954
f1f53d5
87b9988
aaa3b3a
d610b50
22d49f7
104c330
9d8699f
d1daa48
7171a74
53a28ae
ea0b944
079f147
9373caa
b7f16be
83eef5a
068d11f
279ef8c
3b729e5
d036300
6c6c509
e5d7da4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| [submodule "tools/tracr"] | ||
| path = tools/tracr | ||
| url = https://github.com/huawei-csl/TracR.git |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,6 +70,27 @@ endif() | |
| # Create shared library (host-compatible for dlopen) | ||
| add_library(aicpu_kernel SHARED ${AICPU_SOURCES}) | ||
|
|
||
| # TraCR | ||
| include(${CMAKE_CURRENT_SOURCE_DIR}/../../../../../tools/tracr.cmake) | ||
| tracr_enable(aicpu_kernel) | ||
|
|
||
| # TODO: move this somewhere such that EVERY platform launches this once. Placing this here is hacky... | ||
| # Only build the host-side trace post-processor when TraCR is enabled: it is an | ||
| # offline analysis tool, and it pulls in Linux-only APIs (sched_getcpu) that do | ||
| # not compile on the macOS packaging build. | ||
| if(BUILD_TRACR) | ||
| include(${CMAKE_CURRENT_SOURCE_DIR}/../../../../../tools/tracr_postprocessing_script.cmake) | ||
| endif() | ||
|
|
||
|
Comment on lines
+77
to
+84
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🎯 Functional Correctness | 🟠 Major | 🏗️ Heavy lift Hoist TraCR post-processing out of the sim-only target. Lines 77-79 register 🤖 Prompt for AI Agents |
||
| # Optional: to make the Orchestrator run independently (i.e. finalize before letting the schedulers run) | ||
| option(INDEP_ORCH "Run Orchestrator independent from the Schedulers" OFF) | ||
| if(DEFINED ENV{INDEP_ORCH}) | ||
| set(INDEP_ORCH $ENV{INDEP_ORCH}) | ||
| endif() | ||
| if(INDEP_ORCH) | ||
| target_compile_definitions(aicpu_kernel PRIVATE INDEP_ORCH) | ||
| endif() | ||
|
|
||
| option(WERROR "Treat compiler warnings as errors" ON) | ||
| if(DEFINED ENV{SIMPLER_DISABLE_WARNINGS_AS_ERRORS}) | ||
| set(WERROR OFF) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,8 @@ | |
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include <tracr_simpler_api.hpp> | ||
|
|
||
| #include "aicpu/device_phase_aicpu.h" | ||
| #include "aicpu/platform_aicpu_affinity.h" | ||
| #include "callable_protocol.h" | ||
|
|
@@ -253,6 +255,15 @@ int DeviceRunner::run(Runtime &runtime, int block_dim, int launch_aicpu_num) { | |
| worker_count_ = num_aicore; | ||
| runtime.set_aicpu_thread_num(launch_aicpu_num); | ||
|
|
||
| // Initialize TraCR memory on the device | ||
| #ifdef ENABLE_TRACR | ||
| rc = DevAllocTraCR(this, runtime); | ||
| if (rc != 0) { | ||
| LOG_ERROR("DevAllocTraCR failed rc=%d", rc); | ||
| return rc; | ||
| } | ||
| #endif | ||
|
|
||
|
Comment on lines
+258
to
+266
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win Make the TraCR lifecycle unwind-safe.
Also applies to: 527-535 🤖 Prompt for AI Agents |
||
| int num_aic = block_dim; | ||
| uint32_t enable_profiling_flag = PROFILING_FLAG_NONE; | ||
| if (enable_dump_tensor_) { | ||
|
|
@@ -571,6 +582,15 @@ int DeviceRunner::run(Runtime &runtime, int block_dim, int launch_aicpu_num) { | |
| return runtime_rc; | ||
| } | ||
|
|
||
| // Download and Free TraCR memory from Device and store in memory (~/ascend/) | ||
| #ifdef ENABLE_TRACR | ||
| rc = StoreTracrData(this, runtime); | ||
| if (rc != 0) { | ||
| LOG_ERROR("FreeTraCR failed: %d", rc); | ||
| return -1; | ||
| } | ||
| #endif | ||
|
|
||
| // Tear down collectors. stop() joins mgmt then collector in the only safe | ||
| // order (mgmt's final-drain pass into L2 has poll as its consumer). | ||
| if (enable_l2_swimlane_) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🩺 Stability & Availability | 🟠 Major | ⚡ Quick win
Make the TraCR lifecycle unwind-safe.
`DevAllocTraCR()` can fail after populating one runtime pointer, and every later `return` before Line 487 skips the only shown free path in `StoreTracrData()`. The new Line 490 return also bypasses `teardown_shared_collectors_after_run()` at Line 496. Failed runs will leak TraCR buffers and skip the collector stop/export path. Either guard the TraCR buffers with an all-exit cleanup path here or make the helper transactional, then return the export error only after teardown has run. Also applies to: 485-493
🤖 Prompt for AI Agents