diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 2d5613ba8..8eceadc4f 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -503,12 +503,12 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Wire every arena-internal pointer field (host wrote host-mirror // addresses; we overwrite them with device addresses). runtime_wire_arena_pointers(runtime_arena_, rt->prebuilt_layout, rt); - sm_size = PTO2SharedMemoryHandle::calculate_size_per_ring(rt->prebuilt_layout.task_window_sizes); + sm_size = PTO2SharedMemoryHandle::calculate_size_per_ring(rt->prebuilt_layout.sizing.task_window_sizes); for (int r = 0; r < PTO2_MAX_RING_DEPTH; ++r) { LOG_INFO_V0( "Thread %d: Ring %d sizes: task_window=%" PRIu64 " heap=%" PRIu64 " dep_pool=%d", thread_idx, r, - rt->prebuilt_layout.task_window_sizes[r], rt->prebuilt_layout.heap_sizes[r], - rt->prebuilt_layout.dep_pool_capacities[r] + rt->prebuilt_layout.sizing.task_window_sizes[r], rt->prebuilt_layout.sizing.heap_sizes[r], + rt->prebuilt_layout.sizing.dep_pool_capacities[r] ); } } @@ -522,7 +522,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { AicpuPhaseScope sm_reset(AicpuPhase::SmReset); memset(rt->sm_handle, 0, sizeof(*rt->sm_handle)); if (!rt->sm_handle->init_per_ring( - sm_ptr, sm_size, rt->prebuilt_layout.task_window_sizes, rt->prebuilt_layout.heap_sizes + sm_ptr, sm_size, rt->prebuilt_layout.sizing.task_window_sizes, + rt->prebuilt_layout.sizing.heap_sizes )) { LOG_ERROR("Thread %d: sm_handle->init_per_ring failed", thread_idx); rt = nullptr; @@ -553,7 +554,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { auto &alloc = orch.rings[r].task_allocator; scope_stats_set_ring_capacity( - r, alloc.window_size(), alloc.heap_capacity(), rt->prebuilt_layout.dep_pool_capacities[r] + r, alloc.window_size(), alloc.heap_capacity(), + rt->prebuilt_layout.sizing.dep_pool_capacities[r] ); } scope_stats_set_tensormap_capacity(orch.tensor_map.pool_capacity()); diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp index dc0ddefb8..5d52a6e3c 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp @@ -501,7 +501,7 @@ static bool ensure_static_arenas(Runtime *runtime, const ArenaSizingConfig &sizi runtime_reserve_layout(sizing_arena, sizing.task_window_sizes, sizing.heap_sizes, sizing.dep_pool_capacities); int64_t t_setup_start = _now_ms(); - if (runtime->host_api.setup_static_arena(sizing.total_heap, sizing.sm_size, layout.arena_size) != 0) { + if (runtime->host_api.setup_static_arena(sizing.total_heap, sizing.sm_size, layout.offsets.arena_size) != 0) { LOG_ERROR("Failed to setup pooled static arena"); return false; } @@ -558,7 +558,7 @@ static bool build_runtime_image( ) { PTO2RuntimeArenaLayout layout = runtime_reserve_layout(*host_arena, sizing.task_window_sizes, sizing.heap_sizes, sizing.dep_pool_capacities); - layout.scheduler_timeout_ms = sizing.scheduler_timeout_ms; + layout.sizing.scheduler_timeout_ms = sizing.scheduler_timeout_ms; if (host_arena->commit(DeviceArena::kDefaultBaseAlign) == nullptr) { LOG_ERROR("Failed to commit host arena for prebuilt runtime image"); return false; @@ -587,12 +587,13 @@ static bool bind_launch_state( ) { runtime->set_orch_args(device_args); - int rc_upload = runtime->host_api.copy_to_device(ptrs.runtime_arena_dev, host_arena.base(), layout.arena_size); + int rc_upload = + runtime->host_api.copy_to_device(ptrs.runtime_arena_dev, host_arena.base(), layout.offsets.arena_size); if (rc_upload != 0) { LOG_ERROR("Failed to rtMemcpy prebuilt runtime arena to device (rc=%d)", rc_upload); return false; } - runtime->set_prebuilt_arena(ptrs.runtime_arena_dev, layout.off_runtime); + runtime->set_prebuilt_arena(ptrs.runtime_arena_dev, layout.offsets.off_runtime); return true; } diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h index ad5537536..510fa3b67 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h @@ -96,30 +96,50 @@ struct PTO2RuntimeOps { }; /** - * Layout descriptor for the prebuilt runtime arena. Holds all sub-region - * offsets (orchestrator / scheduler / sm_handle wrapper / runtime header / - * AICore mailbox) plus the layout-defining capacities. Produced once on the - * host by runtime_reserve_layout(); consumed by runtime_init_data_from_layout - * and runtime_wire_arena_pointers. + * Sizing half of the runtime-arena layout: the capacities that *define* the + * layout (the input to runtime_reserve_layout) plus the scheduler timeout. + * Stable per (callable_id, ring config); re-read at AICPU boot to reconstruct + * ring/heap/dep-pool capacities and the scheduler no-progress budget. */ -struct PTO2RuntimeArenaLayout { +struct ArenaSizingKey { + uint64_t task_window_sizes[PTO2_MAX_RING_DEPTH]{}; + uint64_t heap_sizes[PTO2_MAX_RING_DEPTH]{}; + int32_t dep_pool_capacities[PTO2_MAX_RING_DEPTH]{}; + int32_t scheduler_timeout_ms{0}; +}; + +/** + * Offset half of the runtime-arena layout: every sub-region offset + * (sm_handle wrapper / orchestrator / scheduler / runtime header / AICore + * mailbox) plus the committed arena byte size. The *output* of + * runtime_reserve_layout; consumed by runtime_init_data_from_layout and + * runtime_wire_arena_pointers (the AICPU re-wires arena-internal pointers + * from these after rtMemcpy). + */ +struct ArenaOffsets { size_t off_sm_handle{0}; PTO2OrchestratorLayout orch; PTO2SchedulerLayout sched; size_t off_runtime{0}; size_t off_mailbox{0}; - // Cached parameters (re-used by init_data + wire stages). - uint64_t task_window_sizes[PTO2_MAX_RING_DEPTH]{}; - uint64_t heap_sizes[PTO2_MAX_RING_DEPTH]{}; - int32_t dep_pool_capacities[PTO2_MAX_RING_DEPTH]{}; - int32_t scheduler_timeout_ms{0}; - // Total arena byte size post-commit. Used by host to size the prebuilt // image buffer and as the rtMemcpy length. size_t arena_size{0}; }; +/** + * Layout descriptor for the prebuilt runtime arena. Two named halves with + * distinct lifetimes/semantics: `sizing` is the layout-defining input + * (capacities + scheduler timeout), `offsets` is the computed sub-region + * offsets + arena size. Produced once on the host by runtime_reserve_layout(); + * consumed by runtime_init_data_from_layout and runtime_wire_arena_pointers. + */ +struct PTO2RuntimeArenaLayout { + ArenaSizingKey sizing; + ArenaOffsets offsets; +}; + /** * PTO Runtime2 context * diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp index a3e58c8d6..d5b951000 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp @@ -885,9 +885,9 @@ int32_t SchedulerContext::resolve_and_dispatch(Runtime *runtime, int32_t thread_ // an undefined value. uint64_t last_progress_ts = get_sys_cnt_aicpu(); uint64_t scheduler_timeout_cycles = SCHEDULER_TIMEOUT_CYCLES; - if (rt_ != nullptr && rt_->prebuilt_layout.scheduler_timeout_ms > 0) { - scheduler_timeout_cycles = - static_cast(rt_->prebuilt_layout.scheduler_timeout_ms) * (PLATFORM_PROF_SYS_CNT_FREQ / 1000); + if (rt_ != nullptr && rt_->prebuilt_layout.sizing.scheduler_timeout_ms > 0) { + scheduler_timeout_cycles = static_cast(rt_->prebuilt_layout.sizing.scheduler_timeout_ms) * + (PLATFORM_PROF_SYS_CNT_FREQ / 1000); } while (true) { diff --git a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp index 8806f3dcf..c63c94f92 100644 --- a/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp +++ b/src/a2a3/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp @@ -399,22 +399,22 @@ PTO2RuntimeArenaLayout runtime_reserve_layout( PTO2RuntimeArenaLayout layout{}; for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { - layout.task_window_sizes[r] = task_window_sizes[r]; - layout.heap_sizes[r] = heap_sizes[r]; - layout.dep_pool_capacities[r] = dep_pool_capacities[r]; + layout.sizing.task_window_sizes[r] = task_window_sizes[r]; + layout.sizing.heap_sizes[r] = heap_sizes[r]; + layout.sizing.dep_pool_capacities[r] = dep_pool_capacities[r]; } - layout.off_sm_handle = arena.reserve(sizeof(PTO2SharedMemoryHandle), alignof(PTO2SharedMemoryHandle)); + layout.offsets.off_sm_handle = arena.reserve(sizeof(PTO2SharedMemoryHandle), alignof(PTO2SharedMemoryHandle)); int32_t task_window_sizes_i32[PTO2_MAX_RING_DEPTH]; for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { task_window_sizes_i32[r] = static_cast(task_window_sizes[r]); } - layout.orch = PTO2OrchestratorState::reserve_layout(arena, task_window_sizes_i32, dep_pool_capacities); - layout.sched = PTO2SchedulerState::reserve_layout(arena, dep_pool_capacities); - layout.off_runtime = arena.reserve(sizeof(PTO2Runtime), PTO2_ALIGN_SIZE); - layout.off_mailbox = arena.reserve(sizeof(AICoreCompletionMailbox), alignof(AICoreCompletionMailbox)); + layout.offsets.orch = PTO2OrchestratorState::reserve_layout(arena, task_window_sizes_i32, dep_pool_capacities); + layout.offsets.sched = PTO2SchedulerState::reserve_layout(arena, dep_pool_capacities); + layout.offsets.off_runtime = arena.reserve(sizeof(PTO2Runtime), PTO2_ALIGN_SIZE); + layout.offsets.off_mailbox = arena.reserve(sizeof(AICoreCompletionMailbox), alignof(AICoreCompletionMailbox)); - layout.arena_size = arena.total_size(); + layout.offsets.arena_size = arena.total_size(); return layout; } @@ -433,10 +433,10 @@ PTO2Runtime *runtime_init_data_from_layout( DeviceArena &arena, const PTO2RuntimeArenaLayout &layout, PTO2RuntimeMode mode, void *sm_dev_base, uint64_t /*sm_size*/, void *gm_heap_dev_base, const uint64_t heap_sizes[PTO2_MAX_RING_DEPTH] ) { - PTO2Runtime *rt = static_cast(arena.region_ptr(layout.off_runtime)); + PTO2Runtime *rt = static_cast(arena.region_ptr(layout.offsets.off_runtime)); memset(rt, 0, sizeof(*rt)); - auto *sm_wrap = static_cast(arena.region_ptr(layout.off_sm_handle)); + auto *sm_wrap = static_cast(arena.region_ptr(layout.offsets.off_sm_handle)); memset(sm_wrap, 0, sizeof(*sm_wrap)); // rt->ops is filled by the AICPU at boot. @@ -451,25 +451,25 @@ PTO2Runtime *runtime_init_data_from_layout( rt->total_cycles = 0; if (!rt->orchestrator.init_data_from_layout( - layout.orch, arena, sm_dev_base, gm_heap_dev_base, heap_sizes, layout.task_window_sizes + layout.offsets.orch, arena, sm_dev_base, gm_heap_dev_base, heap_sizes, layout.sizing.task_window_sizes )) { return nullptr; } - if (!rt->scheduler.init_data_from_layout(layout.sched, arena, sm_dev_base)) { + if (!rt->scheduler.init_data_from_layout(layout.offsets.sched, arena, sm_dev_base)) { return nullptr; } - auto *mailbox = static_cast(arena.region_ptr(layout.off_mailbox)); + auto *mailbox = static_cast(arena.region_ptr(layout.offsets.off_mailbox)); memset(mailbox, 0, sizeof(*mailbox)); return rt; } void runtime_wire_arena_pointers(DeviceArena &arena, const PTO2RuntimeArenaLayout &layout, PTO2Runtime *rt) { - rt->sm_handle = static_cast(arena.region_ptr(layout.off_sm_handle)); - rt->aicore_mailbox = static_cast(arena.region_ptr(layout.off_mailbox)); - rt->orchestrator.wire_arena_pointers(layout.orch, arena, &rt->scheduler); - rt->scheduler.wire_arena_pointers(layout.sched, arena); + rt->sm_handle = static_cast(arena.region_ptr(layout.offsets.off_sm_handle)); + rt->aicore_mailbox = static_cast(arena.region_ptr(layout.offsets.off_mailbox)); + rt->orchestrator.wire_arena_pointers(layout.offsets.orch, arena, &rt->scheduler); + rt->scheduler.wire_arena_pointers(layout.offsets.sched, arena); } void runtime_destroy(PTO2Runtime *rt, DeviceArena & /*arena*/) { diff --git a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp index 41a97ace8..9421c4a1d 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/aicpu/aicpu_executor.cpp @@ -500,12 +500,12 @@ int32_t AicpuExecutor::run(Runtime *runtime) { // Wire every arena-internal pointer field (host wrote host-mirror // addresses; we overwrite them with device addresses). runtime_wire_arena_pointers(runtime_arena_, rt->prebuilt_layout, rt); - sm_size = PTO2SharedMemoryHandle::calculate_size_per_ring(rt->prebuilt_layout.task_window_sizes); + sm_size = PTO2SharedMemoryHandle::calculate_size_per_ring(rt->prebuilt_layout.sizing.task_window_sizes); for (int r = 0; r < PTO2_MAX_RING_DEPTH; ++r) { LOG_INFO_V0( "Thread %d: Ring %d sizes: task_window=%" PRIu64 " heap=%" PRIu64 " dep_pool=%d", thread_idx, r, - rt->prebuilt_layout.task_window_sizes[r], rt->prebuilt_layout.heap_sizes[r], - rt->prebuilt_layout.dep_pool_capacities[r] + rt->prebuilt_layout.sizing.task_window_sizes[r], rt->prebuilt_layout.sizing.heap_sizes[r], + rt->prebuilt_layout.sizing.dep_pool_capacities[r] ); } } @@ -519,7 +519,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { AicpuPhaseScope sm_reset(AicpuPhase::SmReset); memset(rt->sm_handle, 0, sizeof(*rt->sm_handle)); if (!rt->sm_handle->init_per_ring( - sm_ptr, sm_size, rt->prebuilt_layout.task_window_sizes, rt->prebuilt_layout.heap_sizes + sm_ptr, sm_size, rt->prebuilt_layout.sizing.task_window_sizes, + rt->prebuilt_layout.sizing.heap_sizes )) { LOG_ERROR("Thread %d: sm_handle->init_per_ring failed", thread_idx); rt = nullptr; @@ -551,7 +552,8 @@ int32_t AicpuExecutor::run(Runtime *runtime) { for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { auto &alloc = orch.rings[r].task_allocator; scope_stats_set_ring_capacity( - r, alloc.window_size(), alloc.heap_capacity(), rt->prebuilt_layout.dep_pool_capacities[r] + r, alloc.window_size(), alloc.heap_capacity(), + rt->prebuilt_layout.sizing.dep_pool_capacities[r] ); } scope_stats_set_tensormap_capacity(orch.tensor_map.pool_capacity()); diff --git a/src/a5/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp b/src/a5/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp index dc0ddefb8..5d52a6e3c 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/host/runtime_maker.cpp @@ -501,7 +501,7 @@ static bool ensure_static_arenas(Runtime *runtime, const ArenaSizingConfig &sizi runtime_reserve_layout(sizing_arena, sizing.task_window_sizes, sizing.heap_sizes, sizing.dep_pool_capacities); int64_t t_setup_start = _now_ms(); - if (runtime->host_api.setup_static_arena(sizing.total_heap, sizing.sm_size, layout.arena_size) != 0) { + if (runtime->host_api.setup_static_arena(sizing.total_heap, sizing.sm_size, layout.offsets.arena_size) != 0) { LOG_ERROR("Failed to setup pooled static arena"); return false; } @@ -558,7 +558,7 @@ static bool build_runtime_image( ) { PTO2RuntimeArenaLayout layout = runtime_reserve_layout(*host_arena, sizing.task_window_sizes, sizing.heap_sizes, sizing.dep_pool_capacities); - layout.scheduler_timeout_ms = sizing.scheduler_timeout_ms; + layout.sizing.scheduler_timeout_ms = sizing.scheduler_timeout_ms; if (host_arena->commit(DeviceArena::kDefaultBaseAlign) == nullptr) { LOG_ERROR("Failed to commit host arena for prebuilt runtime image"); return false; @@ -587,12 +587,13 @@ static bool bind_launch_state( ) { runtime->set_orch_args(device_args); - int rc_upload = runtime->host_api.copy_to_device(ptrs.runtime_arena_dev, host_arena.base(), layout.arena_size); + int rc_upload = + runtime->host_api.copy_to_device(ptrs.runtime_arena_dev, host_arena.base(), layout.offsets.arena_size); if (rc_upload != 0) { LOG_ERROR("Failed to rtMemcpy prebuilt runtime arena to device (rc=%d)", rc_upload); return false; } - runtime->set_prebuilt_arena(ptrs.runtime_arena_dev, layout.off_runtime); + runtime->set_prebuilt_arena(ptrs.runtime_arena_dev, layout.offsets.off_runtime); return true; } diff --git a/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h b/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h index 156e0eafa..557dbfd32 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h +++ b/src/a5/runtime/tensormap_and_ringbuffer/runtime/pto_runtime2.h @@ -97,30 +97,50 @@ struct PTO2RuntimeOps { }; /** - * Layout descriptor for the prebuilt runtime arena. Holds all sub-region - * offsets (orchestrator / scheduler / sm_handle wrapper / runtime header / - * AICore mailbox) plus the layout-defining capacities. Produced once on the - * host by runtime_reserve_layout(); consumed by runtime_init_data_from_layout - * and runtime_wire_arena_pointers. + * Sizing half of the runtime-arena layout: the capacities that *define* the + * layout (the input to runtime_reserve_layout) plus the scheduler timeout. + * Stable per (callable_id, ring config); re-read at AICPU boot to reconstruct + * ring/heap/dep-pool capacities and the scheduler no-progress budget. */ -struct PTO2RuntimeArenaLayout { +struct ArenaSizingKey { + uint64_t task_window_sizes[PTO2_MAX_RING_DEPTH]{}; + uint64_t heap_sizes[PTO2_MAX_RING_DEPTH]{}; + int32_t dep_pool_capacities[PTO2_MAX_RING_DEPTH]{}; + int32_t scheduler_timeout_ms{0}; +}; + +/** + * Offset half of the runtime-arena layout: every sub-region offset + * (sm_handle wrapper / orchestrator / scheduler / runtime header / AICore + * mailbox) plus the committed arena byte size. The *output* of + * runtime_reserve_layout; consumed by runtime_init_data_from_layout and + * runtime_wire_arena_pointers (the AICPU re-wires arena-internal pointers + * from these after rtMemcpy). + */ +struct ArenaOffsets { size_t off_sm_handle{0}; PTO2OrchestratorLayout orch; PTO2SchedulerLayout sched; size_t off_runtime{0}; size_t off_mailbox{0}; - // Cached parameters (re-used by init_data + wire stages). - uint64_t task_window_sizes[PTO2_MAX_RING_DEPTH]{}; - uint64_t heap_sizes[PTO2_MAX_RING_DEPTH]{}; - int32_t dep_pool_capacities[PTO2_MAX_RING_DEPTH]{}; - int32_t scheduler_timeout_ms{0}; - // Total arena byte size post-commit. Used by host to size the prebuilt // image buffer and as the rtMemcpy length. size_t arena_size{0}; }; +/** + * Layout descriptor for the prebuilt runtime arena. Two named halves with + * distinct lifetimes/semantics: `sizing` is the layout-defining input + * (capacities + scheduler timeout), `offsets` is the computed sub-region + * offsets + arena size. Produced once on the host by runtime_reserve_layout(); + * consumed by runtime_init_data_from_layout and runtime_wire_arena_pointers. + */ +struct PTO2RuntimeArenaLayout { + ArenaSizingKey sizing; + ArenaOffsets offsets; +}; + /** * PTO Runtime2 context * diff --git a/src/a5/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp b/src/a5/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp index af17954ed..e340e042d 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/runtime/scheduler/scheduler_dispatch.cpp @@ -603,9 +603,9 @@ int32_t SchedulerContext::resolve_and_dispatch(Runtime *runtime, int32_t thread_ // an undefined value. uint64_t last_progress_ts = get_sys_cnt_aicpu(); uint64_t scheduler_timeout_cycles = SCHEDULER_TIMEOUT_CYCLES; - if (rt_ != nullptr && rt_->prebuilt_layout.scheduler_timeout_ms > 0) { - scheduler_timeout_cycles = - static_cast(rt_->prebuilt_layout.scheduler_timeout_ms) * (PLATFORM_PROF_SYS_CNT_FREQ / 1000); + if (rt_ != nullptr && rt_->prebuilt_layout.sizing.scheduler_timeout_ms > 0) { + scheduler_timeout_cycles = static_cast(rt_->prebuilt_layout.sizing.scheduler_timeout_ms) * + (PLATFORM_PROF_SYS_CNT_FREQ / 1000); } while (true) { diff --git a/src/a5/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp b/src/a5/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp index 9894f04be..6301a8a26 100644 --- a/src/a5/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp +++ b/src/a5/runtime/tensormap_and_ringbuffer/runtime/shared/pto_runtime2_init.cpp @@ -390,22 +390,22 @@ PTO2RuntimeArenaLayout runtime_reserve_layout( ) { PTO2RuntimeArenaLayout layout{}; for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { - layout.task_window_sizes[r] = task_window_sizes[r]; - layout.heap_sizes[r] = heap_sizes[r]; - layout.dep_pool_capacities[r] = dep_pool_capacities[r]; + layout.sizing.task_window_sizes[r] = task_window_sizes[r]; + layout.sizing.heap_sizes[r] = heap_sizes[r]; + layout.sizing.dep_pool_capacities[r] = dep_pool_capacities[r]; } - layout.off_sm_handle = arena.reserve(sizeof(PTO2SharedMemoryHandle), alignof(PTO2SharedMemoryHandle)); + layout.offsets.off_sm_handle = arena.reserve(sizeof(PTO2SharedMemoryHandle), alignof(PTO2SharedMemoryHandle)); int32_t task_window_sizes_i32[PTO2_MAX_RING_DEPTH]; for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { task_window_sizes_i32[r] = static_cast(task_window_sizes[r]); } - layout.orch = PTO2OrchestratorState::reserve_layout(arena, task_window_sizes_i32, dep_pool_capacities); - layout.sched = PTO2SchedulerState::reserve_layout(arena, dep_pool_capacities); - layout.off_runtime = arena.reserve(sizeof(PTO2Runtime), PTO2_ALIGN_SIZE); - layout.off_mailbox = arena.reserve(sizeof(AICoreCompletionMailbox), alignof(AICoreCompletionMailbox)); + layout.offsets.orch = PTO2OrchestratorState::reserve_layout(arena, task_window_sizes_i32, dep_pool_capacities); + layout.offsets.sched = PTO2SchedulerState::reserve_layout(arena, dep_pool_capacities); + layout.offsets.off_runtime = arena.reserve(sizeof(PTO2Runtime), PTO2_ALIGN_SIZE); + layout.offsets.off_mailbox = arena.reserve(sizeof(AICoreCompletionMailbox), alignof(AICoreCompletionMailbox)); - layout.arena_size = arena.total_size(); + layout.offsets.arena_size = arena.total_size(); return layout; } @@ -424,10 +424,10 @@ PTO2Runtime *runtime_init_data_from_layout( DeviceArena &arena, const PTO2RuntimeArenaLayout &layout, PTO2RuntimeMode mode, void *sm_dev_base, uint64_t /*sm_size*/, void *gm_heap_dev_base, const uint64_t heap_sizes[PTO2_MAX_RING_DEPTH] ) { - PTO2Runtime *rt = static_cast(arena.region_ptr(layout.off_runtime)); + PTO2Runtime *rt = static_cast(arena.region_ptr(layout.offsets.off_runtime)); memset(rt, 0, sizeof(*rt)); - auto *sm_wrap = static_cast(arena.region_ptr(layout.off_sm_handle)); + auto *sm_wrap = static_cast(arena.region_ptr(layout.offsets.off_sm_handle)); memset(sm_wrap, 0, sizeof(*sm_wrap)); // rt->ops is filled by the AICPU at boot. @@ -442,25 +442,25 @@ PTO2Runtime *runtime_init_data_from_layout( rt->total_cycles = 0; if (!rt->orchestrator.init_data_from_layout( - layout.orch, arena, sm_dev_base, gm_heap_dev_base, heap_sizes, layout.task_window_sizes + layout.offsets.orch, arena, sm_dev_base, gm_heap_dev_base, heap_sizes, layout.sizing.task_window_sizes )) { return nullptr; } - if (!rt->scheduler.init_data_from_layout(layout.sched, arena, sm_dev_base)) { + if (!rt->scheduler.init_data_from_layout(layout.offsets.sched, arena, sm_dev_base)) { return nullptr; } - auto *mailbox = static_cast(arena.region_ptr(layout.off_mailbox)); + auto *mailbox = static_cast(arena.region_ptr(layout.offsets.off_mailbox)); memset(mailbox, 0, sizeof(*mailbox)); return rt; } void runtime_wire_arena_pointers(DeviceArena &arena, const PTO2RuntimeArenaLayout &layout, PTO2Runtime *rt) { - rt->sm_handle = static_cast(arena.region_ptr(layout.off_sm_handle)); - rt->aicore_mailbox = static_cast(arena.region_ptr(layout.off_mailbox)); - rt->orchestrator.wire_arena_pointers(layout.orch, arena, &rt->scheduler); - rt->scheduler.wire_arena_pointers(layout.sched, arena); + rt->sm_handle = static_cast(arena.region_ptr(layout.offsets.off_sm_handle)); + rt->aicore_mailbox = static_cast(arena.region_ptr(layout.offsets.off_mailbox)); + rt->orchestrator.wire_arena_pointers(layout.offsets.orch, arena, &rt->scheduler); + rt->scheduler.wire_arena_pointers(layout.offsets.sched, arena); } void runtime_destroy(PTO2Runtime *rt, DeviceArena & /*arena*/) { diff --git a/tests/ut/cpp/a2a3/test_shared_memory.cpp b/tests/ut/cpp/a2a3/test_shared_memory.cpp index 5f76b62d5..eaf546908 100644 --- a/tests/ut/cpp/a2a3/test_shared_memory.cpp +++ b/tests/ut/cpp/a2a3/test_shared_memory.cpp @@ -259,9 +259,9 @@ TEST(RuntimeArenaLayout, PerRingConfigInitializesRuntimeComponents) { EXPECT_EQ(rt->gm_heap_size, total_heap); for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { - EXPECT_EQ(layout.task_window_sizes[r], ws[r]); - EXPECT_EQ(layout.heap_sizes[r], heaps[r]); - EXPECT_EQ(layout.dep_pool_capacities[r], dep_caps[r]); + EXPECT_EQ(layout.sizing.task_window_sizes[r], ws[r]); + EXPECT_EQ(layout.sizing.heap_sizes[r], heaps[r]); + EXPECT_EQ(layout.sizing.dep_pool_capacities[r], dep_caps[r]); EXPECT_EQ(rt->orchestrator.rings[r].task_allocator.window_size(), static_cast(ws[r])); EXPECT_EQ(rt->orchestrator.rings[r].task_allocator.heap_capacity(), heaps[r]); EXPECT_EQ(rt->orchestrator.rings[r].fanin_pool.capacity, dep_caps[r]); @@ -283,7 +283,7 @@ TEST(RuntimeArenaLayout, RejectsOverflowingPerRingHeapSum) { EXPECT_EQ(runtime_init_data_from_layout(runtime_arena, layout, PTO2_MODE_EXECUTE, &sm, 0, &gm, heaps), nullptr); PTO2OrchestratorState orch{}; - EXPECT_FALSE(orch.init_data_from_layout(layout.orch, runtime_arena, &sm, &gm, heaps, ws)); + EXPECT_FALSE(orch.init_data_from_layout(layout.offsets.orch, runtime_arena, &sm, &gm, heaps, ws)); } // ============================================================================= diff --git a/tests/ut/cpp/a5/test_shared_memory.cpp b/tests/ut/cpp/a5/test_shared_memory.cpp index 5f76b62d5..eaf546908 100644 --- a/tests/ut/cpp/a5/test_shared_memory.cpp +++ b/tests/ut/cpp/a5/test_shared_memory.cpp @@ -259,9 +259,9 @@ TEST(RuntimeArenaLayout, PerRingConfigInitializesRuntimeComponents) { EXPECT_EQ(rt->gm_heap_size, total_heap); for (int r = 0; r < PTO2_MAX_RING_DEPTH; r++) { - EXPECT_EQ(layout.task_window_sizes[r], ws[r]); - EXPECT_EQ(layout.heap_sizes[r], heaps[r]); - EXPECT_EQ(layout.dep_pool_capacities[r], dep_caps[r]); + EXPECT_EQ(layout.sizing.task_window_sizes[r], ws[r]); + EXPECT_EQ(layout.sizing.heap_sizes[r], heaps[r]); + EXPECT_EQ(layout.sizing.dep_pool_capacities[r], dep_caps[r]); EXPECT_EQ(rt->orchestrator.rings[r].task_allocator.window_size(), static_cast(ws[r])); EXPECT_EQ(rt->orchestrator.rings[r].task_allocator.heap_capacity(), heaps[r]); EXPECT_EQ(rt->orchestrator.rings[r].fanin_pool.capacity, dep_caps[r]); @@ -283,7 +283,7 @@ TEST(RuntimeArenaLayout, RejectsOverflowingPerRingHeapSum) { EXPECT_EQ(runtime_init_data_from_layout(runtime_arena, layout, PTO2_MODE_EXECUTE, &sm, 0, &gm, heaps), nullptr); PTO2OrchestratorState orch{}; - EXPECT_FALSE(orch.init_data_from_layout(layout.orch, runtime_arena, &sm, &gm, heaps, ws)); + EXPECT_FALSE(orch.init_data_from_layout(layout.offsets.orch, runtime_arena, &sm, &gm, heaps, ws)); } // =============================================================================