Skip to content

Commit 5be276f

Browse files
committed
Arm backend: support embedded PTE semihosting in executorch runner
- Allow `arm_executor_runner` to reuse `model_pte.h` in semihosting mode via `ET_COMPILED_PTE` - Keep semihosting available for host-side prompt and tensor I/O while exercising the embedded-PTE execution path - Make the semihosting file size configurable - Update the runner argument handling for semihosted embedded-PTE flows Change-Id: Ia1b5596decd4350eaf243c36dc4da4d8df406f04 Signed-off-by: Xingguo Li <xingguo.li@arm.com>
1 parent 3f0e901 commit 5be276f

2 files changed

Lines changed: 107 additions & 66 deletions

File tree

examples/arm/executor_runner/CMakeLists.txt

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,7 @@ if(NOT ET_MODEL_PTE_ADDR
152152
)
153153
endif()
154154

155-
if(NOT SEMIHOSTING
156-
AND NOT ET_MODEL_PTE_ADDR
157-
AND NOT "${ET_PTE_FILE_PATH}" STREQUAL ""
158-
)
155+
if(NOT ET_MODEL_PTE_ADDR AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "")
159156
if(NOT EXISTS "${ET_PTE_FILE_PATH}")
160157
message(
161158
FATAL_ERROR
@@ -228,7 +225,7 @@ if(NOT CMAKE_SKIP_INSTALL_RULES AND TARGET ethosu_core_driver)
228225
endif()
229226

230227
# Convert pte to header
231-
if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING)
228+
if(NOT ET_MODEL_PTE_ADDR AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "")
232229
add_custom_target(
233230
gen_model_header DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h
234231
)
@@ -319,8 +316,7 @@ list(
319316
# EXECUTORCH_SELECT_OPS_MODEL to include ops automatically. If the pte contains
320317
# no undelegated ops, use neither.
321318
set(FOUND_OPS_IN_FILE FALSE)
322-
if(NOT SEMIHOSTING
323-
AND NOT ET_MODEL_PTE_ADDR
319+
if(NOT ET_MODEL_PTE_ADDR
324320
AND NOT "${ET_PTE_FILE_PATH}" STREQUAL ""
325321
AND EXISTS "${ET_PTE_FILE_PATH}"
326322
)
@@ -337,7 +333,7 @@ if(NOT SEMIHOSTING
337333
endif()
338334
endif()
339335

340-
if(SEMIHOSTING)
336+
if(SEMIHOSTING AND "${ET_PTE_FILE_PATH}" STREQUAL "")
341337
set(EXECUTORCH_SELECT_OPS_MODEL "")
342338
message(
343339
"gen_oplist: Building with semihosting, no model is used to auto generate ops from will use EXECUTORCH_SELECT_OPS_LIST=${EXECUTORCH_SELECT_OPS_LIST}"
@@ -506,7 +502,7 @@ target_compile_definitions(
506502
arm_executor_runner PRIVATE C10_USING_CUSTOM_GENERATED_MACROS
507503
)
508504

509-
if(NOT "${ET_MODEL_PTE_ADDR}" AND NOT SEMIHOSTING)
505+
if(NOT ET_MODEL_PTE_ADDR AND NOT "${ET_PTE_FILE_PATH}" STREQUAL "")
510506
add_dependencies(arm_executor_runner gen_model_header)
511507
endif()
512508

@@ -569,6 +565,10 @@ if(SEMIHOSTING)
569565
target_compile_definitions(arm_executor_runner PUBLIC SEMIHOSTING)
570566
endif()
571567

568+
if(ET_PTE_FILE_PATH)
569+
target_compile_definitions(arm_executor_runner PUBLIC ET_COMPILED_PTE)
570+
endif()
571+
572572
# Memory buffer sizes for Executorch flow
573573

574574
if(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE)
@@ -579,6 +579,14 @@ if(ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE)
579579
)
580580
endif()
581581

582+
if(DEFINED ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE)
583+
target_compile_definitions(
584+
arm_executor_runner
585+
PUBLIC
586+
ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE=${ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE}
587+
)
588+
endif()
589+
582590
target_compile_definitions(
583591
arm_executor_runner
584592
PUBLIC

examples/arm/executor_runner/arm_executor_runner.cpp

Lines changed: 90 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@
1818
* a c-array named model_pte and put into model_pte.h
1919
* this is placed in network_model_sec linker section
2020
* that is controlled by your memory mode via the
21-
* ETHOSU_MODEL cmake parameter.
22-
* If SEMIHOSTING is define this is not used
21+
* ETHOSU_MODEL cmake parameter. This is not used by the
22+
* semihosting path, which either loads the model from a
23+
* file or can reuse an embedded model with
24+
* ET_COMPILED_PTE.
25+
* ET_COMPILED_PTE - In SEMIHOSTING mode, reuse the model embedded in
26+
* model_pte.h instead of passing the PTE as a host file.
2327
* ET_NUM_INFERENCES - Numbers of times to run the inference
2428
* ET_LOG_DUMP_INPUT - Control if you want input to be dumped to the log.
2529
* ET_LOG_DUMP_OUTPUT - Control if you want output to be dumped to the log.
@@ -61,12 +65,12 @@
6165
* as guidance if timing adaptor values are set correctly.
6266
*
6367
* SEMIHOSTING - When using the FVP simulator it can be built to access your dev
64-
* machines filesystem, this is used for testing models in
65-
* unittest/pytest and a special version of the runner is built
66-
* to read model and input as files and output is saved to the
67-
* filesystem. The backends/arm/test/setup_testing.sh script will
68-
* build this for you so you can use it from pytest to test with
69-
* the FVP simulator.
68+
machine's filesystem. This is used both for unit-test style
69+
* flows that load model and input files from the host and for
70+
* host-driven prompt/input/output exchange while still reusing an
71+
* embedded PTE via ET_COMPILED_PTE. The
72+
* backends/arm/test/setup_testing.sh script builds the unittest
73+
* configuration used with the FVP simulator.
7074
*
7175
* Memory areas used:
7276
* You might want to configure this differently on your HW, like maybe all
@@ -79,6 +83,14 @@
7983
* ET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE - Size of memory area
8084
* used when setting up
8185
* the model
86+
* ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE
87+
* - Size of memory area
88+
* used to hold
89+
* semihosted files,
90+
* including input
91+
* tensors and, when
92+
* applicable, an
93+
* external PTE file
8294
* ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE - Size of memory area
8395
* used when running
8496
* inferences
@@ -140,24 +152,27 @@
140152
* files/memory
141153
*/
142154

143-
const size_t input_file_allocation_pool_size = 60 * 1024 * 1024;
155+
#if !defined(ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE)
156+
#define ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE (60 * 1024 * 1024)
157+
#endif
158+
const size_t input_file_allocation_pool_size =
159+
ET_ARM_BAREMETAL_SEMIHOSTING_FILE_ALLOCATOR_POOL_SIZE;
144160
unsigned char __attribute__((
145161
section("input_data_sec"),
146162
aligned(16))) input_file_allocation_pool[input_file_allocation_pool_size];
147-
char* model_pte = nullptr;
163+
#endif
148164

149-
#else
150-
#if defined(ET_MODEL_PTE_ADDR)
165+
#if defined(ET_MODEL_PTE_ADDR) && defined(ET_COMPILED_PTE)
166+
#error "ET_MODEL_PTE_ADDR and ET_COMPILED_PTE are mutually exclusive"
167+
#endif
151168

152-
/**
153-
* Set ET_MODEL_PTE_ADDR to the memory address where your PTE is placed
154-
* e.g. if you for example flash it to 0x7000000 set
155-
* -DET_MODEL_PTE_ADDR=0x7000000 You can run the Corstone FVP with the --data
156-
* flag to place it on a address if you use the FVP.
157-
*/
158-
char* model_pte = reinterpret_cast<char*>(ET_MODEL_PTE_ADDR);
169+
#if !defined(ET_MODEL_PTE_ADDR) && !defined(ET_COMPILED_PTE) && \
170+
!defined(SEMIHOSTING)
171+
#error \
172+
"One of ET_MODEL_PTE_ADDR, ET_COMPILED_PTE, or SEMIHOSTING must be defined"
173+
#endif
159174

160-
#else
175+
#if !defined(ET_MODEL_PTE_ADDR) && defined(ET_COMPILED_PTE)
161176
/**
162177
* This header file is generated by the build process based on the .pte file
163178
* specified in the ET_PTE_FILE_PATH variable to the cmake build.
@@ -169,7 +184,6 @@ char* model_pte = reinterpret_cast<char*>(ET_MODEL_PTE_ADDR);
169184
*/
170185
#include "model_pte.h"
171186
#endif
172-
#endif
173187

174188
using executorch::aten::ScalarType;
175189
using executorch::aten::Tensor;
@@ -543,6 +557,7 @@ std::pair<char*, size_t> read_binary_file(
543557
fclose(fp);
544558
return std::make_pair(buffer, read_size);
545559
}
560+
546561
#endif
547562

548563
/// Holds all state needed for setup and run phases
@@ -557,7 +572,7 @@ struct RunnerContext {
557572
size_t executor_membase = 0;
558573
size_t program_data_len = 0;
559574
size_t input_memsize = 0;
560-
size_t pte_size = 0;
575+
size_t model_data_size = 0;
561576
bool bundle_io = false;
562577
Box<BufferDataLoader> loader;
563578
Box<Program> program;
@@ -581,21 +596,22 @@ struct RunnerContext {
581596

582597
void runner_init(
583598
RunnerContext& ctx,
584-
std::vector<std::pair<char*, size_t>> input_buffers,
585-
size_t pte_size) {
599+
const uint8_t* model_data,
600+
size_t model_size,
601+
std::vector<std::pair<char*, size_t>> input_buffers) {
586602
// Find the offset to the embedded Program.
587-
const void* program_data = model_pte;
588-
ctx.program_data_len = pte_size;
589-
ctx.pte_size = pte_size;
603+
const void* program_data = model_data;
604+
ctx.program_data_len = model_size;
605+
ctx.model_data_size = model_size;
590606

591607
#if defined(ET_BUNDLE_IO)
592608
ctx.bundle_io = executorch::bundled_program::is_bundled_program(
593-
reinterpret_cast<void*>(model_pte), ctx.pte_size);
609+
const_cast<uint8_t*>(model_data), ctx.model_data_size);
594610
if (ctx.bundle_io) {
595611
// BundleIO bpte is provided, dig out the actual model from the data area
596612
Error status = executorch::bundled_program::get_program_data(
597-
reinterpret_cast<void*>(model_pte),
598-
ctx.pte_size,
613+
const_cast<uint8_t*>(model_data),
614+
ctx.model_data_size,
599615
&program_data,
600616
&ctx.program_data_len);
601617

@@ -780,7 +796,7 @@ void runner_init(
780796
// Useful for testing
781797
ET_LOG(Info, "Input testset[%d] from bundled bpte", testset_idx);
782798
Error status = executorch::bundled_program::load_bundled_input(
783-
*ctx.method.value(), model_pte, testset_idx);
799+
*ctx.method.value(), model_data, testset_idx);
784800
ET_CHECK_MSG(
785801
status == Error::Ok,
786802
"load_bundled_input failed with status 0x%" PRIx32,
@@ -857,7 +873,7 @@ void log_mem_status(RunnerContext& ctx) {
857873
ET_LOG(
858874
Info,
859875
"model_pte_loaded_size: %lu bytes. (pte size unknown when not baked into elf)",
860-
static_cast<unsigned long>(ctx.pte_size));
876+
static_cast<unsigned long>(ctx.model_data_size));
861877
#else
862878
ET_LOG(
863879
Info,
@@ -866,7 +882,7 @@ void log_mem_status(RunnerContext& ctx) {
866882
ET_LOG(
867883
Info,
868884
"model_pte_loaded_size: %lu bytes.",
869-
static_cast<unsigned long>(ctx.pte_size));
885+
static_cast<unsigned long>(ctx.model_data_size));
870886
#endif
871887

872888
#if defined(SEMIHOSTING)
@@ -1149,13 +1165,13 @@ void write_etdump(RunnerContext& ctx) {
11491165
// cppcheck-suppress constParameterReference
11501166
// ET_BUNDLE_IO verification passes ctx.method into devtools/bundled_program
11511167
// helpers, which currently require a non-const Method&.
1152-
bool verify_result(RunnerContext& ctx, const void* model_pte) {
1168+
bool verify_result(RunnerContext& ctx, const void* model_data) {
11531169
bool model_ok = false;
11541170
#if defined(ET_BUNDLE_IO)
11551171
if (ctx.bundle_io) {
11561172
// Check result
11571173
ErrorStats stats = compute_method_output_error_stats(
1158-
*ctx.method.value(), model_pte, testset_idx);
1174+
*ctx.method.value(), model_data, testset_idx);
11591175
if (stats.status == Error::Ok) {
11601176
ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx);
11611177
ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error);
@@ -1172,7 +1188,7 @@ bool verify_result(RunnerContext& ctx, const void* model_pte) {
11721188

11731189
// Verify the result.
11741190
Error status = verify_method_outputs(
1175-
*ctx.method.value(), model_pte, testset_idx, et_rtol, et_atol);
1191+
*ctx.method.value(), model_data, testset_idx, et_rtol, et_atol);
11761192
if (status == Error::Ok) {
11771193
ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");
11781194
ET_LOG(Info, "TEST: BundleIO index[%d] Test_result: PASS", testset_idx);
@@ -1194,14 +1210,14 @@ bool verify_result(RunnerContext& ctx, const void* model_pte) {
11941210
}
11951211
#else // defined(ET_BUNDLE_IO)
11961212
(void)ctx;
1197-
(void)model_pte;
1213+
(void)model_data;
11981214
// No checking done, assume true
11991215
model_ok = true;
12001216
#endif // defined(ET_BUNDLE_IO)
12011217
return model_ok;
12021218
}
12031219

1204-
bool run_model(RunnerContext& ctx, const void* model_pte) {
1220+
bool run_model(RunnerContext& ctx, const void* model_data) {
12051221
Error status;
12061222
ET_LOG(Info, "Starting running %d inferences...", num_inferences);
12071223
int n = 0;
@@ -1229,7 +1245,7 @@ bool run_model(RunnerContext& ctx, const void* model_pte) {
12291245

12301246
ET_LOG(Info, "%d inferences finished", num_inferences);
12311247
print_outputs(ctx);
1232-
bool model_ok = verify_result(ctx, model_pte);
1248+
bool model_ok = verify_result(ctx, model_data);
12331249
ET_LOG(Info, "Model run: %d", model_ok);
12341250

12351251
return model_ok;
@@ -1240,6 +1256,14 @@ bool run_model(RunnerContext& ctx, const void* model_pte) {
12401256
int main(int argc, const char* argv[]) {
12411257
#if defined(SEMIHOSTING)
12421258
ET_LOG(Info, "Running executor with parameter:");
1259+
#if defined(ET_COMPILED_PTE)
1260+
if (argc < 5) {
1261+
ET_LOG(Fatal, "Not right number of parameters!");
1262+
ET_LOG(Fatal, "app -o output_basename -i input.bin [-i input2.bin]");
1263+
ET_LOG(Fatal, "Exiting!");
1264+
_exit(1);
1265+
}
1266+
#else
12431267
if (argc < 7) {
12441268
ET_LOG(Fatal, "Not right number of parameters!");
12451269
ET_LOG(
@@ -1248,6 +1272,7 @@ int main(int argc, const char* argv[]) {
12481272
ET_LOG(Fatal, "Exiting!");
12491273
_exit(1);
12501274
}
1275+
#endif
12511276
ET_LOG(Info, " %s", argv[0]);
12521277
for (int i = 1; i < argc; i++) {
12531278
ET_LOG(Info, " %s %s", argv[i], argv[++i]);
@@ -1259,14 +1284,18 @@ int main(int argc, const char* argv[]) {
12591284

12601285
executorch::runtime::runtime_init();
12611286
std::vector<std::pair<char*, size_t>> input_buffers;
1287+
const uint8_t* model_data = nullptr;
1288+
size_t model_size = 0;
12621289

12631290
#if defined(ET_MODEL_PTE_ADDR)
1264-
// pte not in a known array but just on a memory/flash address
1265-
// As we dont know the size we pick something big enough
1266-
// Actual model is read from this area.
1267-
size_t pte_size = 0x10000000;
1268-
#else
1269-
size_t pte_size = sizeof(model_pte);
1291+
// Read the PTE from a fixed memory/flash address configured via
1292+
// -DET_MODEL_PTE_ADDR=<address>. Since the runner does not know the exact
1293+
// size up front, use a large upper bound for the buffer span.
1294+
model_data = reinterpret_cast<const uint8_t*>(ET_MODEL_PTE_ADDR);
1295+
model_size = 0x10000000;
1296+
#elif defined(ET_COMPILED_PTE)
1297+
model_data = model_pte;
1298+
model_size = sizeof(model_pte);
12701299
#endif
12711300

12721301
RunnerContext ctx;
@@ -1307,10 +1336,8 @@ int main(int argc, const char* argv[]) {
13071336
_exit(1);
13081337
}
13091338

1310-
// Store the model data with the same variable as if it was loaded
1311-
// from compiled in location.
1312-
model_pte = buffer;
1313-
pte_size = buffer_size;
1339+
model_data = reinterpret_cast<const uint8_t*>(buffer);
1340+
model_size = buffer_size;
13141341
} else if (std::strcmp(argv[i], "-o") == 0) {
13151342
// store the base filename to write output to.
13161343
ctx.output_basename = argv[++i];
@@ -1320,17 +1347,23 @@ int main(int argc, const char* argv[]) {
13201347

13211348
// Byte 4-7 is usually a nice magic number that could be good to print to make
13221349
// sure it's OK ETxx for PTE and BPxx for bundled pte where xx is a number.
1350+
// cppcheck-suppress knownConditionTrueFalse
1351+
if (model_data == nullptr || model_size == 0) {
1352+
ET_LOG(Fatal, "Model data is not initialized");
1353+
return 1;
1354+
}
13231355
ET_LOG(
13241356
Info,
13251357
"PTE @ %p [----%c%c%c%c]",
1326-
model_pte,
1327-
model_pte[4],
1328-
model_pte[5],
1329-
model_pte[6],
1330-
model_pte[7]);
1331-
1332-
runner_init(ctx, input_buffers, pte_size);
1333-
bool model_ok = run_model(ctx, model_pte);
1358+
model_data,
1359+
model_data[4],
1360+
model_data[5],
1361+
model_data[6],
1362+
model_data[7]);
1363+
1364+
runner_init(ctx, model_data, model_size, input_buffers);
1365+
bool model_ok = true;
1366+
model_ok = run_model(ctx, model_data);
13341367
ET_LOG(Info, "Model run: %d", model_ok);
13351368

13361369
log_mem_status(ctx);

0 commit comments

Comments
 (0)