Skip to content

Commit 4dc27a2

Browse files
committed
Qualcomm AI Engine Direct - tensor dumping at lpai backend
Summary: Currently, using --dump_intermediate_outputs together with RPC mode will dump tensors.

Test plan:
python backends/qualcomm/tests/test_qnn_delegate.py -k TestQNNQuantizedUtils.test_qnn_backend_dump_intermediate_outputs_simple_model -b build-android -s ${SN} -m ${CHIPID} --backend lpai
python backends/qualcomm/tests/test_qnn_delegate.py -k TestQNNQuantizedUtils.test_qnn_backend_dump_intermediate_outputs_simple_model -b build-android -s ${SN} -m ${CHIPID} --dump_intermediate_outputs --backend lpai
1 parent ef5c8a7 commit 4dc27a2

3 files changed

Lines changed: 34 additions & 7 deletions

File tree

backends/qualcomm/debugger/README.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,8 @@ After `build_executorch_binary()`, the debugger holds:
156156

157157
Ensure `dump_intermediate_outputs` is enabled in your `QnnConfig` (or pass `--dump_intermediate_outputs` via CLI). Only run **one inference** for debugging — multiple executions are not supported.
158158

159+
**Note:** Intermediate tensor dumping is not currently supported in direct mode on HTP/LPAI backends.
160+
159161
```python
160162
from executorch.examples.qualcomm.utils import SimpleADB
161163

@@ -266,7 +268,7 @@ python -m examples.qualcomm.util_scripts.qnn_intermediate_debugger_demo -b build
266268
3. Does not support graphs with partitions (partial delegation).
267269
4. Does not support LLM models.
268270
5. Does not support graphs with multiple methods.
269-
271+
6. Intermediate tensor dumping is not currently supported in direct mode on HTP/LPAI backends.
270272

271273
## ExecuTorch QNN HTP Heap Profiling
272274

backends/qualcomm/export_utils.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,10 @@ def __init__(
276276
self.skip_push = qnn_config.skip_push
277277
self.backend_library_paths = {}
278278

279+
if self.direct_build_folder and self.dump_intermediate_outputs:
280+
raise ValueError(
281+
"Per-tensor dumping is currently not supported in direct mode."
282+
)
279283
if self.direct_build_folder:
280284
direct_general_artifacts = [
281285
f"{self.build_path}/examples/qualcomm/direct_executor_runner/libqnn_executorch_stub.so",
@@ -437,9 +441,8 @@ def execute(
437441
f"--input_list_path {self.input_list_filename}",
438442
f"--etdump_path {self.etdump_path}",
439443
"--shared_buffer" if self.shared_buffer else "",
440-
f"--debug_output_path {self.debug_output_path}",
441444
(
442-
"--dump_intermediate_outputs"
445+
f"--debug_output_path {self.debug_output_path} --dump_intermediate_outputs"
443446
if self.dump_intermediate_outputs
444447
else ""
445448
),

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6110,6 +6110,10 @@ def test_qnn_backend_dump_intermediate_outputs_topk(self):
61106110
)
61116111

61126112
def test_qnn_backend_dump_intermediate_outputs_simple_model(self):
6113+
if self.direct_build_folder:
6114+
self.skipTest(
6115+
"Direct mode does not support per-tensor dumping (HTP/LPAI backends)."
6116+
)
61136117
backend_options = generate_htp_compiler_spec(use_fp16=True)
61146118
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
61156119
soc_model=self.chipset_table[TestQNN.soc_model],
@@ -6840,20 +6844,38 @@ def output_callback(log_msg):
68406844
)
68416845

68426846
def test_qnn_backend_dump_intermediate_outputs_simple_model(self):
6843-
backend_options = generate_htp_compiler_spec(use_fp16=False)
6847+
# TODO: Support per-tensor dumping in LPAI direct mode.
6848+
if self.direct_build_folder:
6849+
self.skipTest(
6850+
"Direct mode does not support per-tensor dumping (HTP/LPAI backends)."
6851+
)
6852+
match get_backend_type(self.backend):
6853+
case QnnExecuTorchBackendType.kHtpBackend:
6854+
backend_options = generate_htp_compiler_spec(use_fp16=False)
6855+
expected_compared_events = 14
6856+
case QnnExecuTorchBackendType.kLpaiBackend:
6857+
backend_options = generate_lpai_compiler_spec(
6858+
target_env=self.get_lpai_target_env()
6859+
)
6860+
# I/O q/dq nodes fall back to CPU via FoldQDQ LPAI workaround
6861+
# and are excluded from QNN etdump; update after first LPAI run
6862+
expected_compared_events = 17
6863+
case _:
6864+
raise ValueError("Backend is not implemented yet")
68446865
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
68456866
soc_model=self.chipset_table[TestQNN.soc_model],
68466867
backend_options=backend_options,
68476868
dump_intermediate_outputs=True,
68486869
)
68496870
module = SimpleModel() # noqa: F405
6871+
torch.manual_seed(8)
68506872
sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
6851-
module = self.get_qdq_module(module, sample_input)
6873+
qdq_module = self.get_qdq_module(module, sample_input)
68526874
self.lower_module_and_test_output(
6853-
module,
6875+
qdq_module,
68546876
sample_input,
68556877
expected_partitions=1,
6856-
expected_compared_events=14,
6878+
expected_compared_events=expected_compared_events,
68576879
)
68586880

68596881
def test_qnn_backend_dump_intermediate_outputs_topk(self):

0 commit comments

Comments
 (0)