Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion backends/qualcomm/debugger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ After `build_executorch_binary()`, the debugger holds:

Ensure `dump_intermediate_outputs` is enabled in your `QnnConfig` (or pass `--dump_intermediate_outputs` via CLI). Only run **one inference** for debugging — multiple executions are not supported.

**Note:** Intermediate tensor dumping is not currently supported in direct mode on HTP/LPAI backends.

```python
from executorch.examples.qualcomm.utils import SimpleADB

Expand Down Expand Up @@ -266,7 +268,7 @@ python -m examples.qualcomm.util_scripts.qnn_intermediate_debugger_demo -b build
3. Does not support graphs with partitions (partial delegation).
4. Does not support LLM models.
5. Does not support graphs with multiple methods.

6. Intermediate tensor dumping is not currently supported in direct mode on HTP/LPAI backends.

## ExecuTorch QNN HTP Heap Profiling

Expand Down
7 changes: 5 additions & 2 deletions backends/qualcomm/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ def __init__(
self.skip_push = qnn_config.skip_push
self.backend_library_paths = {}

if self.direct_build_folder and self.dump_intermediate_outputs:
raise ValueError(
"Per-tensor dumping is currently not supported in direct mode."
)
if self.direct_build_folder:
direct_general_artifacts = [
f"{self.build_path}/examples/qualcomm/direct_executor_runner/libqnn_executorch_stub.so",
Expand Down Expand Up @@ -437,9 +441,8 @@ def execute(
f"--input_list_path {self.input_list_filename}",
f"--etdump_path {self.etdump_path}",
"--shared_buffer" if self.shared_buffer else "",
f"--debug_output_path {self.debug_output_path}",
(
"--dump_intermediate_outputs"
f"--debug_output_path {self.debug_output_path} --dump_intermediate_outputs"
if self.dump_intermediate_outputs
else ""
),
Expand Down
30 changes: 26 additions & 4 deletions backends/qualcomm/tests/test_qnn_delegate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6110,6 +6110,10 @@ def test_qnn_backend_dump_intermediate_outputs_topk(self):
)

def test_qnn_backend_dump_intermediate_outputs_simple_model(self):
if self.direct_build_folder:
self.skipTest(
"Direct mode does not support per-tensor dumping (HTP/LPAI backends)."
)
backend_options = generate_htp_compiler_spec(use_fp16=True)
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.chipset_table[TestQNN.soc_model],
Expand Down Expand Up @@ -6840,20 +6844,38 @@ def output_callback(log_msg):
)

def test_qnn_backend_dump_intermediate_outputs_simple_model(self):
backend_options = generate_htp_compiler_spec(use_fp16=False)
# TODO: Support per-tensor dumping in LPAI direct mode.
if self.direct_build_folder:
self.skipTest(
"Direct mode does not support per-tensor dumping (HTP/LPAI backends)."
)
match get_backend_type(self.backend):
case QnnExecuTorchBackendType.kHtpBackend:
backend_options = generate_htp_compiler_spec(use_fp16=False)
expected_compared_events = 14
case QnnExecuTorchBackendType.kLpaiBackend:
backend_options = generate_lpai_compiler_spec(
target_env=self.get_lpai_target_env()
)
# I/O q/dq nodes fall back to CPU via the FoldQDQ LPAI workaround
# and are excluded from the QNN etdump; update this expected count after the first LPAI run.
expected_compared_events = 17
case _:
raise ValueError("Backend is not implemented yet")
TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec(
soc_model=self.chipset_table[TestQNN.soc_model],
backend_options=backend_options,
dump_intermediate_outputs=True,
)
module = SimpleModel() # noqa: F405
torch.manual_seed(8)
sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
module = self.get_qdq_module(module, sample_input)
qdq_module = self.get_qdq_module(module, sample_input)
self.lower_module_and_test_output(
module,
qdq_module,
sample_input,
expected_partitions=1,
expected_compared_events=14,
expected_compared_events=expected_compared_events,
)

def test_qnn_backend_dump_intermediate_outputs_topk(self):
Expand Down
Loading