pytorch · qti-horodnic · Apr 24, 2026
@@ -0,0 +1,18 @@
+[run]
+# Collect branch coverage for the GenAI pipeline package only.
+branch = True
+source = backends/qualcomm/genai_pipeline
+
+# coverage.py uses patterns that start with a wildcard as-is and resolves all
+# other patterns relative to the current directory, so the module name is
+# prefixed with "*/" to match it wherever it lives under the source tree.
+omit =
+    */__init__.py
+    */tests/*
+    */__pycache__/*
+    */pipeline_types.py
+
+[report]
+# List the line numbers that were not executed in the text report.
+show_missing = True
+
+# coverage.py uses patterns that start with a wildcard as-is and resolves all
+# other patterns relative to the current directory, so the module name is
+# prefixed with "*/" to match it wherever it lives under the source tree.
+omit =
+    */__init__.py
+    */tests/*
+    */__pycache__/*
+    */pipeline_types.py
@@ -0,0 +1,49 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Top-level namespace of the GenAI pipeline package.
+
+Re-exports the names listed in ``__all__`` from the package's submodules so
+callers can import them directly from the package.
+"""
+
+__version__ = "1.0.0"
+
+# Explicit relative imports resolve against this package itself, so they work
+# whichever top-level name the package is imported under (for example
+# ``backends.qualcomm.genai_pipeline`` or
+# ``executorch.backends.qualcomm.genai_pipeline``).
+from .configs import (
+    CompilationInputConfig,
+    CompilationOutputConfig,
+    InferenceInputConfig,
+    InferenceOutputConfig,
+    ModelPreparationInputConfig,
+    ModelPreparationOutputConfig,
+    QuantizationInputConfig,
+    QuantizationOutputConfig,
+)
+from .engine_proxy import EngineProxy
+from .exceptions import (
+    ConfigValidationError,
+    EngineNotAvailableError,
+    PipelineError,
+    StageError,
+)
+from .pipeline_context import (
+    PipelineContext,
+    PipelineContextBuilder,
+)
+from .pipeline_types import EngineType
+
+# Public API, kept in alphabetical order.
+__all__ = [
+    "CompilationInputConfig",
+    "CompilationOutputConfig",
+    "ConfigValidationError",
+    "EngineNotAvailableError",
+    "EngineProxy",
+    "EngineType",
+    "InferenceInputConfig",
+    "InferenceOutputConfig",
+    "ModelPreparationInputConfig",
+    "ModelPreparationOutputConfig",
+    "PipelineContext",
+    "PipelineContextBuilder",
+    "PipelineError",
+    "QuantizationInputConfig",
+    "QuantizationOutputConfig",
+    "StageError",
+]
@@ -0,0 +1,41 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Stage configuration dataclasses for the GenAI pipeline.
+
+Each name in ``__all__`` is re-exported from the sibling module that
+defines it.
+"""
+
+# Explicit relative imports resolve against this package itself, so they work
+# whichever top-level name the parent package is imported under.
+from .compilation_input_config import CompilationInputConfig
+from .compilation_output_config import CompilationOutputConfig
+from .inference_input_config import InferenceInputConfig
+from .inference_output_config import InferenceOutputConfig
+from .model_preparation_input_config import ModelPreparationInputConfig
+from .model_preparation_output_config import ModelPreparationOutputConfig
+from .quantization_input_config import QuantizationInputConfig
+from .quantization_output_config import QuantizationOutputConfig
+
+# Public API, kept in alphabetical order.
+__all__ = [
+    "CompilationInputConfig",
+    "CompilationOutputConfig",
+    "InferenceInputConfig",
+    "InferenceOutputConfig",
+    "ModelPreparationInputConfig",
+    "ModelPreparationOutputConfig",
+    "QuantizationInputConfig",
+    "QuantizationOutputConfig",
+]
@@ -0,0 +1,38 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from executorch.backends.qualcomm.serialization.qc_schema import (
+        QcomChipset,
+        QnnExecuTorchBackendType,
+    )
+    from executorch.exir.backend.compile_spec import CompileSpec
+    from torch import nn
+
+
+@dataclass
+class CompilationInputConfig:
+    """Input configuration for the compilation stage.
+
+    ``soc_model`` and ``backend_type`` have no default and must be supplied;
+    every other field is optional.
+
+    Attributes:
+        soc_model: The target SoC (e.g., QcomChipset.SM8750). Required.
+        backend_type: QNN backend type (HTP, GPU, LPAI, etc.). Required.
+        model: The nn.Module to compile (quantized or original for FP16 mode).
+            Defaults to None.
+        artifact_dir: Directory to store compiled artifacts. Defaults to
+            ``Path(".")``.
+        compile_specs: QNN compiler specifications for backend delegation.
+            Defaults to None.
+    """
+
+    # Annotations are left unquoted: this module enables postponed evaluation
+    # (``from __future__ import annotations``), so names imported only under
+    # TYPE_CHECKING are never evaluated at runtime.
+    soc_model: QcomChipset
+    backend_type: QnnExecuTorchBackendType
+    model: Optional[nn.Module] = None
+    artifact_dir: Path = field(default_factory=lambda: Path("."))
+    compile_specs: Optional[List[CompileSpec]] = None
@@ -0,0 +1,29 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from executorch.devtools.etrecord import ETRecord
+
+
+@dataclass
+class CompilationOutputConfig:
+    """Output produced by the compilation stage.
+
+    Both fields default to None.
+
+    Attributes:
+        artifact_paths: Paths to the compiled artifacts (.pte files).
+            List to support multi-split models where compilation produces
+            multiple .pte files (e.g., prefill + decode).
+        etrecord: Optional ETRecord for debugging. ExecuTorch engine only.
+    """
+
+    artifact_paths: Optional[List[Path]] = None
+    # Unquoted on purpose: postponed evaluation of annotations is enabled in
+    # this module, so the TYPE_CHECKING-only name is never evaluated at runtime.
+    etrecord: Optional[ETRecord] = None
@@ -0,0 +1,36 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
+
+
+@dataclass
+class InferenceInputConfig:
+    """Input configuration for the inference stage.
+
+    Only ``soc_model`` is mandatory. The remaining fields default to None,
+    except ``inference_options``, which defaults to a fresh empty dict for
+    each instance.
+
+    Attributes:
+        soc_model: The target SoC (e.g., QcomChipset.SM8750). Required.
+        artifact_paths: Paths to compiled model artifacts (.pte files).
+            List to support multi-split models (e.g., prefill + decode).
+        tokenizer: The tokenizer instance for encoding/decoding.
+        runtime_tokenizer_path: Path to runtime tokenizer for on-device use.
+        prompt: The user prompt(s) for text generation.
+        inference_options: Engine-specific inference options.
+    """
+
+    # Unquoted on purpose: postponed evaluation of annotations is enabled in
+    # this module, so the TYPE_CHECKING-only name is never evaluated at runtime.
+    soc_model: QcomChipset
+    artifact_paths: Optional[List[Path]] = None
+    tokenizer: Any = None
+    runtime_tokenizer_path: Optional[Path] = None
+    prompt: Optional[List[str]] = None
+    inference_options: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,30 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from executorch.devtools.etdump.schema_flatcc import ETDump
+
+
+@dataclass
+class InferenceOutputConfig:
+    """Output produced by the inference stage.
+
+    ``inference_results`` and ``etdump`` default to None; the two metric
+    mappings each default to a fresh empty dict per instance.
+
+    Attributes:
+        inference_results: Generated text output(s) from the model.
+        performance_metrics: Performance data (e.g., TTFT, tokens/sec).
+        eval_results: Evaluation metric results (e.g., SQNR, perplexity).
+        etdump: Optional ETDump for debugging. ExecuTorch engine only.
+    """
+
+    inference_results: Optional[List[str]] = None
+    performance_metrics: Dict[str, Any] = field(default_factory=dict)
+    eval_results: Dict[str, Any] = field(default_factory=dict)
+    # Unquoted on purpose: postponed evaluation of annotations is enabled in
+    # this module, so the TYPE_CHECKING-only name is never evaluated at runtime.
+    etdump: Optional[ETDump] = None
@@ -0,0 +1,23 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass, field
+from typing import Any, Dict
+
+
+@dataclass
+class ModelPreparationInputConfig:
+    """Settings handed to the model preparation stage.
+
+    ``model_name`` and ``soc_model`` have no default and must be supplied.
+
+    Attributes:
+        model_name: Model identifier (e.g., "llama3_2-1b_instruct"). Required.
+        soc_model: Target SoC (e.g., "SM8750"). Required.
+        extra_options: Additional model-preparation-specific options.
+            Defaults to a fresh empty dict for each instance.
+    """
+
+    model_name: str
+    # NOTE(review): soc_model is a plain str here, whereas the other stage
+    # input configs annotate it as QcomChipset -- confirm this is intended.
+    soc_model: str
+    extra_options: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,33 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from torch import nn
+
+
+@dataclass
+class ModelPreparationOutputConfig:
+    """Output produced by the model preparation stage.
+
+    Every field defaults to None.
+
+    Attributes:
+        model_module: The prepared nn.Module ready for quantization.
+        tokenizer: The tokenizer instance for encoding/decoding text.
+        calibration_data: Dataset samples for calibration during quantization.
+        runtime_tokenizer_path: Path to runtime tokenizer for on-device inference.
+        chat_template: Optional chat template for instruct models.
+    """
+
+    # Unquoted on purpose: postponed evaluation of annotations is enabled in
+    # this module, so the TYPE_CHECKING-only name is never evaluated at runtime.
+    model_module: Optional[nn.Module] = None
+    tokenizer: Any = None
+    calibration_data: Optional[List[Any]] = None
+    runtime_tokenizer_path: Optional[Path] = None
+    chat_template: Optional[str] = None
@@ -0,0 +1,38 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from executorch.backends.qualcomm.serialization.qc_schema import (
+        QcomChipset,
+        QnnExecuTorchBackendType,
+    )
+    from torch import nn
+
+
+@dataclass
+class QuantizationInputConfig:
+    """Input configuration for the quantization stage.
+
+    ``soc_model`` and ``backend_type`` have no default and must be supplied.
+    The remaining fields default to None, except ``extra_options``, which
+    defaults to a fresh empty dict for each instance.
+
+    Attributes:
+        soc_model: The target SoC (e.g., QcomChipset.SM8750). Required.
+        backend_type: QNN backend type (HTP, GPU, LPAI, etc.). Required.
+        model_module: The nn.Module to quantize.
+        calibration_data: Calibration dataset samples.
+        quant_recipe: Quantization recipe (per-layer bit widths, group sizes, etc.).
+        extra_options: Additional quantization-specific options.
+    """
+
+    # Annotations are left unquoted: this module enables postponed evaluation
+    # (``from __future__ import annotations``), so names imported only under
+    # TYPE_CHECKING are never evaluated at runtime.
+    soc_model: QcomChipset
+    backend_type: QnnExecuTorchBackendType
+    model_module: Optional[nn.Module] = None
+    calibration_data: Optional[List[Any]] = None
+    quant_recipe: Any = None
+    extra_options: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,19 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from dataclasses import dataclass
+from typing import Any
+
+
+@dataclass
+class QuantizationOutputConfig:
+    """Result emitted by the quantization stage.
+
+    Attributes:
+        quantized_model: The quantized nn.Module or path to saved QDQ model.
+            Defaults to None.
+    """
+
+    # Typed as Any because the value may be either a module or a path.
+    quantized_model: Any = None