Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions backends/qualcomm/.coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Coverage configuration for the genai_pipeline package.
#
# NOTE: omit patterns that do not start with a wildcard are resolved relative
# to the current working directory, so a bare "pipeline_types.py" would only
# match a file sitting directly in the directory coverage is launched from.
# The "*/" prefix makes the pattern match the module wherever it lives.
[run]
branch = True
source = backends/qualcomm/genai_pipeline

omit =
    */__init__.py
    */tests/*
    */__pycache__/*
    */pipeline_types.py

[report]
show_missing = True

omit =
    */__init__.py
    */tests/*
    */__pycache__/*
    */pipeline_types.py
49 changes: 49 additions & 0 deletions backends/qualcomm/genai_pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

__version__ = "1.0.0"

from backends.qualcomm.genai_pipeline.configs import (
CompilationInputConfig,
CompilationOutputConfig,
InferenceInputConfig,
InferenceOutputConfig,
ModelPreparationInputConfig,
ModelPreparationOutputConfig,
QuantizationInputConfig,
QuantizationOutputConfig,
)
from backends.qualcomm.genai_pipeline.engine_proxy import EngineProxy
from backends.qualcomm.genai_pipeline.exceptions import (
ConfigValidationError,
EngineNotAvailableError,
PipelineError,
StageError,
)
from backends.qualcomm.genai_pipeline.pipeline_context import (
PipelineContext,
PipelineContextBuilder,
)
from backends.qualcomm.genai_pipeline.pipeline_types import EngineType

# Public API of the package root: every name below is imported above and
# re-exported here. Entries are kept in alphabetical order.
__all__ = [
"CompilationInputConfig",
"CompilationOutputConfig",
"ConfigValidationError",
"EngineNotAvailableError",
"EngineProxy",
"EngineType",
"InferenceInputConfig",
"InferenceOutputConfig",
"ModelPreparationInputConfig",
"ModelPreparationOutputConfig",
"PipelineContext",
"PipelineContextBuilder",
"PipelineError",
"QuantizationInputConfig",
"QuantizationOutputConfig",
"StageError",
]
41 changes: 41 additions & 0 deletions backends/qualcomm/genai_pipeline/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from backends.qualcomm.genai_pipeline.configs.compilation_input_config import (
CompilationInputConfig,
)
from backends.qualcomm.genai_pipeline.configs.compilation_output_config import (
CompilationOutputConfig,
)
from backends.qualcomm.genai_pipeline.configs.inference_input_config import (
InferenceInputConfig,
)
from backends.qualcomm.genai_pipeline.configs.inference_output_config import (
InferenceOutputConfig,
)
from backends.qualcomm.genai_pipeline.configs.model_preparation_input_config import (
ModelPreparationInputConfig,
)
from backends.qualcomm.genai_pipeline.configs.model_preparation_output_config import (
ModelPreparationOutputConfig,
)
from backends.qualcomm.genai_pipeline.configs.quantization_input_config import (
QuantizationInputConfig,
)
from backends.qualcomm.genai_pipeline.configs.quantization_output_config import (
QuantizationOutputConfig,
)

# Public API of the configs sub-package: one input and one output config
# class per pipeline stage, each imported from its own module above.
__all__ = [
"CompilationInputConfig",
"CompilationOutputConfig",
"InferenceInputConfig",
"InferenceOutputConfig",
"ModelPreparationInputConfig",
"ModelPreparationOutputConfig",
"QuantizationInputConfig",
"QuantizationOutputConfig",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from executorch.backends.qualcomm.serialization.qc_schema import (
QcomChipset,
QnnExecuTorchBackendType,
)
from executorch.exir.backend.compile_spec import CompileSpec
from torch import nn


@dataclass
class CompilationInputConfig:
    """Parameters consumed by the compilation stage.

    ``soc_model`` and ``backend_type`` have no defaults and must be supplied
    by the caller; every other field is optional.

    Attributes:
        soc_model: Target SoC, e.g. ``QcomChipset.SM8750``.
        backend_type: QNN backend to compile for (HTP, GPU, LPAI, ...).
        model: The ``nn.Module`` to compile -- a quantized module, or the
            original one in FP16 mode. Defaults to ``None``.
        artifact_dir: Directory that receives the compiled artifacts.
            Defaults to the current directory.
        compile_specs: QNN compiler specifications used for backend
            delegation. Defaults to ``None``.
    """

    soc_model: "QcomChipset"
    backend_type: "QnnExecuTorchBackendType"
    model: Optional["nn.Module"] = None
    # Path() with no arguments is the relative path ".".
    artifact_dir: Path = field(default_factory=Path)
    compile_specs: Optional[List["CompileSpec"]] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from executorch.devtools.etrecord import ETRecord


@dataclass
class CompilationOutputConfig:
    """Results emitted by the compilation stage.

    Both fields default to ``None``.

    Attributes:
        artifact_paths: Locations of the compiled ``.pte`` artifacts. This is
            a list because a multi-split model (e.g., prefill + decode)
            yields more than one ``.pte`` file.
        etrecord: ETRecord kept for debugging, if any. Only the ExecuTorch
            engine provides it.
    """

    artifact_paths: Optional[List[Path]] = None
    etrecord: Optional["ETRecord"] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset


@dataclass
class InferenceInputConfig:
    """Parameters consumed by the inference stage.

    Only ``soc_model`` is mandatory; the remaining fields default to ``None``
    or, for ``inference_options``, to a fresh empty dict per instance.

    Attributes:
        soc_model: Target SoC, e.g. ``QcomChipset.SM8750``.
        artifact_paths: Compiled model artifacts (``.pte`` files). A list so
            that multi-split models (e.g., prefill + decode) are supported.
        tokenizer: Tokenizer instance used for encoding and decoding.
        runtime_tokenizer_path: Location of the runtime tokenizer used on
            device.
        prompt: User prompt(s) for text generation.
        inference_options: Options specific to the selected engine.
    """

    soc_model: "QcomChipset"
    artifact_paths: Optional[List[Path]] = None
    tokenizer: Any = None
    runtime_tokenizer_path: Optional[Path] = None
    prompt: Optional[List[str]] = None
    inference_options: Dict[str, Any] = field(default_factory=dict)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from executorch.devtools.etdump.schema_flatcc import ETDump


@dataclass
class InferenceOutputConfig:
    """Results emitted by the inference stage.

    The two dict fields each default to a fresh empty dict per instance; the
    other fields default to ``None``.

    Attributes:
        inference_results: Text output(s) generated by the model.
        performance_metrics: Performance measurements such as TTFT or
            tokens/sec.
        eval_results: Evaluation metrics such as SQNR or perplexity.
        etdump: ETDump kept for debugging, if any. Only the ExecuTorch engine
            provides it.
    """

    inference_results: Optional[List[str]] = None
    performance_metrics: Dict[str, Any] = field(default_factory=dict)
    eval_results: Dict[str, Any] = field(default_factory=dict)
    etdump: Optional["ETDump"] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class ModelPreparationInputConfig:
    """Parameters consumed by the model preparation stage.

    ``model_name`` and ``soc_model`` are mandatory; ``extra_options``
    defaults to a fresh empty dict per instance.

    Attributes:
        model_name: Identifier of the model, e.g. ``"llama3_2-1b_instruct"``.
        soc_model: Target SoC given as a string, e.g. ``"SM8750"``.
        extra_options: Further options specific to model preparation.
    """

    model_name: str
    soc_model: str
    extra_options: Dict[str, Any] = field(default_factory=dict)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from torch import nn


@dataclass
class ModelPreparationOutputConfig:
    """Results emitted by the model preparation stage.

    Every field defaults to ``None``.

    Attributes:
        model_module: Prepared ``nn.Module``, ready to be quantized.
        tokenizer: Tokenizer instance used to encode and decode text.
        calibration_data: Dataset samples used for calibration during
            quantization.
        runtime_tokenizer_path: Location of the runtime tokenizer used for
            on-device inference.
        chat_template: Chat template for instruct models, if any.
    """

    model_module: Optional["nn.Module"] = None
    tokenizer: Any = None
    calibration_data: Optional[List[Any]] = None
    runtime_tokenizer_path: Optional[Path] = None
    chat_template: Optional[str] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from executorch.backends.qualcomm.serialization.qc_schema import (
QcomChipset,
QnnExecuTorchBackendType,
)
from torch import nn


@dataclass
class QuantizationInputConfig:
    """Parameters consumed by the quantization stage.

    ``soc_model`` and ``backend_type`` are mandatory. ``extra_options``
    defaults to a fresh empty dict per instance; the rest default to
    ``None``.

    Attributes:
        soc_model: Target SoC, e.g. ``QcomChipset.SM8750``.
        backend_type: QNN backend type (HTP, GPU, LPAI, ...).
        model_module: The ``nn.Module`` that will be quantized.
        calibration_data: Samples of the calibration dataset.
        quant_recipe: Quantization recipe (per-layer bit widths, group sizes,
            etc.).
        extra_options: Further options specific to quantization.
    """

    soc_model: "QcomChipset"
    backend_type: "QnnExecuTorchBackendType"
    model_module: Optional["nn.Module"] = None
    calibration_data: Optional[List[Any]] = None
    quant_recipe: Any = None
    extra_options: Dict[str, Any] = field(default_factory=dict)
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from dataclasses import dataclass
from typing import Any


@dataclass
class QuantizationOutputConfig:
    """Result emitted by the quantization stage.

    Attributes:
        quantized_model: Either the quantized ``nn.Module`` or the path to a
            saved QDQ model. Defaults to ``None``.
    """

    quantized_model: Any = None
Loading
Loading