pytorch
diff --git a/‎.ci/docker/build.sh‎
Lines changed: 2 additions & 2 deletions b/‎.ci/docker/build.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 2 additions & 1 deletion b/‎.ci/scripts/export_model_artifact.sh‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎.ci/scripts/test_model_e2e.sh‎
Lines changed: 101 additions & 0 deletions b/‎.ci/scripts/test_model_e2e.sh‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎.claude/skills/qualcomm/new_op_development.md‎
Lines changed: 12 additions & 3 deletions b/‎.claude/skills/qualcomm/new_op_development.md‎
Lines changed: 12 additions & 3 deletions
diff --git a/‎.flake8‎
Lines changed: 1 addition & 0 deletions b/‎.flake8‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/build-cadence-runner.yml‎
Lines changed: 39 additions & 3 deletions b/‎.github/workflows/build-cadence-runner.yml‎
Lines changed: 39 additions & 3 deletions
diff --git a/‎.github/workflows/build-cmsis-pack.yml‎
Lines changed: 137 additions & 0 deletions b/‎.github/workflows/build-cmsis-pack.yml‎
Lines changed: 137 additions & 0 deletions
diff --git a/‎.github/workflows/docker-builds.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/docker-builds.yml‎
Lines changed: 1 addition & 1 deletion
@@ -89,10 +89,10 @@ case "${IMAGE_NAME}" in
     OS_VERSION=24.04
     GCC_VERSION=14
     ;;
-  executorch-ubuntu-26.04-gcc15)
+  executorch-ubuntu-26.04-gcc14)
     LINTRUNNER=""
     OS_VERSION=26.04
-    GCC_VERSION=15
+    GCC_VERSION=14
     ;;
   *)
     echo "Invalid image name ${IMAGE_NAME}"
 
@@ -422,8 +422,9 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
       --no-compile
   echo "::endgroup::"
 
-  # Copy tokenizer for the runner
+  # Copy tokenizer files for the runner and model-specific serving launcher.
   cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
+  cp "$LOCAL_MODEL_DIR/tokenizer_config.json" "${OUTPUT_DIR}/tokenizer_config.json"
 
   # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
   echo "::group::Export"
 
@@ -447,4 +447,105 @@ case "$MODEL_NAME" in
 esac
 echo "::endgroup::"
 
+if [ "$DEVICE" = "cuda" ] && [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
+  echo "::group::Run $MODEL_NAME OpenAI serving smoke"
+  pip install -r examples/llm_server/python/requirements.txt "transformers==5.0.0rc1"
+  python -m pip install --no-deps --no-build-isolation --editable . -v
+
+  PORT=$(python - <<'PY'
+import socket
+
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+    s.bind(("127.0.0.1", 0))
+    print(s.getsockname()[1])
+PY
+)
+  SERVER_LOG=$(mktemp)
+  WORKER_BIN="cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_worker"
+  python -u -m executorch.examples.models.qwen3_5_moe.serve \
+    --model-path "${MODEL_DIR}/model.pte" \
+    --data-path "${MODEL_DIR}/aoti_cuda_blob.ptd" \
+    --tokenizer-path "${MODEL_DIR}/tokenizer.json" \
+    --hf-tokenizer "${MODEL_DIR}" \
+    --model-id qwen3.5-moe \
+    --max-context 4096 \
+    --max-sessions 2 \
+    --no-think \
+    --worker-bin "$WORKER_BIN" \
+    --host 127.0.0.1 \
+    --port "$PORT" >"$SERVER_LOG" 2>&1 &
+  SERVER_PID=$!
+
+  cleanup_qwen_server() {
+    if kill -0 "$SERVER_PID" 2>/dev/null; then
+      kill "$SERVER_PID" 2>/dev/null || true
+      wait "$SERVER_PID" 2>/dev/null || true
+    fi
+    rm -f "$SERVER_LOG"
+  }
+  trap cleanup_qwen_server EXIT
+
+  if ! python - "$PORT" "$SERVER_LOG" <<'PY'
+import json
+import sys
+import time
+import urllib.request
+
+port = sys.argv[1]
+log_path = sys.argv[2]
+base = f"http://127.0.0.1:{port}"
+
+
+def request(path, payload=None):
+    data = None
+    headers = {}
+    if payload is not None:
+        data = json.dumps(payload).encode("utf-8")
+        headers["Content-Type"] = "application/json"
+    req = urllib.request.Request(base + path, data=data, headers=headers)
+    with urllib.request.urlopen(req, timeout=120) as resp:
+        return json.loads(resp.read().decode("utf-8"))
+
+
+last = None
+for _ in range(180):
+    try:
+        request("/health")
+        break
+    except Exception as e:
+        last = e
+        time.sleep(1)
+else:
+    print(open(log_path, encoding="utf-8", errors="replace").read())
+    raise RuntimeError(f"server did not become healthy: {last}")
+
+models = request("/v1/models")
+ids = {m["id"] for m in models["data"]}
+if "qwen3.5-moe" not in ids:
+    raise AssertionError(f"qwen3.5-moe missing from /v1/models: {ids}")
+
+body = {
+    "model": "qwen3.5-moe",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}],
+    "max_tokens": 32,
+    "temperature": 0,
+}
+resp = request("/v1/chat/completions", body)
+content = resp["choices"][0]["message"].get("content") or ""
+if "Paris" not in content:
+    raise AssertionError(f"expected Paris in serving response, got: {content!r}")
+
+print("Qwen3.5-MoE serving smoke passed")
+PY
+  then
+    echo "Qwen3.5-MoE serving smoke failed; server log:"
+    cat "$SERVER_LOG"
+    exit 1
+  fi
+
+  cleanup_qwen_server
+  trap - EXIT
+  echo "::endgroup::"
+fi
+
 popd
@@ -217,8 +217,17 @@ class DecomposeMyOp(ExportPass):
 
 ### Registration (all decompose passes)
 1. `_passes/__init__.py` — import + `__all__`
-2. `_passes/qnn_pass_manager.py` — import + `transform_for_annotation_pipeline` + `transform_for_export_pipeline` + `get_capture_program_passes`
-3. `_passes/utils.py` — add to `get_passes_dependency_for_capture_program()` with `[RemoveRedundancy]` dependency
+2. `_passes/qnn_pass_manager.py` — The pass manager uses classmethods for pipeline definitions:
+   - **Import** — add to the import block at top of file
+   - **`get_annotation_passes()`** — add pass class to the returned list (runs before quantizer, ATen IR)
+   - **`get_export_passes()`** — add pass class if needed for float-only path (runs after quantization, before to-edge)
+   - **`get_default_pass_activations()`** — add `(PassClass, True)` ONLY if the pass also needs to run in the to-edge pipeline
+   - **`get_passes_dependency_for_capture_program()`** — add `PassClass: [RemoveRedundancy]` dependency ONLY if also in `get_default_pass_activations`
+
+**When to add to which pipeline:**
+- **Annotation only** (most common for decompose passes): `get_annotation_passes()` — pass decomposes the op before the quantizer sees it
+- **Export pipeline** too: if the float-only test fails without it (op doesn't get handled by PyTorch's built-in decomposition during to-edge)
+- **Capture program** (to-edge) too: if the op can appear in edge dialect and needs decomposition there (e.g., `DecomposeVar`, `DecomposeCDist`, `DecomposeDiagonal`)
 
 ---
 
@@ -255,4 +264,4 @@ class DecomposeMyOp(ExportPass):
 
 **Native QNN Op:** `qnn_constants.py` → `op_my_op.py` → `builders/__init__.py` → `htp_rules.py` → `lpai_rules.py` → `layout_transform.py` → `tests/models.py` → `test_qnn_delegate.py` → `partition/utils.py` (skip decomp) → `common_defs.py` (remove to_be_implemented) → `builders/README.md`
 
-**Decompose Pass:** `_passes/decompose_my_op.py` → `_passes/__init__.py` → `qnn_pass_manager.py` (annotation + export + capture) → `_passes/utils.py` (dependency) → `tests/models.py` → `test_qnn_delegate.py` → `common_defs.py` → `builders/README.md`
+**Decompose Pass:** `_passes/decompose_my_op.py` → `_passes/__init__.py` → `qnn_pass_manager.py` (`get_annotation_passes` + optionally `get_export_passes`; if also needed in to-edge: `get_default_pass_activations` + `get_passes_dependency_for_capture_program`) → `tests/models.py` → `test_qnn_delegate.py` → `common_defs.py` → `builders/README.md`
@@ -75,6 +75,7 @@ exclude =
     ./configurations,
     ./docs,
     ./exir/_serialize/generated/executorch_flatbuffer,
+    ./devtools/bundled_program/serialize/generated,
     ./third_party,
     *.pyi
 
 
@@ -10,14 +10,45 @@ on:
     tags:
       - ciflow/nightly/*
   pull_request:
+  pull_request_target:
+    types: [labeled]
   workflow_dispatch:
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.sha }}
   cancel-in-progress: true
 
 jobs:
+  gate:
+    runs-on: ubuntu-latest
+    outputs:
+      run-cadence: ${{ steps.decide.outputs.run }}
+    steps:
+      - id: decide
+        env:
+          EVENT: ${{ github.event_name }}
+          IS_FORK: ${{ github.event.pull_request.head.repo.full_name != github.repository }}
+          HAS_CLA: ${{ contains(github.event.pull_request.labels.*.name, 'CLA Signed') }}
+          HAS_EXPORT: ${{ contains(github.event.pull_request.labels.*.name, 'meta-exported') }}
+        run: |
+          run=false
+          case "${EVENT}" in
+            push|schedule|workflow_dispatch)
+              run=true
+              ;;
+            pull_request)
+              [ "${IS_FORK}" = "false" ] && run=true
+              ;;
+            pull_request_target)
+              if [ "${IS_FORK}" = "true" ] && [ "${HAS_CLA}" = "true" ] && [ "${HAS_EXPORT}" = "true" ]; then
+                run=true
+              fi
+              ;;
+          esac
+          echo "run=${run}" >> "${GITHUB_OUTPUT}"
+
   cpu-build:
+    if: github.event_name != 'pull_request_target'
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
@@ -44,6 +75,7 @@ jobs:
 
   cpu-test:
     needs: cpu-build
+    if: github.event_name != 'pull_request_target'
     permissions:
       id-token: write
       contents: read
@@ -56,19 +88,23 @@ jobs:
   # lives in _xtensa_build.yml. fusion_g3 is omitted until the upstream fusion_g3
   # <-> nnlib-FusionG3 API skew is fixed (its runner does not link).
   hifi-build:
+    needs: gate
+    if: needs.gate.outputs.run-cadence == 'true'
     permissions:
       id-token: write
       contents: read
     uses: ./.github/workflows/_xtensa_build.yml
     with:
       backend: hifi4
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
 
   vision-build:
+    needs: gate
+    if: needs.gate.outputs.run-cadence == 'true'
     permissions:
       id-token: write
       contents: read
     uses: ./.github/workflows/_xtensa_build.yml
     with:
       backend: vision
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
@@ -0,0 +1,137 @@
+# Copyright 2026 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: Build CMSIS Pack
+
+on:
+  schedule:
+    # Nightly at 03:00 UTC, staggered after nightly.yml (02:00) so the
+    # shared runner pool isn't hit by both at the same minute.
+    - cron: 0 3 * * *
+  release:
+    # Build (and, for non-prerelease, publish) the pack when a GitHub
+    # Release is published. If the release tag starts with 'v', it drives
+    # the pack version via GITHUB_REF below.
+    types: [published]
+  push:
+    branches:
+      - main
+      - release/*
+    paths:
+      - .github/workflows/build-cmsis-pack.yml
+      - backends/arm/cmsis_pack/**
+      - backends/arm/cmsis_pack/scripts/**
+      - backends/arm/runtime/**
+      - backends/cortex_m/**
+      - kernels/portable/**
+      - kernels/quantized/**
+      - runtime/**
+      - schema/**
+  pull_request:
+    paths:
+      - .github/workflows/build-cmsis-pack.yml
+      - backends/arm/cmsis_pack/**
+      - backends/arm/cmsis_pack/scripts/**
+  workflow_dispatch:
+    inputs:
+      version_override:
+        description: 'Override pack version (e.g., 1.2.0). Leave empty to derive from version.txt'
+        required: false
+        type: string
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  build-cmsis-pack:
+    name: build-cmsis-pack
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 60
+      upload-artifact: cmsis-pack-artifact
+      script: |
+        set -eux
+
+        echo "::group::Setup environment"
+        # The generic Linux job chooses to use base env, not the one set up by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        source .ci/scripts/utils.sh
+        install_executorch "--use-pt-pinned-commit"
+        echo "::endgroup::"
+
+        echo "::group::Install ARM toolchain"
+        .ci/scripts/setup-arm-baremetal-tools.sh
+        source examples/arm/arm-scratch/setup_path.sh
+        echo "::endgroup::"
+
+        echo "::group::Cross-compile ExecuTorch for Cortex-M"
+        # Stage 1: Build core ExecuTorch with arm-none-eabi-gcc
+        # This generates required headers (flatbuffers, schema)
+        backends/arm/scripts/build_executorch.sh
+        CMAKE_BUILD_DIR="$(pwd)/cmake-out-arm"
+        echo "::endgroup::"
+
+        echo "::group::Determine pack version"
+        # Pick the version in priority order: input override, release tag, nightly (schedule), else version.txt with -dev
+        BASE_VER="$(cat version.txt | sed 's/a0$//')"
+        if [[ -n "${{ inputs.version_override || '' }}" ]]; then
+          PACK_VERSION="${{ inputs.version_override }}"
+        elif [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
+          # Strip the leading 'refs/tags/v' prefix; any suffix such as -rc is kept as-is
+          PACK_VERSION="${GITHUB_REF#refs/tags/v}"
+        elif [[ "${{ github.event_name }}" == "schedule" ]]; then
+          PACK_VERSION="${BASE_VER}-nightly-$(date -u +%Y%m%d)"
+        else
+          PACK_VERSION="${BASE_VER}-dev"
+        fi
+        echo "Pack version: ${PACK_VERSION}"
+        echo "::endgroup::"
+
+        echo "::group::Build CMSIS Pack"
+        backends/arm/cmsis_pack/scripts/build_pack.sh \
+          --executorch-root "$(pwd)" \
+          --build-dir "${CMAKE_BUILD_DIR}" \
+          --version "${PACK_VERSION}" \
+          --output-dir "$(pwd)/artifacts-to-be-uploaded"
+        echo "::endgroup::"
+
+        # Structural validation and consumer-build smoke are intentionally
+        # not run in CI yet. See:
+        #   backends/arm/cmsis_pack/test/validate_pack.py    (structural)
+        #   backends/arm/cmsis_pack/test/smoke/run.sh        (cbuild via
+        #                                                     AVH-MLOps)
+        # for the local test drivers.
+
+  # Attach the pack to the GitHub Release when a non-prerelease release is
+  # published. Prereleases still build via the release trigger (pack
+  # validation is not run in CI yet) but are not published.
+  publish-cmsis-pack:
+    if: github.event_name == 'release' && !github.event.release.prerelease
+    needs: build-cmsis-pack
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - name: Download pack artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: cmsis-pack-artifact
+          path: pack-output
+
+      - name: Upload to GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: pack-output/*.pack
+          tag_name: ${{ github.ref_name }}
@@ -43,7 +43,7 @@ jobs:
           executorch-ubuntu-22.04-mediatek-sdk,
           executorch-ubuntu-22.04-clang12-android,
           executorch-ubuntu-24.04-gcc14,
-          executorch-ubuntu-26.04-gcc15,
+          executorch-ubuntu-26.04-gcc14,
         ]
         include:
           - docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64
Original file line number	Diff line number	Diff line change
`@@ -43,7 +43,7 @@ jobs:`
`43`	`43`	`executorch-ubuntu-22.04-mediatek-sdk,`
`44`	`44`	`executorch-ubuntu-22.04-clang12-android,`
`45`	`45`	`executorch-ubuntu-24.04-gcc14,`
`46`		`- executorch-ubuntu-26.04-gcc15,`
	`46`	`+ executorch-ubuntu-26.04-gcc14,`
`47`	`47`	`]`
`48`	`48`	`include:`
`49`	`49`	`- docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64`