Skip to content

Commit e322831

Browse files
authored
Merge branch 'main' into g4-gguf-export-mem
2 parents 3178334 + 63b4c4d commit e322831

283 files changed

Lines changed: 14487 additions & 1635 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/docker/build.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ case "${IMAGE_NAME}" in
8989
OS_VERSION=24.04
9090
GCC_VERSION=14
9191
;;
92-
executorch-ubuntu-26.04-gcc15)
92+
executorch-ubuntu-26.04-gcc14)
9393
LINTRUNNER=""
9494
OS_VERSION=26.04
95-
GCC_VERSION=15
95+
GCC_VERSION=14
9696
;;
9797
*)
9898
echo "Invalid image name ${IMAGE_NAME}"

.ci/scripts/export_model_artifact.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,9 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
422422
--no-compile
423423
echo "::endgroup::"
424424

425-
# Copy tokenizer for the runner
425+
# Copy tokenizer files for the runner and model-specific serving launcher.
426426
cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
427+
cp "$LOCAL_MODEL_DIR/tokenizer_config.json" "${OUTPUT_DIR}/tokenizer_config.json"
427428

428429
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
429430
echo "::group::Export"

.ci/scripts/test_model_e2e.sh

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,4 +447,105 @@ case "$MODEL_NAME" in
447447
esac
448448
echo "::endgroup::"
449449

450+
if [ "$DEVICE" = "cuda" ] && [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
451+
echo "::group::Run $MODEL_NAME OpenAI serving smoke"
452+
pip install -r examples/llm_server/python/requirements.txt "transformers==5.0.0rc1"
453+
python -m pip install --no-deps --no-build-isolation --editable . -v
454+
455+
PORT=$(python - <<'PY'
456+
import socket
457+
458+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
459+
s.bind(("127.0.0.1", 0))
460+
print(s.getsockname()[1])
461+
PY
462+
)
463+
SERVER_LOG=$(mktemp)
464+
WORKER_BIN="cmake-out/examples/models/qwen3_5_moe/qwen3_5_moe_worker"
465+
python -u -m executorch.examples.models.qwen3_5_moe.serve \
466+
--model-path "${MODEL_DIR}/model.pte" \
467+
--data-path "${MODEL_DIR}/aoti_cuda_blob.ptd" \
468+
--tokenizer-path "${MODEL_DIR}/tokenizer.json" \
469+
--hf-tokenizer "${MODEL_DIR}" \
470+
--model-id qwen3.5-moe \
471+
--max-context 4096 \
472+
--max-sessions 2 \
473+
--no-think \
474+
--worker-bin "$WORKER_BIN" \
475+
--host 127.0.0.1 \
476+
--port "$PORT" >"$SERVER_LOG" 2>&1 &
477+
SERVER_PID=$!
478+
479+
cleanup_qwen_server() {
480+
if kill -0 "$SERVER_PID" 2>/dev/null; then
481+
kill "$SERVER_PID" 2>/dev/null || true
482+
wait "$SERVER_PID" 2>/dev/null || true
483+
fi
484+
rm -f "$SERVER_LOG"
485+
}
486+
trap cleanup_qwen_server EXIT
487+
488+
if ! python - "$PORT" "$SERVER_LOG" <<'PY'
489+
import json
490+
import sys
491+
import time
492+
import urllib.request
493+
494+
port = sys.argv[1]
495+
log_path = sys.argv[2]
496+
base = f"http://127.0.0.1:{port}"
497+
498+
499+
def request(path, payload=None):
500+
data = None
501+
headers = {}
502+
if payload is not None:
503+
data = json.dumps(payload).encode("utf-8")
504+
headers["Content-Type"] = "application/json"
505+
req = urllib.request.Request(base + path, data=data, headers=headers)
506+
with urllib.request.urlopen(req, timeout=120) as resp:
507+
return json.loads(resp.read().decode("utf-8"))
508+
509+
510+
last = None
511+
for _ in range(180):
512+
try:
513+
request("/health")
514+
break
515+
except Exception as e:
516+
last = e
517+
time.sleep(1)
518+
else:
519+
print(open(log_path, encoding="utf-8", errors="replace").read())
520+
raise RuntimeError(f"server did not become healthy: {last}")
521+
522+
models = request("/v1/models")
523+
ids = {m["id"] for m in models["data"]}
524+
if "qwen3.5-moe" not in ids:
525+
raise AssertionError(f"qwen3.5-moe missing from /v1/models: {ids}")
526+
527+
body = {
528+
"model": "qwen3.5-moe",
529+
"messages": [{"role": "user", "content": "What is the capital of France?"}],
530+
"max_tokens": 32,
531+
"temperature": 0,
532+
}
533+
resp = request("/v1/chat/completions", body)
534+
content = resp["choices"][0]["message"].get("content") or ""
535+
if "Paris" not in content:
536+
raise AssertionError(f"expected Paris in serving response, got: {content!r}")
537+
538+
print("Qwen3.5-MoE serving smoke passed")
539+
PY
540+
then
541+
echo "Qwen3.5-MoE serving smoke failed; server log:"
542+
cat "$SERVER_LOG"
543+
exit 1
544+
fi
545+
546+
cleanup_qwen_server
547+
trap - EXIT
548+
echo "::endgroup::"
549+
fi
550+
450551
popd

.claude/skills/qualcomm/new_op_development.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,17 @@ class DecomposeMyOp(ExportPass):
217217

218218
### Registration (all decompose passes)
219219
1. `_passes/__init__.py` — import + `__all__`
220-
2. `_passes/qnn_pass_manager.py` — import + `transform_for_annotation_pipeline` + `transform_for_export_pipeline` + `get_capture_program_passes`
221-
3. `_passes/utils.py` — add to `get_passes_dependency_for_capture_program()` with `[RemoveRedundancy]` dependency
220+
2. `_passes/qnn_pass_manager.py` — The pass manager uses classmethods for pipeline definitions:
221+
- **Import** — add to the import block at top of file
222+
- **`get_annotation_passes()`** — add pass class to the returned list (runs before quantizer, ATen IR)
223+
- **`get_export_passes()`** — add pass class if needed for float-only path (runs after quantization, before to-edge)
224+
- **`get_default_pass_activations()`** — add `(PassClass, True)` ONLY if the pass also needs to run in the to-edge pipeline
225+
- **`get_passes_dependency_for_capture_program()`** — add `PassClass: [RemoveRedundancy]` dependency ONLY if also in `get_default_pass_activations`
226+
227+
**When to add to which pipeline:**
228+
- **Annotation only** (most common for decompose passes): `get_annotation_passes()` — pass decomposes the op before the quantizer sees it
229+
- **Export pipeline** too: if the float-only test fails without it (op doesn't get handled by PyTorch's built-in decomposition during to-edge)
230+
- **Capture program** (to-edge) too: if the op can appear in edge dialect and needs decomposition there (e.g., `DecomposeVar`, `DecomposeCDist`, `DecomposeDiagonal`)
222231

223232
---
224233

@@ -255,4 +264,4 @@ class DecomposeMyOp(ExportPass):
255264

256265
**Native QNN Op:** `qnn_constants.py` → `op_my_op.py` → `builders/__init__.py` → `htp_rules.py` → `lpai_rules.py` → `layout_transform.py` → `tests/models.py` → `test_qnn_delegate.py` → `partition/utils.py` (skip decomp) → `common_defs.py` (remove to_be_implemented) → `builders/README.md`
257266

258-
**Decompose Pass:** `_passes/decompose_my_op.py` → `_passes/__init__.py` → `qnn_pass_manager.py` (annotation + export + capture) → `_passes/utils.py` (dependency) → `tests/models.py` → `test_qnn_delegate.py` → `common_defs.py` → `builders/README.md`
267+
**Decompose Pass:** `_passes/decompose_my_op.py` → `_passes/__init__.py` → `qnn_pass_manager.py` (`get_annotation_passes` + optionally `get_export_passes`; if also needed in to-edge: `get_default_pass_activations` + `get_passes_dependency_for_capture_program`) → `tests/models.py` → `test_qnn_delegate.py` → `common_defs.py` → `builders/README.md`

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ exclude =
7575
./configurations,
7676
./docs,
7777
./exir/_serialize/generated/executorch_flatbuffer,
78+
./devtools/bundled_program/serialize/generated,
7879
./third_party,
7980
*.pyi
8081

.github/workflows/build-cadence-runner.yml

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,45 @@ on:
1010
tags:
1111
- ciflow/nightly/*
1212
pull_request:
13+
pull_request_target:
14+
types: [labeled]
1315
workflow_dispatch:
1416

1517
concurrency:
16-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
18+
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.sha }}
1719
cancel-in-progress: true
1820

1921
jobs:
22+
gate:
23+
runs-on: ubuntu-latest
24+
outputs:
25+
run-cadence: ${{ steps.decide.outputs.run }}
26+
steps:
27+
- id: decide
28+
env:
29+
EVENT: ${{ github.event_name }}
30+
IS_FORK: ${{ github.event.pull_request.head.repo.full_name != github.repository }}
31+
HAS_CLA: ${{ contains(github.event.pull_request.labels.*.name, 'CLA Signed') }}
32+
HAS_EXPORT: ${{ contains(github.event.pull_request.labels.*.name, 'meta-exported') }}
33+
run: |
34+
run=false
35+
case "${EVENT}" in
36+
push|schedule|workflow_dispatch)
37+
run=true
38+
;;
39+
pull_request)
40+
[ "${IS_FORK}" = "false" ] && run=true
41+
;;
42+
pull_request_target)
43+
if [ "${IS_FORK}" = "true" ] && [ "${HAS_CLA}" = "true" ] && [ "${HAS_EXPORT}" = "true" ]; then
44+
run=true
45+
fi
46+
;;
47+
esac
48+
echo "run=${run}" >> "${GITHUB_OUTPUT}"
49+
2050
cpu-build:
51+
if: github.event_name != 'pull_request_target'
2152
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
2253
permissions:
2354
id-token: write
@@ -44,6 +75,7 @@ jobs:
4475
4576
cpu-test:
4677
needs: cpu-build
78+
if: github.event_name != 'pull_request_target'
4779
permissions:
4880
id-token: write
4981
contents: read
@@ -56,19 +88,23 @@ jobs:
5688
# lives in _xtensa_build.yml. fusion_g3 is omitted until the upstream fusion_g3
5789
# <-> nnlib-FusionG3 API skew is fixed (its runner does not link).
5890
hifi-build:
91+
needs: gate
92+
if: needs.gate.outputs.run-cadence == 'true'
5993
permissions:
6094
id-token: write
6195
contents: read
6296
uses: ./.github/workflows/_xtensa_build.yml
6397
with:
6498
backend: hifi4
65-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
99+
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
66100

67101
vision-build:
102+
needs: gate
103+
if: needs.gate.outputs.run-cadence == 'true'
68104
permissions:
69105
id-token: write
70106
contents: read
71107
uses: ./.github/workflows/_xtensa_build.yml
72108
with:
73109
backend: vision
74-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
110+
ref: ${{ (github.event_name == 'pull_request' || github.event_name == 'pull_request_target') && github.event.pull_request.head.sha || github.sha }}
.github/workflows/build-cmsis-pack.yml

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
name: Build CMSIS Pack
7+
8+
on:
9+
schedule:
10+
# Nightly at 03:00 UTC, staggered after nightly.yml (02:00) so the
11+
# shared runner pool isn't hit by both at the same minute.
12+
- cron: 0 3 * * *
13+
release:
14+
# Build (and, for non-prerelease, publish) the pack when a GitHub
15+
# Release is created. The tag the release points at drives the pack
16+
# version via GITHUB_REF below.
17+
types: [published]
18+
push:
19+
branches:
20+
- main
21+
- release/*
22+
paths:
23+
- .github/workflows/build-cmsis-pack.yml
24+
- backends/arm/cmsis_pack/**
25+
- backends/arm/cmsis_pack/scripts/**
26+
- backends/arm/runtime/**
27+
- backends/cortex_m/**
28+
- kernels/portable/**
29+
- kernels/quantized/**
30+
- runtime/**
31+
- schema/**
32+
pull_request:
33+
paths:
34+
- .github/workflows/build-cmsis-pack.yml
35+
- backends/arm/cmsis_pack/**
36+
- backends/arm/cmsis_pack/scripts/**
37+
workflow_dispatch:
38+
inputs:
39+
version_override:
40+
description: 'Override pack version (e.g., 1.2.0). Leave empty to derive from version.txt'
41+
required: false
42+
type: string
43+
44+
concurrency:
45+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
46+
cancel-in-progress: true
47+
48+
jobs:
49+
build-cmsis-pack:
50+
name: build-cmsis-pack
51+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
52+
permissions:
53+
id-token: write
54+
contents: read
55+
with:
56+
runner: linux.2xlarge
57+
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
58+
submodules: 'recursive'
59+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
60+
timeout: 60
61+
upload-artifact: cmsis-pack-artifact
62+
script: |
63+
set -eux
64+
65+
echo "::group::Setup environment"
66+
# The generic Linux job chooses to use the base env, not the one set up by the image
67+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
68+
conda activate "${CONDA_ENV}"
69+
70+
source .ci/scripts/utils.sh
71+
install_executorch "--use-pt-pinned-commit"
72+
echo "::endgroup::"
73+
74+
echo "::group::Install ARM toolchain"
75+
.ci/scripts/setup-arm-baremetal-tools.sh
76+
source examples/arm/arm-scratch/setup_path.sh
77+
echo "::endgroup::"
78+
79+
echo "::group::Cross-compile ExecuTorch for Cortex-M"
80+
# Stage 1: Build core ExecuTorch with arm-none-eabi-gcc
81+
# This generates required headers (flatbuffers, schema)
82+
backends/arm/scripts/build_executorch.sh
83+
CMAKE_BUILD_DIR="$(pwd)/cmake-out-arm"
84+
echo "::endgroup::"
85+
86+
echo "::group::Determine pack version"
87+
# Derive version in priority order: input override, v* release tag, schedule (nightly), otherwise version.txt with a -dev suffix
88+
BASE_VER="$(cat version.txt | sed 's/a0$//')"
89+
if [[ -n "${{ inputs.version_override || '' }}" ]]; then
90+
PACK_VERSION="${{ inputs.version_override }}"
91+
elif [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
92+
# Strip the leading 'refs/tags/v' prefix; any suffix (e.g. -rc1) is kept as-is
93+
PACK_VERSION="${GITHUB_REF#refs/tags/v}"
94+
elif [[ "${{ github.event_name }}" == "schedule" ]]; then
95+
PACK_VERSION="${BASE_VER}-nightly-$(date -u +%Y%m%d)"
96+
else
97+
PACK_VERSION="${BASE_VER}-dev"
98+
fi
99+
echo "Pack version: ${PACK_VERSION}"
100+
echo "::endgroup::"
101+
102+
echo "::group::Build CMSIS Pack"
103+
backends/arm/cmsis_pack/scripts/build_pack.sh \
104+
--executorch-root "$(pwd)" \
105+
--build-dir "${CMAKE_BUILD_DIR}" \
106+
--version "${PACK_VERSION}" \
107+
--output-dir "$(pwd)/artifacts-to-be-uploaded"
108+
echo "::endgroup::"
109+
110+
# Structural validation and consumer-build smoke are intentionally
111+
# not run in CI yet. See:
112+
# backends/arm/cmsis_pack/test/validate_pack.py (structural)
113+
# backends/arm/cmsis_pack/test/smoke/run.sh (cbuild via
114+
# AVH-MLOps)
115+
# for the local test drivers.
116+
117+
# Attach the pack to the GitHub Release when a non-prerelease release is
118+
# published. Prereleases still build via the release trigger (no validation runs in CI yet)
119+
# but are not published.
120+
publish-cmsis-pack:
121+
if: github.event_name == 'release' && !github.event.release.prerelease
122+
needs: build-cmsis-pack
123+
runs-on: ubuntu-latest
124+
permissions:
125+
contents: write
126+
steps:
127+
- name: Download pack artifact
128+
uses: actions/download-artifact@v4
129+
with:
130+
name: cmsis-pack-artifact
131+
path: pack-output
132+
133+
- name: Upload to GitHub Release
134+
uses: softprops/action-gh-release@v2
135+
with:
136+
files: pack-output/*.pack
137+
tag_name: ${{ github.ref_name }}

.github/workflows/docker-builds.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
executorch-ubuntu-22.04-mediatek-sdk,
4444
executorch-ubuntu-22.04-clang12-android,
4545
executorch-ubuntu-24.04-gcc14,
46-
executorch-ubuntu-26.04-gcc15,
46+
executorch-ubuntu-26.04-gcc14,
4747
]
4848
include:
4949
- docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64

0 commit comments

Comments
 (0)