From cd8b990d24b2a6e3fd903a633793dd968b1a5dd0 Mon Sep 17 00:00:00 2001
From: superxf <1208713646@qq.com>
Date: Wed, 1 Jul 2026 10:04:45 +0800
Subject: [PATCH] add qwen3 test ci

---
 .github/workflows/ci.yml     | 169 +++++++++++++++++++++++++++--------
 tests/test_qwen3_accuracy.py |  92 +++++++++++++++++++
 2 files changed, 226 insertions(+), 35 deletions(-)
 create mode 100644 tests/test_qwen3_accuracy.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f9be0f1..3d81d78 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,52 +27,151 @@ jobs:
           extra_args: --all-files
 
   unit-tests:
-    runs-on: ubuntu-latest
+    runs-on: [self-hosted, linux, arm64, npu]
     timeout-minutes: 30
+    defaults:
+      run:
+        working-directory: dist-checkout
+    env:
+      ASCEND_HOME_PATH: /usr/local/Ascend/cann-9.0.0
+      PTOAS_ROOT: ${{ github.workspace }}/dist-checkout/ptoas-bin
+      PTO_ISA_ROOT: ${{ github.workspace }}/dist-checkout/pto-isa
+      CMAKE_BUILD_PARALLEL_LEVEL: 16
+      CMAKE_C_COMPILER_LAUNCHER: ccache
+      CMAKE_CXX_COMPILER_LAUNCHER: ccache
+      CCACHE_DIR: /home/ci-runner/hw-native-sys-pypto-lib/ci-cache/ccache
+      CCACHE_MAXSIZE: 30G
+      # Shared with the container-side sim job (root) via the same ci-cache dir;
+      # 000 keeps cache files mutually writable across root and ci-runner.
+      CCACHE_UMASK: '000'
+      # Per-job pip dir: unlike ccache, pip has no shared-umask knob, so a shared
+      # pip cache would leave files owned by whichever job wrote first and block
+      # the other. The sim job uses ci-cache/pip-sim (see its bind mount).
+      PIP_CACHE_DIR: /home/ci-runner/hw-native-sys-pypto-lib/ci-cache/pip-a2a3
+
     steps:
       - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
         with:
-          python-version: "3.10"
+          path: dist-checkout
+          submodules: true
 
-      - name: Cache pip packages
-        uses: actions/cache@v4
-        with:
-          path: ~/.cache/pip
-          key: ${{ runner.os }}-pip-unit-tests
-          restore-keys: |
-            ${{ runner.os }}-pip-
+      - name: Resolve toolchain + sync pypto source
+        # Sync the cached pypto checkout ONCE here; the Build step below reuses it,
+        # so the toolchain pins and the installed pypto are always the same
+        # revision. pypto owns ptoas (toolchain/versions.env) and pins pto-isa via
+        # its runtime submodule (runtime/pto_isa.pin); read both.
+        run: |
+          PYPTO_SRC="/home/ci-runner/hw-native-sys-pypto-lib/ci-cache/pypto-src"
+          if [ -d "$PYPTO_SRC/.git" ]; then
+            echo "Cache hit — updating pypto"
+            git -C "$PYPTO_SRC" fetch --depth=1 origin HEAD
+            git -C "$PYPTO_SRC" reset --hard FETCH_HEAD
+            git -C "$PYPTO_SRC" clean -ffdx
+            git -C "$PYPTO_SRC" submodule foreach --recursive 'git reset --hard && git clean -ffdx' || true
+          else
+            echo "Cache miss — cloning pypto"
+            git clone --recurse-submodules --depth=1 https://github.com/hw-native-sys/pypto.git "$PYPTO_SRC"
+          fi
+          git -C "$PYPTO_SRC" submodule update --init --recursive
+          rm -rf "$PYPTO_SRC/build" "$PYPTO_SRC/_skbuild" "$PYPTO_SRC/runtime/build"
+          {
+            grep -E '^PTOAS_VERSION=' "$PYPTO_SRC/toolchain/versions.env"
+            echo "PTOAS_SHA256=$(sed -n 's/^PTOAS_SHA256_AARCH64=//p' "$PYPTO_SRC/toolchain/versions.env")"
+            echo "PTO_ISA_COMMIT=$(tr -d '[:space:]' < "$PYPTO_SRC/runtime/pto_isa.pin")"
+          } >> "$GITHUB_ENV"
+
+      - name: Check NPU
+        working-directory: .
+        run: npu-smi info
 
-      - name: Install dependencies
+      # See the sim job: namespace the cache by pto-isa commit so an ISA bump
+      # forces a real recompile instead of reusing stale objects (issue #1139).
+      - name: Scope ccache to pto-isa version
+        run: echo "CCACHE_NAMESPACE=pto-isa-${PTO_ISA_COMMIT}" >> "$GITHUB_ENV"
+
+      - name: Clone pto-isa repository (pinned)
         run: |
-          python -m pip install --upgrade pip
-          pip install nanobind
-          pip install pytest
-          pip install torch --index-url https://download.pytorch.org/whl/cpu
+          rm -rf "$PTO_ISA_ROOT"
+          timeout 60 git clone https://github.com/hw-native-sys/pto-isa.git "$PTO_ISA_ROOT" \
+            || { rm -rf "$PTO_ISA_ROOT"; timeout 300 git clone https://gitcode.com/luohuan40/pto-isa.git "$PTO_ISA_ROOT"; }
+          cd "$PTO_ISA_ROOT"
+          git checkout "$PTO_ISA_COMMIT"
 
-      - name: Get pypto HEAD commit
-        id: pypto-hash
-        run: echo "hash=$(git ls-remote https://github.com/hw-native-sys/pypto.git HEAD | cut -f1)" >> $GITHUB_OUTPUT
+      - name: Bootstrap conda + per-job venv + write activate.sh
+        # Per-job venv layered on conda py310-lib; activate.sh lets each later step
+        # enter the env with a single `source activate.sh`. set_env.sh wires up
+        # the Ascend/HCCL host environment; LD_LIBRARY_PATH is prepended with
+        # $CONDA_PREFIX/lib because ptoas needs a newer GLIBCXX than the system.
+        # PTO2_RING_* (set by the runner's systemd) are handled per-file in the
+        # run step — they break HCCL, so multi-card files unset them there.
+        run: |
+          source /home/ci-runner/miniconda3/etc/profile.d/conda.sh
+          conda activate py310-lib
+          python -m venv venv --system-site-packages
+          cat > activate.sh <<'EOF'
+          source /home/ci-runner/miniconda3/etc/profile.d/conda.sh
+          conda activate py310-lib
+          source "$GITHUB_WORKSPACE/dist-checkout/venv/bin/activate"
+          source /usr/local/Ascend/cann-9.0.0/set_env.sh
+          export LD_LIBRARY_PATH="$CONDA_PREFIX/lib:$LD_LIBRARY_PATH"
+          EOF
 
-      - name: Cache pypto wheels
-        id: cache-pypto
-        uses: actions/cache@v4
-        with:
-          path: /tmp/pypto-wheels
-          key: pypto-${{ runner.os }}-${{ runner.arch }}-py3.10-${{ steps.pypto-hash.outputs.hash }}
+      - name: Show ccache stats (before)
+        run: ccache -s || true
+
+      - name: Build and install pypto and simpler
+        # Reuses the pypto source synced by the resolve step above (same revision
+        # the toolchain pins were read from).
+        run: |
+          source activate.sh
+          PYPTO_SRC="/home/ci-runner/hw-native-sys-pypto-lib/ci-cache/pypto-src"
+          python -m pip install --upgrade pip
+          pip install scikit-build-core nanobind cmake ninja
+          pip install --no-build-isolation "$PYPTO_SRC"
+          pip install --no-build-isolation "$PYPTO_SRC/runtime"
 
-      - name: Build pypto wheels
-        if: steps.cache-pypto.outputs.cache-hit != 'true'
+      - name: Install runner dependencies
         run: |
-          git clone --recurse-submodules --depth=1 https://github.com/hw-native-sys/pypto.git /tmp/pypto
-          pip wheel /tmp/pypto -w /tmp/pypto-wheels --no-deps
-          pip wheel /tmp/pypto/runtime -w /tmp/pypto-wheels --no-deps
+          source activate.sh
+          pip install nanobind
+          pip install torch transformers safetensors numpy
 
-      - name: Install pypto and simpler
-        run: pip install /tmp/pypto-wheels/*.whl
+      - name: Show ccache stats (after)
+        run: ccache -s || true
 
-      - name: Run unit tests
+      - name: Install ptoas (local cache)
         env:
-          PYTHONPATH: ${{ github.workspace }}
-        run: python -m pytest tests/test_cli.py tests/test_batching.py
+          PTOAS_CACHE_DIR: /home/ci-runner/hw-native-sys-pypto-lib/ci-cache/ptoas
+        run: |
+          CACHE_ARCHIVE="$PTOAS_CACHE_DIR/ptoas-aarch64-${PTOAS_VERSION}-${PTOAS_SHA256}.tar.gz"
+          download_ptoas() {
+            echo "Downloading ptoas ${PTOAS_VERSION}"
+            mkdir -p "$PTOAS_CACHE_DIR"
+            curl --fail --location --retry 3 --retry-all-errors \
+              https://github.com/hw-native-sys/PTOAS/releases/download/${PTOAS_VERSION}/ptoas-bin-aarch64.tar.gz \
+              -o "$CACHE_ARCHIVE.tmp"
+            echo "${PTOAS_SHA256}  $CACHE_ARCHIVE.tmp" | sha256sum -c -
+            mv "$CACHE_ARCHIVE.tmp" "$CACHE_ARCHIVE"
+          }
+          if [ ! -f "$CACHE_ARCHIVE" ]; then
+            echo "Cache miss"
+            download_ptoas
+          elif ! echo "${PTOAS_SHA256}  $CACHE_ARCHIVE" | sha256sum -c -; then
+            echo "Cache corrupted — removing and re-downloading"
+            rm -f "$CACHE_ARCHIVE"
+            download_ptoas
+          else
+            echo "Cache hit — using $CACHE_ARCHIVE"
+          fi
+          mkdir -p "$PTOAS_ROOT"
+          tar -xzf "$CACHE_ARCHIVE" -C "$PTOAS_ROOT"
+          chmod +x "$PTOAS_ROOT/ptoas"
+          chmod +x "$PTOAS_ROOT/bin/ptoas"
+
+      - name: Run Qwen3 accuracy guard
+        env:
+          PYTHONPATH: ${{ github.workspace }}/dist-checkout
+          PYPTO_QWEN3_MODEL_DIR: /data/l00955553/model/Qwen3-14B
+        run: |
+          source activate.sh
+          python -m pytest tests/test_qwen3_accuracy.py -q -s
diff --git a/tests/test_qwen3_accuracy.py b/tests/test_qwen3_accuracy.py
new file mode 100644
index 0000000..5f3d162
--- /dev/null
+++ b/tests/test_qwen3_accuracy.py
@@ -0,0 +1,92 @@
+# Copyright (c) PyPTO Contributors.
+# This program is free software, you can redistribute it and/or modify it under the terms and conditions of
+# CANN Open Software License Agreement Version 2.0 (the "License").
+# Please refer to the License for details. You may not use this file except in compliance with the License.
+# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
+# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+# See LICENSE in the root of the software repository for the full text of the License.
+# -----------------------------------------------------------------------------------------------------------
+
+"""Qwen3 output accuracy guard for CI."""
+
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+
+ROOT = Path(__file__).resolve().parents[1]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+
+MODEL_ID = "qwen3-14b-accuracy"
+PLATFORM = os.environ.get("PYPTO_QWEN3_PLATFORM", "a2a3")
+PROMPT = "The capital of France is"
+MAX_NEW_TOKENS = 8
+
+EXPECTED_TOKEN_IDS = [12095, 13, 576, 6722, 315, 9625, 374, 12095]
+
+
+def _required_env(name: str) -> str:
+    """Return the non-empty value of environment variable ``name``; fail the current test otherwise."""
+    value = os.environ.get(name)
+    if not value:
+        pytest.fail(f"{name} is required")
+    return value
+
+
+def test_qwen3_output_matches_expected_tokens():
+    """Generate MAX_NEW_TOKENS tokens for PROMPT and compare them with EXPECTED_TOKEN_IDS."""
+    # Validate the environment at run time: a module-level pytest.fail() is raised while
+    # the file is imported, so it is reported as a collection error rather than a test failure.
+    model_dir = Path(_required_env("PYPTO_QWEN3_MODEL_DIR"))
+    if not model_dir.is_dir():
+        pytest.fail(f"Qwen3 model weights not found: {model_dir}")
+    device_id = int(_required_env("DEVICE_ID"))
+
+    from examples.model.qwen3_14b.runner.npu_executor import Qwen314BPyptoExecutor
+    from python.core.engine import LLMEngine
+    from python.core.kv_cache import KvCacheManager
+    from python.core.types import GenerateConfig, RuntimeConfig
+
+    kv_cache_manager = KvCacheManager()
+    executor = Qwen314BPyptoExecutor(kv_cache_manager, platform=PLATFORM, device_id=device_id)
+    try:
+        engine = LLMEngine(kv_cache_manager=kv_cache_manager, executor=executor)
+        engine.init_model(
+            model_id=MODEL_ID,
+            model_dir=str(model_dir),
+            model_format="huggingface",
+            runtime_config=RuntimeConfig(
+                page_size=128,
+                max_batch_size=16,
+                max_seq_len=512,
+                max_new_tokens=MAX_NEW_TOKENS,
+                device="cpu",
+                kv_dtype="bfloat16",
+                weight_dtype="float32",
+            ),
+        )
+        result = engine.generate_result(
+            MODEL_ID,
+            PROMPT,
+            GenerateConfig(
+                max_new_tokens=MAX_NEW_TOKENS,
+                temperature=0.0,
+                top_p=1.0,
+                top_k=None,
+            ),
+        )
+    finally:
+        executor.close()
+    assert result.token_ids == EXPECTED_TOKEN_IDS, (
+        f"Qwen3 output changed for prompt {PROMPT!r}:\n"
+        f"expected token_ids: {EXPECTED_TOKEN_IDS}\n"
+        f"actual token_ids: {result.token_ids}\n"
+        f"actual text: {result.text!r}\n"
+        f"finish_reason: {result.finish_reason}"
+    )