Skip to content

[ET-VK][patterns] Fuse torchao 4-bit quantized embedding to embedding_q4gsw #9400

[ET-VK][patterns] Fuse torchao 4-bit quantized embedding to embedding_q4gsw

[ET-VK][patterns] Fuse torchao 4-bit quantized embedding to embedding_q4gsw #9400

name: Cadence Build & Test

# Triggers:
#   - schedule:            once a day (cron "0 8 * * *").
#   - push:                main, release/* branches and ciflow/nightly/* tags.
#   - pull_request:        every pull request event type GitHub enables by default.
#   - pull_request_target: only when a label is added to a pull request.
#   - workflow_dispatch:   manual runs.
on:
  schedule:
    - cron: '0 8 * * *'
  push:
    branches:
      - main
      - 'release/*'
    tags:
      - 'ciflow/nightly/*'
  pull_request:
  pull_request_target:
    types:
      - labeled
  workflow_dispatch:

# One concurrency group per (workflow, event type, PR number or commit SHA).
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

jobs:
gate:
  # Decides whether the gated Cadence jobs may run for the current event and
  # publishes the verdict as the `run-cadence` output ('true' or 'false'):
  #   - push / schedule / workflow_dispatch: always run.
  #   - pull_request: run only when the PR head lives in this repository.
  #   - pull_request_target: run only for fork PRs that carry both the
  #     'CLA Signed' and 'meta-exported' labels.
  runs-on: ubuntu-latest
  # This job only inspects event metadata handed in through `env`; it never
  # checks out code or calls the API. Drop every GITHUB_TOKEN scope so that a
  # pull_request_target run does not inherit the repository's default token
  # permissions for no reason.
  permissions: {}
  outputs:
    run-cadence: ${{ steps.decide.outputs.run }}
  steps:
    - id: decide
      env:
        EVENT: ${{ github.event_name }}
        # Expression results reach the shell as the strings 'true' / 'false'.
        IS_FORK: ${{ github.event.pull_request.head.repo.full_name != github.repository }}
        HAS_CLA: ${{ contains(github.event.pull_request.labels.*.name, 'CLA Signed') }}
        HAS_EXPORT: ${{ contains(github.event.pull_request.labels.*.name, 'meta-exported') }}
      run: |
        run=false
        case "${EVENT}" in
          push|schedule|workflow_dispatch)
            run=true
            ;;
          pull_request)
            # Explicit `if` instead of `[ ... ] && run=true`, so a false test
            # can never leave a non-zero status behind under `bash -e`.
            if [ "${IS_FORK}" = "false" ]; then
              run=true
            fi
            ;;
          pull_request_target)
            if [ "${IS_FORK}" = "true" ] && [ "${HAS_CLA}" = "true" ] && [ "${HAS_EXPORT}" = "true" ]; then
              run=true
            fi
            ;;
        esac
        echo "run=${run}" >> "${GITHUB_OUTPUT}"
cpu-build:
  # Builds the Cadence runner through the shared pytorch/test-infra Linux job
  # and uploads it under the artifact name `cadence-runner-build`.
  # Skipped for pull_request_target events.
  if: ${{ github.event_name != 'pull_request_target' }}
  permissions:
    contents: read
    id-token: write
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  with:
    job-name: build
    runner: linux.2xlarge
    docker-image: ci-image:executorch-ubuntu-22.04-clang12
    submodules: recursive
    # Pull requests build the PR head commit; every other event builds
    # github.sha.
    ref: >-
      ${{
      github.event_name == 'pull_request'
      && github.event.pull_request.head.sha
      || github.sha
      }}
    timeout: 90
    upload-artifact: cadence-runner-build
    script: |
      set -eux
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
      ./install_requirements.sh > /dev/null
      bash backends/cadence/build_cadence_runner.sh
      # Copy runner binary to artifact dir for downstream test jobs
      cp cmake-out/backends/cadence/cadence_runner "${RUNNER_ARTIFACT_DIR}/"
cpu-test:
  # Calls the reusable _test_cadence.yml workflow once cpu-build has finished.
  # Skipped for pull_request_target events, like cpu-build.
  needs:
    - cpu-build
  if: ${{ github.event_name != 'pull_request_target' }}
  uses: ./.github/workflows/_test_cadence.yml
  permissions:
    contents: read
    id-token: write
  with:
    # Pull requests test the PR head commit; every other event tests
    # github.sha.
    ref: >-
      ${{
      github.event_name == 'pull_request'
      && github.event.pull_request.head.sha
      || github.sha
      }}
# Xtensa cross-compile jobs: cadence_executor_runner is built once per Cadence
# Xtensa core. Each backend gets its own job (rather than a matrix) so that it
# shows up as a separate line. The shared logic lives in _xtensa_build.yml.
# fusion_g3 is left out until the upstream fusion_g3 <-> nnlib-FusionG3 API
# skew is fixed (its runner does not link).
hifi-build:
  needs:
    - gate
  # Runs only when the gate job reported run-cadence == 'true'.
  if: ${{ needs.gate.outputs.run-cadence == 'true' }}
  uses: ./.github/workflows/_xtensa_build.yml
  permissions:
    contents: read
    id-token: write
  with:
    backend: hifi4
    # Both PR event flavours build the PR head commit; every other event
    # builds github.sha.
    ref: >-
      ${{
      (github.event_name == 'pull_request' || github.event_name == 'pull_request_target')
      && github.event.pull_request.head.sha
      || github.sha
      }}
vision-build:
  # Cross-compiles for the `vision` backend via the reusable
  # _xtensa_build.yml workflow.
  needs:
    - gate
  # Runs only when the gate job reported run-cadence == 'true'.
  if: ${{ needs.gate.outputs.run-cadence == 'true' }}
  uses: ./.github/workflows/_xtensa_build.yml
  permissions:
    contents: read
    id-token: write
  with:
    backend: vision
    # Both PR event flavours build the PR head commit; every other event
    # builds github.sha.
    ref: >-
      ${{
      (github.event_name == 'pull_request' || github.event_name == 'pull_request_target')
      && github.event.pull_request.head.sha
      || github.sha
      }}