pytorch · JulianCloudNTH · Jun 17, 2026 · Jun 18, 2026
@@ -138,7 +138,6 @@ endfunction()
 
 if(EXECUTORCH_BUILD_WEBGPU_TEST)
   add_webgpu_native_test(webgpu_native_test test/test_webgpu_native.cpp)
-  add_webgpu_native_test(webgpu_rms_norm_test test/native/test_rms_norm.cpp)
   add_webgpu_native_test(
     webgpu_dispatch_order_test test/native/test_dispatch_order.cpp
   )

diff --git a/backends/webgpu/scripts/test_webgpu_native_ci.sh b/backends/webgpu/scripts/test_webgpu_native_ci.sh
@@ -8,18 +8,19 @@
 # Build + run the WebGPU native test executables on Dawn (Tint) + SwiftShader.
 # This is the substantive op-coverage gate: unlike the python operators suite
 # (which only delegates add.Tensor to WebGPU, the rest CPU-fallback), these
-# executables run rms_norm / multi-dispatch ordering / scratch through the real
-# WebGPU backend on Dawn.
+# executables run quantized_linear / SDPA / update_cache / multi-dispatch
+# ordering / scratch through the real WebGPU backend on Dawn. (Simple ops —
+# add / rms_norm / the misc ops — run through the cases.py op-test framework.)
 #
 # Assumes the Dawn env is already sourced (Dawn_DIR + VK_ICD_FILENAMES +
 # LD_LIBRARY_PATH) via .ci/scripts/setup-webgpu-linux-deps.sh. For local runs:
 #   source .ci/scripts/setup-webgpu-linux-deps.sh
 #   bash backends/webgpu/scripts/test_webgpu_native_ci.sh
 #
 # Builds whatever native test targets are present in the landed tree (NOT a fixed
-# list). This stack lands: webgpu_native_test, webgpu_rms_norm_test (base) +
-# webgpu_dispatch_order_test, webgpu_scratch_buffer_test (D107576199) +
-# webgpu_update_cache_test (D107547307). SDPA executables join once they land.
+# list): webgpu_native_test (base) + webgpu_dispatch_order_test,
+# webgpu_scratch_buffer_test (D107576199) + webgpu_update_cache_test
+# (D107547307). SDPA executables join once they land.
 
 set -e
 
@@ -37,33 +38,19 @@ fi
 cd "${EXECUTORCH_ROOT}"
 
 # ── Exports for the model-driven executables (best-effort) ───────────────────
-# native_test + rms_norm + dispatch_order read .pte/golden inputs via env/dir and
-# self-skip if absent; scratch is standalone (generates its own inputs).
-PTE_MODEL="/tmp/webgpu_add_test.pte"
-PTE_CHAINED_MODEL="/tmp/webgpu_chained_add_test.pte"
-RMS_NORM_DIR="/tmp/rmsn"
-RMS_NORM_OK=1
+# native_test (quantized_linear/SDPA/update_cache) + dispatch_order read .pte/
+# golden inputs via env/dir and self-skip if absent; scratch is standalone.
+# native_test itself is gated below on the executorch wheel being importable.
 DISPATCH_ORDER_DIR="/tmp/dispatch_order"
 DISPATCH_ORDER_OK=1
 UPDATE_CACHE_DIR="/tmp/update_cache"
 UPDATE_CACHE_OK=1
 
-$PYTHON_EXECUTABLE -c "
-from executorch.backends.webgpu.test.ops.add.test_add import export_add_model, export_chained_add_model
-export_add_model('${PTE_MODEL}')
-export_chained_add_model('${PTE_CHAINED_MODEL}')
-" || echo "WARN: add export failed; webgpu_native_test self-skips models whose .pte is absent"
-
 $PYTHON_EXECUTABLE -c "
 from executorch.backends.webgpu.test.ops.quantized_linear.test_quantized_linear import export_all_quantized_linear_models
 export_all_quantized_linear_models('/tmp')
 " || echo "WARN: q4gsw export failed; required configs will FAIL in webgpu_native_test"
 
-$PYTHON_EXECUTABLE -c "
-from executorch.backends.webgpu.test.ops.rms_norm.test_rms_norm import export_rms_norm_cases
-export_rms_norm_cases('${RMS_NORM_DIR}')
-" || { echo "WARN: rms_norm export failed; skipping rms_norm native test"; RMS_NORM_OK=0; }
-
 $PYTHON_EXECUTABLE -c "
 from executorch.backends.webgpu.test.ops.dispatch_order.test_dispatch_order import export_dispatch_order_cases
 export_dispatch_order_cases('${DISPATCH_ORDER_DIR}')
@@ -112,7 +99,7 @@ cmake \
     "${EXECUTORCH_ROOT}"
 
 # ── Build + run every native test target that exists in this tree ────────────
-TARGETS=(webgpu_native_test webgpu_rms_norm_test webgpu_dispatch_order_test webgpu_scratch_buffer_test webgpu_update_cache_test)
+TARGETS=(webgpu_native_test webgpu_dispatch_order_test webgpu_scratch_buffer_test webgpu_update_cache_test)
 BIN_DIR="${BUILD_DIR}/backends/webgpu"
 
 # Which targets are defined depends on which diffs are landed (native_test +
@@ -141,20 +128,17 @@ for t in "${TARGETS[@]}"; do
 done
 
 echo "=== Run native tests on Dawn + SwiftShader ==="
-# native_test is model-driven; only run it if the export produced its .pte
-# (CI's setup-linux.sh provides the executorch wheel so exports succeed; a bare
-# local run without the wheel self-skips here rather than hard-failing on load).
-if [[ -x "${BIN_DIR}/webgpu_native_test" && -f "${PTE_MODEL}" ]]; then
-  env WEBGPU_TEST_MODEL="${PTE_MODEL}" \
-      WEBGPU_TEST_CHAINED_MODEL="${PTE_CHAINED_MODEL}" \
-      WEBGPU_TEST_SDPA_DIR=/tmp/ \
+# webgpu_native_test hosts the quantized_linear / SDPA / update_cache / symint
+# sweeps. Gate on the executorch wheel being importable (the proxy for "the
+# exports above ran"): CI has the wheel so they ran; a bare local run without it
+# skips here rather than hard-failing the required-config guards.
+if [[ -x "${BIN_DIR}/webgpu_native_test" ]] &&
+  "${PYTHON_EXECUTABLE}" -c "import executorch" 2>/dev/null; then
+  env WEBGPU_TEST_SDPA_DIR=/tmp/ \
       WEBGPU_TEST_QUANTIZED_LINEAR_DIR=/tmp/ \
       "${BIN_DIR}/webgpu_native_test"
 else
-  echo "(skipping webgpu_native_test: no exported .pte — needs the executorch python wheel)"
-fi
-if [[ "${RMS_NORM_OK}" == "1" && -x "${BIN_DIR}/webgpu_rms_norm_test" ]]; then
-  "${BIN_DIR}/webgpu_rms_norm_test" "${RMS_NORM_DIR}"
+  echo "(skipping webgpu_native_test: executorch wheel absent — exports did not run)"
 fi
 if [[ "${UPDATE_CACHE_OK}" == "1" && -x "${BIN_DIR}/webgpu_update_cache_test" ]]; then
   "${BIN_DIR}/webgpu_update_cache_test" "${UPDATE_CACHE_DIR}"

diff --git a/backends/webgpu/test/native/test_rms_norm.cpp b/backends/webgpu/test/native/test_rms_norm.cpp
diff --git a/backends/webgpu/test/test_build_webgpu.sh b/backends/webgpu/test/test_build_webgpu.sh
@@ -26,36 +26,18 @@ $PYTHON_EXECUTABLE -m pytest "${SCRIPT_DIR}/test_wgsl_codegen.py" -v
 
 echo "=== Step 1: Run Python export tests ==="
 $PYTHON_EXECUTABLE -m pytest "${SCRIPT_DIR}/ops/add/test_add.py" -v
-# Non-fatal: a rms_norm pytest failure skips the rms_norm native test below
-# rather than aborting the whole run.
-RMS_NORM_PYTEST_OK=1
-$PYTHON_EXECUTABLE -m pytest "${SCRIPT_DIR}/ops/rms_norm/test_rms_norm.py" -v \
-    || RMS_NORM_PYTEST_OK=0
+$PYTHON_EXECUTABLE -m pytest "${SCRIPT_DIR}/ops/rms_norm/test_rms_norm.py" -v
 
 # ── Step 2: Export .pte model ─────────────────────────────────────────────────
 
 echo "=== Step 2: Export test models ==="
-PTE_MODEL="/tmp/webgpu_add_test.pte"
-PTE_CHAINED_MODEL="/tmp/webgpu_chained_add_test.pte"
-RMS_NORM_DIR="/tmp/rmsn"
 DISPATCH_ORDER_DIR="/tmp/dispatch_order"
 PTE_UPDATE_CACHE_MODEL="/tmp/webgpu_update_cache_test.pte"
 cd "${EXECUTORCH_ROOT}"
 $PYTHON_EXECUTABLE -c "
-from executorch.backends.webgpu.test.ops.add.test_add import export_add_model, export_chained_add_model
-export_add_model('${PTE_MODEL}')
-export_chained_add_model('${PTE_CHAINED_MODEL}')
-"
-$PYTHON_EXECUTABLE -c "
 from executorch.backends.webgpu.test.ops.dispatch_order.test_dispatch_order import export_dispatch_order_cases
 export_dispatch_order_cases('${DISPATCH_ORDER_DIR}')
 "
-if [[ "${RMS_NORM_PYTEST_OK}" == "1" ]]; then
-  $PYTHON_EXECUTABLE -c "
-from executorch.backends.webgpu.test.ops.rms_norm.test_rms_norm import export_rms_norm_cases
-export_rms_norm_cases('${RMS_NORM_DIR}')
-" || { echo "WARN: rms_norm export failed; skipping rms_norm native test"; RMS_NORM_PYTEST_OK=0; }
-fi
 
 echo "=== Export update_cache model ==="
 UPDATE_CACHE_OK=1
@@ -113,7 +95,6 @@ cmake \
     "${EXECUTORCH_ROOT}"
 
 cmake --build "${NATIVE_BUILD_DIR}" --target webgpu_native_test -j${NPROC}
-cmake --build "${NATIVE_BUILD_DIR}" --target webgpu_rms_norm_test -j${NPROC}
 cmake --build "${NATIVE_BUILD_DIR}" --target webgpu_dispatch_order_test -j${NPROC}
 cmake --build "${NATIVE_BUILD_DIR}" --target webgpu_scratch_buffer_test -j${NPROC}
 
@@ -125,18 +106,10 @@ else
   echo "(skipping update_cache native test: export did not complete)"
 fi
 env \
-    WEBGPU_TEST_MODEL="${PTE_MODEL}" \
-    WEBGPU_TEST_CHAINED_MODEL="${PTE_CHAINED_MODEL}" \
     ${UPDATE_CACHE_ENV_VAR} \
     WEBGPU_TEST_SDPA_DIR=/tmp/ \
     "${NATIVE_BUILD_DIR}/backends/webgpu/webgpu_native_test"
 
-if [[ "${RMS_NORM_PYTEST_OK}" == "1" ]]; then
-  "${NATIVE_BUILD_DIR}/backends/webgpu/webgpu_rms_norm_test" "${RMS_NORM_DIR}"
-else
-  echo "(skipping rms_norm native test: pytest or export did not complete)"
-fi
-
 "${NATIVE_BUILD_DIR}/backends/webgpu/webgpu_dispatch_order_test" "${DISPATCH_ORDER_DIR}"
 "${NATIVE_BUILD_DIR}/backends/webgpu/webgpu_scratch_buffer_test"