Skip to content

Commit 1abcd20

Browse files
JulianCloudNTHfacebook-github-bot
authored andcommitted
Add rms_norm op + named-data upload + x86 CI
Summary: Adds the `et_vk.rms_norm.default` operator to the WebGPU backend (a WGSL compute shader using a cooperative tree reduction, one workgroup per row), fixes constant upload so the op's weight loads correctly, and wires the backend into CI. The Vulkan serializer that the WebGPU backend reuses stores every non-empty constant (e.g. the rms_norm weight) in the PTE's named-data map with `offset == UINT64_MAX` and a `named_key`, rather than inline in the VK00 blob. `WebGPUGraph::build` previously handled only inline constants, so the weight was never uploaded and the op returned all zeros. `build` now also fetches named-data constants via `NamedDataMap::get_data`, mirroring the path `VulkanBackend` already uses. `aten.add` was unaffected since it has no constant tensors. The shader mirrors the Vulkan implementation (`backends/vulkan/runtime/graph/ops/impl/RmsNorm.cpp`, `backends/vulkan/runtime/graph/ops/glsl/rms_norm_buffer.glsl`); indexing assumes contiguous fp32 inputs. The handler fails loud (throws, mirroring Vulkan's `VK_CHECK_COND`) on invalid shape/dtype/dispatch-limit conditions, and defaults `eps` to the float32 machine epsilon. Also adds a simple x86 Linux CI job, mirroring the Vulkan delegate: `backends/test/suite/flows/webgpu.py` plus a `WebGPUTester`, run by `oss/.github/workflows/test-backend-webgpu.yml` on SwiftShader (a software Vulkan adapter, via `wgpu-native`, minimal dependencies, no GPU). Two fixes were needed for SwiftShader's downlevel limits: request the adapter's full `requiredLimits` at device creation (software adapters default storage-buffer limits to 0), and lower the `add` op `workgroup_size` from 256 to 64 (256 exceeded SwiftShader's 128-invocation cap; the Vulkan delegate uses 64). Differential Revision: D106887028
1 parent f512d7e commit 1abcd20

23 files changed

Lines changed: 798 additions & 9 deletions
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -ex
9+
10+
# SwiftShader: software Vulkan adapter for GPU-less CI (LunarG SDK not needed).
11+
install_swiftshader() {
12+
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
13+
_swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
14+
_swiftshader_dir=/tmp/swiftshader
15+
mkdir -p $_swiftshader_dir
16+
17+
_tmp_archive="/tmp/${_swiftshader_archive}"
18+
19+
curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
20+
--output "${_tmp_archive}" "$_https_amazon_aws/${_swiftshader_archive}"
21+
22+
tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"
23+
24+
export VK_ICD_FILENAMES="${_swiftshader_dir}/swiftshader/build/Linux/vk_swiftshader_icd.json"
25+
export LD_LIBRARY_PATH="${_swiftshader_dir}/swiftshader/build/Linux/"
26+
export ETVK_USING_SWIFTSHADER=1
27+
}
28+
29+
install_swiftshader
30+
bash backends/webgpu/scripts/setup-wgpu-native.sh

.ci/scripts/test_backend.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ if [[ "$FLOW" == *vulkan* ]]; then
5757
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
5858
fi
5959

60+
if [[ "$FLOW" == *webgpu* ]]; then
61+
# Setup swiftshader (software Vulkan adapter for GPU-less runners) and wgpu-native,
62+
# which are required to build and run the WebGPU delegate.
63+
source .ci/scripts/setup-webgpu-linux-deps.sh
64+
65+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON"
66+
fi
67+
6068
if [[ "$FLOW" == *arm* ]]; then
6169
if [[ "$SUITE" == "operators" ]]; then
6270
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Test WebGPU Backend
2+
3+
on:
4+
schedule:
5+
- cron: 0 2 * * *
6+
push:
7+
branches:
8+
- main
9+
- release/*
10+
tags:
11+
- ciflow/nightly/*
12+
pull_request:
13+
workflow_dispatch:
14+
15+
concurrency:
16+
group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
test-webgpu:
21+
uses: ./.github/workflows/_test_backend.yml
22+
with:
23+
backend: webgpu
24+
flows: '["webgpu"]'
25+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
26+
timeout: 120
27+
run-linux: true

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,10 @@ if(EXECUTORCH_BUILD_PYBIND)
10561056
list(APPEND _dep_libs vulkan_backend)
10571057
endif()
10581058

1059+
if(EXECUTORCH_BUILD_WEBGPU)
1060+
list(APPEND _dep_libs webgpu_backend)
1061+
endif()
1062+
10591063
# compile options for pybind
10601064
set(_pybind_compile_options
10611065
$<$<CXX_COMPILER_ID:MSVC>:/EHsc

backends/test/suite/flow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ def _load_vulkan() -> list[TestFlow]:
117117
return [VULKAN_TEST_FLOW, VULKAN_STATIC_INT8_PER_CHANNEL_TEST_FLOW]
118118

119119

120+
def _load_webgpu() -> list[TestFlow]:
121+
from executorch.backends.test.suite.flows.webgpu import WEBGPU_TEST_FLOW
122+
123+
return [WEBGPU_TEST_FLOW]
124+
125+
120126
def _load_openvino() -> list[TestFlow]:
121127
from executorch.backends.test.suite.flows.openvino import (
122128
OPENVINO_INT8_TEST_FLOW,
@@ -178,6 +184,7 @@ def all_flows() -> dict[str, TestFlow]:
178184
+ _register_flow(_load_xnnpack, "XNNPACK")
179185
+ _register_flow(_load_coreml, "Core ML")
180186
+ _register_flow(_load_vulkan, "Vulkan")
187+
+ _register_flow(_load_webgpu, "WebGPU")
181188
+ _register_flow(_load_openvino, "OpenVINO")
182189
+ _register_flow(_load_qnn, "QNN")
183190
+ _register_flow(_load_arm, "ARM")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.test.suite.flow import TestFlow
8+
from executorch.backends.webgpu.test.tester import WebGPUTester
9+
10+
11+
def _create_webgpu_flow() -> TestFlow:
12+
return TestFlow(
13+
"webgpu",
14+
backend="webgpu",
15+
tester_factory=WebGPUTester,
16+
skip_patterns=["float16", "float64"], # Not supported in swiftshader
17+
)
18+
19+
20+
WEBGPU_TEST_FLOW = _create_webgpu_flow()

backends/webgpu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@ if(NOT TARGET vulkan_schema)
2222
add_subdirectory(
2323
${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
2424
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
25+
EXCLUDE_FROM_ALL
2526
)
2627
endif()
2728

2829
set(WEBGPU_SRCS
2930
runtime/WebGPUBackend.cpp runtime/WebGPUGraph.cpp
3031
runtime/WebGPUDelegateHeader.cpp runtime/WebGPUDevice.cpp
3132
runtime/ops/OperatorRegistry.cpp runtime/ops/add/BinaryOp.cpp
33+
runtime/ops/rms_norm/RmsNorm.cpp
3234
)
3335

3436
add_library(webgpu_backend ${WEBGPU_SRCS})

backends/webgpu/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.

backends/webgpu/runtime/WebGPUBackend.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(
7676
}
7777

7878
try {
79-
graph->build(flatbuffer_data, constant_data);
79+
graph->build(flatbuffer_data, constant_data, context.get_named_data_map());
8080
} catch (const std::exception& e) {
8181
ET_LOG(Error, "WebGPU graph build failed: %s", e.what());
8282
graph->~WebGPUGraph();

backends/webgpu/runtime/WebGPUDevice.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,12 @@ WebGPUContext create_webgpu_context() {
121121
device_cb.callback = on_device_request;
122122
device_cb.userdata1 = &device_result;
123123

124+
// Request the adapter's full limits; software adapters default many to 0.
125+
WGPULimits supported_limits = {};
124126
WGPUDeviceDescriptor device_desc = {};
127+
if (wgpuAdapterGetLimits(ctx.adapter, &supported_limits) == WGPUStatus_Success) {
128+
device_desc.requiredLimits = &supported_limits;
129+
}
125130
device_desc.uncapturedErrorCallbackInfo.callback = on_device_error;
126131

127132
wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb);

0 commit comments

Comments
 (0)