Skip to content

Commit aec1978

Browse files
Update
[ghstack-poisoned]
1 parent 6a9a74a commit aec1978

15 files changed

Lines changed: 232 additions & 8 deletions

File tree

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -ex
9+
10+
# SwiftShader: software Vulkan adapter for GPU-less CI (LunarG SDK not needed).
11+
# Download the pinned prebuilt SwiftShader archive and point the Vulkan loader
# at it. SwiftShader is a software Vulkan adapter for GPU-less CI runners.
#
# Steps:
#   1. Fetch the tarball from the ossci-android S3 bucket (with retries).
#   2. Unpack it under /tmp/swiftshader.
#   3. Export VK_ICD_FILENAMES, LD_LIBRARY_PATH and ETVK_USING_SWIFTSHADER.
#
# The exports take effect in the calling shell when this file is sourced.
# Helper variables are `local` so they do not leak into that shell.
install_swiftshader() {
  local _https_amazon_aws=https://ossci-android.s3.amazonaws.com
  local _swiftshader_archive=swiftshader-abe07b943-prebuilt.tar.gz
  local _swiftshader_dir=/tmp/swiftshader
  local _swiftshader_lib_dir="${_swiftshader_dir}/swiftshader/build/Linux"
  local _tmp_archive="/tmp/${_swiftshader_archive}"

  mkdir -p "${_swiftshader_dir}"

  curl --silent --show-error --location --fail --retry 3 --retry-all-errors \
    --output "${_tmp_archive}" "${_https_amazon_aws}/${_swiftshader_archive}"

  tar -C "${_swiftshader_dir}" -xzf "${_tmp_archive}"

  export VK_ICD_FILENAMES="${_swiftshader_lib_dir}/vk_swiftshader_icd.json"
  # Prepend rather than overwrite: SwiftShader's libraries still win, but any
  # library path the caller already configured is preserved. The `:+` form adds
  # the separator only when LD_LIBRARY_PATH was previously set and non-empty.
  export LD_LIBRARY_PATH="${_swiftshader_lib_dir}/${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  export ETVK_USING_SWIFTSHADER=1
}
28+
29+
install_swiftshader
30+
bash backends/webgpu/scripts/setup-wgpu-native.sh

.ci/scripts/test_backend.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ if [[ "$FLOW" == *vulkan* ]]; then
5757
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
5858
fi
5959

60+
if [[ "$FLOW" == *webgpu* ]]; then
61+
# Setup swiftshader (software Vulkan adapter for GPU-less runners) and wgpu-native,
62+
# which are required to build and run the WebGPU delegate.
63+
source .ci/scripts/setup-webgpu-linux-deps.sh
64+
65+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_WEBGPU=ON"
66+
fi
67+
6068
if [[ "$FLOW" == *arm* ]]; then
6169
if [[ "$SUITE" == "operators" ]]; then
6270
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
name: Test WebGPU Backend
2+
3+
on:
4+
schedule:
5+
- cron: 0 2 * * *
6+
push:
7+
branches:
8+
- main
9+
- release/*
10+
tags:
11+
- ciflow/nightly/*
12+
pull_request:
13+
workflow_dispatch:
14+
15+
concurrency:
16+
group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
17+
cancel-in-progress: true
18+
19+
jobs:
20+
test-webgpu:
21+
uses: ./.github/workflows/_test_backend.yml
22+
with:
23+
backend: webgpu
24+
flows: '["webgpu"]'
25+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
26+
timeout: 120
27+
run-linux: true

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,6 +1056,10 @@ if(EXECUTORCH_BUILD_PYBIND)
10561056
list(APPEND _dep_libs vulkan_backend)
10571057
endif()
10581058

1059+
if(EXECUTORCH_BUILD_WEBGPU)
1060+
list(APPEND _dep_libs webgpu_backend)
1061+
endif()
1062+
10591063
# compile options for pybind
10601064
set(_pybind_compile_options
10611065
$<$<CXX_COMPILER_ID:MSVC>:/EHsc

backends/test/suite/flow.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ def _load_vulkan() -> list[TestFlow]:
117117
return [VULKAN_TEST_FLOW, VULKAN_STATIC_INT8_PER_CHANNEL_TEST_FLOW]
118118

119119

120+
def _load_webgpu() -> list[TestFlow]:
    """Return the test flows provided by the WebGPU backend.

    The flow module is imported inside the function, so the import only
    happens when this loader is actually called.
    """
    from executorch.backends.test.suite.flows.webgpu import (
        WEBGPU_TEST_FLOW as webgpu_flow,
    )

    flows = [webgpu_flow]
    return flows
124+
125+
120126
def _load_openvino() -> list[TestFlow]:
121127
from executorch.backends.test.suite.flows.openvino import (
122128
OPENVINO_INT8_TEST_FLOW,
@@ -178,6 +184,7 @@ def all_flows() -> dict[str, TestFlow]:
178184
+ _register_flow(_load_xnnpack, "XNNPACK")
179185
+ _register_flow(_load_coreml, "Core ML")
180186
+ _register_flow(_load_vulkan, "Vulkan")
187+
+ _register_flow(_load_webgpu, "WebGPU")
181188
+ _register_flow(_load_openvino, "OpenVINO")
182189
+ _register_flow(_load_qnn, "QNN")
183190
+ _register_flow(_load_arm, "ARM")
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from executorch.backends.test.suite.flow import TestFlow
8+
from executorch.backends.webgpu.test.tester import WebGPUTester
9+
10+
11+
def _create_webgpu_flow() -> TestFlow:
    """Build the TestFlow that describes the WebGPU backend test configuration."""
    # Not supported in swiftshader
    unsupported_dtype_patterns = ["float16", "float64"]
    flow = TestFlow(
        "webgpu",
        backend="webgpu",
        tester_factory=WebGPUTester,
        skip_patterns=unsupported_dtype_patterns,
    )
    return flow
18+
19+
20+
WEBGPU_TEST_FLOW = _create_webgpu_flow()

backends/webgpu/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if(NOT TARGET vulkan_schema)
2121
# target), but vulkan_schema is unconditionally defined.
2222
add_subdirectory(
2323
${CMAKE_CURRENT_SOURCE_DIR}/../vulkan
24-
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema
24+
${CMAKE_CURRENT_BINARY_DIR}/_vulkan_schema EXCLUDE_FROM_ALL
2525
)
2626
endif()
2727

backends/webgpu/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.

backends/webgpu/runtime/WebGPUDevice.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,13 @@ WebGPUContext create_webgpu_context() {
121121
device_cb.callback = on_device_request;
122122
device_cb.userdata1 = &device_result;
123123

124+
// Request the adapter's full limits; software adapters default many to 0.
125+
WGPULimits supported_limits = {};
124126
WGPUDeviceDescriptor device_desc = {};
127+
if (wgpuAdapterGetLimits(ctx.adapter, &supported_limits) ==
128+
WGPUStatus_Success) {
129+
device_desc.requiredLimits = &supported_limits;
130+
}
125131
device_desc.uncapturedErrorCallbackInfo.callback = on_device_error;
126132

127133
wgpuAdapterRequestDevice(ctx.adapter, &device_desc, device_cb);

backends/webgpu/runtime/ops/add/BinaryOp.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <webgpu/webgpu.h>
1414

15+
#include <algorithm>
1516
#include <cmath>
1617
#include <cstring>
1718

@@ -50,6 +51,26 @@ void add_impl(WebGPUGraph& graph, const std::vector<int>& args) {
5051
uint32_t num_elements =
5152
static_cast<uint32_t>(out_tensor.nbytes / sizeof(float));
5253

54+
// Clamp the workgroup size to the device limit (SwiftShader caps at 128).
55+
WGPULimits limits = {};
56+
uint32_t device_max =
57+
wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
58+
limits.maxComputeInvocationsPerWorkgroup > 0
59+
? limits.maxComputeInvocationsPerWorkgroup
60+
: kBinaryAddWorkgroupSize;
61+
uint32_t wg_size = std::min(kBinaryAddWorkgroupSize, device_max);
62+
uint32_t workgroup_count = (num_elements + wg_size - 1) / wg_size;
63+
64+
// Validate the 1D dispatch limit before allocating any GPU objects.
65+
if (workgroup_count > 65535u) {
66+
throw std::runtime_error(
67+
"WebGPU add: workgroup count exceeds the 1D dispatch limit (65535)");
68+
}
69+
70+
WGPUConstantEntry wg_size_constant = {};
71+
wg_size_constant.key = {"wg_size", WGPU_STRLEN};
72+
wg_size_constant.value = static_cast<double>(wg_size);
73+
5374
// Create uniform buffer for params
5475
AddParams params = {};
5576
params.num_elements = num_elements;
@@ -115,6 +136,8 @@ void add_impl(WebGPUGraph& graph, const std::vector<int>& args) {
115136
pipeline_desc.layout = pipeline_layout;
116137
pipeline_desc.compute.module = shader;
117138
pipeline_desc.compute.entryPoint = {"main", WGPU_STRLEN};
139+
pipeline_desc.compute.constantCount = 1;
140+
pipeline_desc.compute.constants = &wg_size_constant;
118141
WGPUComputePipeline pipeline =
119142
wgpuDeviceCreateComputePipeline(device, &pipeline_desc);
120143

@@ -146,16 +169,14 @@ void add_impl(WebGPUGraph& graph, const std::vector<int>& args) {
146169
bg_desc.entries = bg_entries;
147170
WGPUBindGroup bind_group = wgpuDeviceCreateBindGroup(device, &bg_desc);
148171

149-
uint32_t workgroup_count =
150-
(num_elements + kBinaryAddWorkgroupSize - 1) / kBinaryAddWorkgroupSize;
151-
152172
graph.add_dispatch({pipeline, bind_group, workgroup_count});
153173

154174
// Release intermediate objects (pipeline and bind_group are kept by dispatch)
155175
wgpuShaderModuleRelease(shader);
156176
wgpuBindGroupLayoutRelease(bgl);
157177
wgpuPipelineLayoutRelease(pipeline_layout);
158-
// uniform_buffer is kept alive by the bind group
178+
// Drop our ref; the bind group keeps the uniform buffer alive until release.
179+
wgpuBufferRelease(uniform_buffer);
159180
}
160181

161182
} // namespace

0 commit comments

Comments
 (0)