Update

JulianCloudNTH · JulianCloudNTH · commit a464b37eeb03 · 2026-06-03T13:57:37.000-07:00
[ghstack-poisoned]
diff --git a/backends/webgpu/runtime/WebGPUBackend.cpp b/backends/webgpu/runtime/WebGPUBackend.cpp
@@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(
   }
 
   try {
-    graph->build(flatbuffer_data, constant_data, context.get_named_data_map());
+    graph->build(flatbuffer_data, constant_data);
   } catch (const std::exception& e) {
     ET_LOG(Error, "WebGPU graph build failed: %s", e.what());
     graph->~WebGPUGraph();
diff --git a/backends/webgpu/runtime/WebGPUGraph.cpp b/backends/webgpu/runtime/WebGPUGraph.cpp
@@ -10,7 +10,6 @@
 #include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>
 
 #include <executorch/backends/vulkan/serialization/schema_generated.h>
-#include <executorch/runtime/core/named_data_map.h>
 
 #include <executorch/backends/webgpu/runtime/WebGPUDevice.h>
 #include <webgpu/wgpu.h>
@@ -94,8 +93,7 @@ WebGPUGraph::~WebGPUGraph() {
 
 void WebGPUGraph::build(
     const void* flatbuffer_data,
-    const uint8_t* constant_data,
-    const executorch::runtime::NamedDataMap* named_data_map) {
+    const uint8_t* constant_data) {
   if (!device_) {
     auto* ctx = get_default_webgpu_context();
     if (ctx) {
@@ -167,31 +165,6 @@ void WebGPUGraph::build(
                 const uint8_t* src = constant_data + vk_bytes->offset();
                 wgpuQueueWriteBuffer(
                     queue_, tensor.buffer, 0, src, tensor.nbytes);
-              } else if (
-                  vk_bytes->named_key() != nullptr &&
-                  named_data_map != nullptr) {
-                // Constant stored in the PTE named-data map.
-                auto buf =
-                    named_data_map->get_data(vk_bytes->named_key()->c_str());
-                if (!buf.ok()) {
-                  throw std::runtime_error(
-                      std::string("WebGPU: named constant '") +
-                      vk_bytes->named_key()->c_str() +
-                      "' not found in NamedDataMap");
-                }
-                if (buf->size() < tensor.nbytes) {
-                  throw std::runtime_error(
-                      std::string("WebGPU: named constant '") +
-                      vk_bytes->named_key()->c_str() + "' undersized: have " +
-                      std::to_string(buf->size()) + " bytes, need " +
-                      std::to_string(tensor.nbytes));
-                }
-                wgpuQueueWriteBuffer(
-                    queue_, tensor.buffer, 0, buf->data(), tensor.nbytes);
-                buf->Free();
-              } else {
-                throw std::runtime_error(
-                    "WebGPU: constant has no inline offset and no named-data key");
               }
             }
           }
diff --git a/backends/webgpu/runtime/WebGPUGraph.h b/backends/webgpu/runtime/WebGPUGraph.h
@@ -15,8 +15,6 @@
 #include <unordered_map>
 #include <vector>
 
-#include <executorch/runtime/core/named_data_map.h>
-
 namespace executorch {
 namespace backends {
 namespace webgpu {
@@ -68,10 +66,7 @@ class WebGPUGraph {
 
   // Build the graph from a deserialized VkGraph flatbuffer and constant data.
   // The flatbuffer_data pointer must remain valid during build().
-  void build(
-      const void* flatbuffer_data,
-      const uint8_t* constant_data,
-      const executorch::runtime::NamedDataMap* named_data_map = nullptr);
+  void build(const void* flatbuffer_data, const uint8_t* constant_data);
 
   // Copy input tensor data from host pointers into GPU buffers.
   void copy_inputs(const std::vector<std::pair<const void*, size_t>>& inputs);
diff --git a/backends/webgpu/runtime/WebGPUUtils.h b/backends/webgpu/runtime/WebGPUUtils.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <webgpu/webgpu.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+
+namespace executorch::backends::webgpu::utils {
+
+// Clamps a desired workgroup size to the device's
+// maxComputeInvocationsPerWorkgroup limit (e.g. SwiftShader caps at 128).
+// Returns `desired` unchanged when the limit query fails or reports 0.
+inline uint32_t clamp_workgroup_size(WGPUDevice device, uint32_t desired) {
+  WGPULimits limits = {};
+  if (wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
+      limits.maxComputeInvocationsPerWorkgroup > 0) {
+    return std::min(desired, limits.maxComputeInvocationsPerWorkgroup);
+  }
+  return desired;
+}
+
+// Computes the 1D dispatch count ceil(num_threads / workgroup_size) (mirrors
+// Vulkan div_up).
+//
+// `op_name` must be non-null; it is only used to label the error messages.
+//
+// Throws std::runtime_error if `workgroup_size` is 0, or if the count exceeds
+// the device's maxComputeWorkgroupsPerDimension (65535 is assumed when the
+// limit query fails or reports 0).
+inline uint32_t compute_1d_workgroup_count(
+    WGPUDevice device,
+    uint32_t num_threads,
+    uint32_t workgroup_size,
+    const char* op_name) {
+  if (workgroup_size == 0) {
+    throw std::runtime_error(
+        std::string("WebGPU ") + op_name +
+        ": workgroup size must be non-zero");
+  }
+  // Ceiling division without forming `num_threads + workgroup_size - 1`, which
+  // wraps around for num_threads close to UINT32_MAX and would yield a bogus
+  // small count that passes the limit check below.
+  const uint32_t count = num_threads / workgroup_size +
+      (num_threads % workgroup_size != 0 ? 1u : 0u);
+  WGPULimits limits = {};
+  const uint32_t max_count =
+      wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
+          limits.maxComputeWorkgroupsPerDimension > 0
+      ? limits.maxComputeWorkgroupsPerDimension
+      : 65535u; // WebGPU spec-default floor
+  if (count > max_count) {
+    throw std::runtime_error(
+        std::string("WebGPU ") + op_name +
+        ": workgroup count exceeds the 1D dispatch limit");
+  }
+  return count;
+}
+
+} // namespace executorch::backends::webgpu::utils
diff --git a/backends/webgpu/runtime/ops/add/BinaryOp.cpp b/backends/webgpu/runtime/ops/add/BinaryOp.cpp
@@ -7,12 +7,12 @@
  */
 
 #include <executorch/backends/webgpu/runtime/WebGPUGraph.h>
+#include <executorch/backends/webgpu/runtime/WebGPUUtils.h>
 #include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>
 #include <executorch/backends/webgpu/runtime/ops/add/binary_add_wgsl.h>
 
 #include <webgpu/webgpu.h>
 
-#include <algorithm>
 #include <cmath>
 #include <cstring>
 
@@ -51,21 +51,10 @@ void add_impl(WebGPUGraph& graph, const std::vector<int>& args) {
   uint32_t num_elements =
       static_cast<uint32_t>(out_tensor.nbytes / sizeof(float));
 
-  // Clamp the workgroup size to the device limit (SwiftShader caps at 128).
-  WGPULimits limits = {};
-  uint32_t device_max =
-      wgpuDeviceGetLimits(device, &limits) == WGPUStatus_Success &&
-          limits.maxComputeInvocationsPerWorkgroup > 0
-      ? limits.maxComputeInvocationsPerWorkgroup
-      : kBinaryAddWorkgroupSize;
-  uint32_t wg_size = std::min(kBinaryAddWorkgroupSize, device_max);
-  uint32_t workgroup_count = (num_elements + wg_size - 1) / wg_size;
-
-  // Validate the 1D dispatch limit before allocating any GPU objects.
-  if (workgroup_count > 65535u) {
-    throw std::runtime_error(
-        "WebGPU add: workgroup count exceeds the 1D dispatch limit (65535)");
-  }
+  uint32_t wg_size =
+      utils::clamp_workgroup_size(device, kBinaryAddWorkgroupSize);
+  uint32_t workgroup_count =
+      utils::compute_1d_workgroup_count(device, num_elements, wg_size, "add");
 
   WGPUConstantEntry wg_size_constant = {};
   wg_size_constant.key = {"wg_size", WGPU_STRLEN};
diff --git a/backends/webgpu/runtime/ops/rms_norm/rms_norm.wgsl b/backends/webgpu/runtime/ops/rms_norm/rms_norm.wgsl
@@ -1,3 +1,6 @@
+// NOTE: This file is for editor/tooling support only. The runtime consumes the
+// inline copy of this shader in `rms_norm_wgsl.h` (kRmsNormWGSL). Keep the two
+// in sync by hand — any edit here must be mirrored there.
 @group(0) @binding(0) var<storage, read_write> t_out: array<f32>;
 @group(0) @binding(1) var<storage, read> t_in: array<f32>;
 @group(0) @binding(2) var<storage, read> t_weight: array<f32>;
diff --git a/backends/webgpu/runtime/ops/rms_norm/rms_norm_wgsl.h b/backends/webgpu/runtime/ops/rms_norm/rms_norm_wgsl.h
@@ -13,6 +13,11 @@
 namespace executorch::backends::webgpu {
 
 // WGSL shader source for rms_norm: y = x * w * rsqrt(mean(x^2) + eps)
+//
+// NOTE: This inline string is the runtime source of truth — it is what gets
+// passed to wgpuDeviceCreateShaderModule. The sibling `rms_norm.wgsl` file
+// exists only for editor/tooling support and must be kept identical to this
+// string by hand; there is no build-time sync.
 inline constexpr const char* kRmsNormWGSL = R"(
 @group(0) @binding(0) var<storage, read_write> t_out: array<f32>;
 @group(0) @binding(1) var<storage, read> t_in: array<f32>;

Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ Result<DelegateHandle*> WebGPUBackend::init(`
`76`	`76`	`}`
`77`	`77`
`78`	`78`	`try {`
`79`		`- graph->build(flatbuffer_data, constant_data, context.get_named_data_map());`
	`79`	`+ graph->build(flatbuffer_data, constant_data);`
`80`	`80`	`} catch (const std::exception& e) {`
`81`	`81`	`ET_LOG(Error, "WebGPU graph build failed: %s", e.what());`
`82`	`82`	`graph->~WebGPUGraph();`