From 4792e669f82899a4d4ebaf0eebaf73049eba0bbf Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Fri, 30 May 2025 07:56:41 -0700 Subject: [PATCH] [ET-VK] Fixing out_limits_scaled calculation for conv2d pw ops. The fix changes the calculation of `out_limits_scaled` from: ```glsl const int out_limits_scaled[2] = {out_limits.x + (TILE_SIZE_X - 1) * TILE_SIZE_X, out_limits.y + (TILE_SIZE_Y - 1) * TILE_SIZE_Y}; ``` to: ```glsl const int out_limits_scaled[2] = {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y}; ``` This change ensures that `out_limits_scaled` is calculated correctly: each output limit is divided by its tile size and rounded up, so the result takes into account both the tile size and the output limits of the convolution operation. Differential Revision: [D75575662](https://our.internmc.facebook.com/intern/diff/D75575662/) [ghstack-poisoned] --- backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl | 2 +- backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index c218b8ac8cc..85c1ea1058e 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -46,7 +46,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * size is only 1x1, making it easier to re-use loaded texels from t_kernel. 
*/ void main() { - const int out_limits_scaled[2] = {out_limits.x + (TILE_SIZE_X - 1) * TILE_SIZE_X, out_limits.y + (TILE_SIZE_Y - 1) * TILE_SIZE_Y}; + const int out_limits_scaled[2] = {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y}; const int div_by_x = int(gl_GlobalInvocationID.x / out_limits_scaled[0]); const int out_pos[3] = {int(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x, int(gl_GlobalInvocationID.y)}; diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl index 01dcff00e59..03abd91b074 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw_s1p0.glsl @@ -48,7 +48,7 @@ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; * size is only 1x1, making it easier to re-use loaded texels from t_kernel. */ void main() { - const int out_limits_scaled[2] = {out_limits.x + (TILE_SIZE_X - 1) * TILE_SIZE_X, out_limits.y + (TILE_SIZE_Y - 1) * TILE_SIZE_Y}; + const int out_limits_scaled[2] = {(out_limits.x + (TILE_SIZE_X - 1)) / TILE_SIZE_X, (out_limits.y + (TILE_SIZE_Y - 1)) / TILE_SIZE_Y}; const uint16_t div_by_x = uint16_t(gl_GlobalInvocationID.x / out_limits_scaled[0]); const uint16_t out_pos_xy[2] = {uint16_t(gl_GlobalInvocationID.x % out_limits_scaled[0]), div_by_x};