Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void main() {
// Compute the value for each element in the texel along the packed dim.
VEC4_T outtex = VEC4_T(0);
int limit = min(
4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);
for (int comp = 0; comp < limit; comp++) {
int elem_idx = out_tidx.data[0]; // W index is the linear element index
outtex[comp] = VEC4_T(start + elem_idx * step).x;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

int limit = min(
4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);
for (int comp = 0; comp < 4; comp++) {
if (comp >= limit) {
break;
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/full_texture.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void main() {

TensorIndex4D tidx =
texture_pos_to_tensor4d_idx_simple(outp, pos, out_layout);
const int packed_dim_size = outp.sizes[packed_dim];
const int packed_dim_size = safe_idx(outp.sizes, packed_dim);
int packed_idx = tidx.data[packed_dim];

if (packed_idx + 3 >= packed_dim_size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

int limit = min(
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
for (int comp = 0; comp < 4; comp++) {
TensorIndex4D input_tidx = out_tidx;
int gather_idx = idx_texel[comp];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

int limit = min(
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
for (int comp = 0; comp < limit; comp++) {
int idx = idx_texel[comp];

Expand Down
8 changes: 8 additions & 0 deletions backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ uint safe_idx(const uvec4 v, const int idx) {
return v.w;
}

// Component read for ivec3 that selects the lane through explicit comparisons
// on idx rather than subscripting the vector (v[idx]). Same rationale as the
// ivec4 overload of safe_idx.
//
// idx == 0 yields v.x, idx == 1 yields v.y, and every other value of idx
// (including out-of-range ones) yields v.z.
int safe_idx(const ivec3 v, const int idx) {
  return (idx == 0) ? v.x : ((idx == 1) ? v.y : v.z);
}

// Safe ivec4 component write via if/else chain. Companion to safe_idx for
// cases where we need to set a component by a spec-const-derived index.
void safe_set(inout ivec4 v, const int idx, const int val) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void main() {
texel_idx_to_tensor4d_idx(outp, texel_idx, outp_layout);

// Bounds check on outer dimension
if (tidx.data[outer_dim] >= int(outp.sizes[0][outer_dim])) {
if (tidx.data[outer_dim] >= int(safe_idx(outp.sizes[0], outer_dim))) {
return;
}

Expand All @@ -55,7 +55,7 @@ void main() {
int packed = 0;
[[unroll]] for (int i = 0; i < 4; ++i) {
const int elem_inner = tidx.data[inner_dim] + i;
if (elem_inner < int(outp.sizes[0][inner_dim])) {
if (elem_inner < int(safe_idx(outp.sizes[0], inner_dim))) {
// Build element coordinates
ivec4 elem = tidx.data;
elem[inner_dim] = elem_inner;
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/pad_texture.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void main() {

// Tail texels may have fewer than 4 valid elements; leave extras as 0.
const int limit =
min(4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
min(4, safe_idx(outp.sizes, packed_dim) - out_tidx.data[packed_dim]);

VEC4_T out_texel = VEC4_T(0);

Expand Down
13 changes: 7 additions & 6 deletions backends/vulkan/runtime/graph/ops/glsl/reduce.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ layout(constant_id = 5) const int group_dim = 1;
shared vec4 shared_vecs[MAX_NTHREADS];

#include "indexing_utils.h"
#include "indexing.glslh"

int tid_to_smi(const ivec2 tid) {
return tid.x + tid.y * NWORKERS;
Expand Down Expand Up @@ -95,7 +96,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
scan_pos[reduce_dim] = tid.x;
// Partially accumulate over elements i, i + NWORKERS, i + 2*NWORKERS, ... of
// the reduction row
for (int i = tid.x; i < tin_sizes[reduce_dim];
for (int i = tid.x; i < safe_idx(tin_sizes, reduce_dim);
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
accum = UPDATE_ACCUM(accum, load_texel(tin, scan_pos));
}
Expand All @@ -115,11 +116,11 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {

// Determine if there are any padding elements in the final texel of the
// packed dimension
const int nspill = mod4(tin_sizes[packed_dim]);
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
// Detect if this thread is working on the final texels of the packed
// dimension, which may have padding elements
const bool is_last_texel =
scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
scan_pos[packed_dim] == (safe_idx(tin_limits, packed_dim) - 1);

// Explicitly set padding elements to 0
if (is_last_texel && nspill > 0) {
Expand All @@ -145,10 +146,10 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
const int smi = tid_to_smi(tid);

// Number of non-padding elements in the last texel in the reduction row
const int nspill = mod4(tin_sizes[packed_dim]);
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
// Only reduce up to the last "complete" texel. The last texel will need to be
// handled specially if it has padding elements.
const int reduce_len = tin_sizes[packed_dim] - nspill;
const int reduce_len = safe_idx(tin_sizes, packed_dim) - nspill;

scan_pos[reduce_dim] = 0;
vec4 accum = INIT_ACCUM(vec4(load_texel(tin, scan_pos).x));
Expand All @@ -163,7 +164,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
// For the last texel in the dim, if there are padding elements then each
// element of the texel needs to be processed individually such that the
// padding elements are ignored
if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0) {
if (scan_pos[reduce_dim] == safe_idx(tin_limits, reduce_dim) - 1 && nspill > 0) {
const vec4 intex = load_texel(tin, scan_pos);
for (int i = 0; i < nspill; i++) {
accum.x = UPDATE_ACCUM(accum.x, intex[i]);
Expand Down
11 changes: 6 additions & 5 deletions backends/vulkan/runtime/graph/ops/glsl/reduce2d.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ layout(constant_id = 6) const int group_dim = 2;
shared vec4 shared_vecs[MAX_NTHREADS];

#include "indexing_utils.h"
#include "indexing.glslh"

int tid_to_smi(const ivec2 tid) {
return tid.x + tid.y * NWORKERS;
Expand All @@ -68,12 +69,12 @@ void reduce_2d_non_packed_dim(const ivec2 tid, ivec3 scan_pos) {

// First dimension reduction
scan_pos[reduce_dim1] = tid.x;
for (int i = tid.x; i < tin_sizes[reduce_dim1];
for (int i = tid.x; i < safe_idx(tin_sizes, reduce_dim1);
i += NWORKERS, scan_pos[reduce_dim1] += NWORKERS) {

// Second dimension reduction
scan_pos[reduce_dim2] = 0;
for (int j = 0; j < tin_sizes[reduce_dim2]; j++, scan_pos[reduce_dim2]++) {
for (int j = 0; j < safe_idx(tin_sizes, reduce_dim2); j++, scan_pos[reduce_dim2]++) {
accum = UPDATE_ACCUM(accum, load_texel(tin, scan_pos));
}
}
Expand All @@ -93,11 +94,11 @@ void reduce_2d_non_packed_dim(const ivec2 tid, ivec3 scan_pos) {

// Determine if there are any padding elements in the final texel of the
// packed dimension
const int nspill = mod4(tin_sizes[packed_dim]);
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
// Detect if this thread is working on the final texels of the packed
// dimension, which may have padding elements
const bool is_last_texel =
scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
const bool is_last_texel =
scan_pos[packed_dim] == (safe_idx(tin_limits, packed_dim) - 1);

// Explicitly set padding elements to 0
if (is_last_texel && nspill > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

const int limit = min(
4, out_meta.sizes[packed_dim] - out_tidx.data[packed_dim]);
4, safe_idx(out_meta.sizes, packed_dim) - out_tidx.data[packed_dim]);
for (int comp = 0; comp < limit; comp++) {
TensorIndex4D in_tidx = out_tidx;
in_tidx.data = ivec4(
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/select.glslh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ TensorIndex4D out_tidx_to_in_tidx(const TensorIndex4D out_tidx) {

int adjusted_index = index;
if (index < 0) {
adjusted_index = index + inp.sizes[selected_dim];
adjusted_index = index + safe_idx(inp.sizes, selected_dim);
}

// Handle different dimensions for selection
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/slice.glslh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TensorIndex4D out_tidx_to_in_tidx(const TensorIndex4D out_tidx) {

int adjusted_start = start;
if (start < 0) {
adjusted_start = start + inp.sizes[selected_dim];
adjusted_start = start + safe_idx(inp.sizes, selected_dim);
}

in_tidx.data[selected_dim] = adjusted_start + out_tidx.data[selected_dim] * step;
Expand Down
20 changes: 10 additions & 10 deletions backends/vulkan/runtime/graph/ops/glsl/softmax.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void softmax_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {

scan_pos[reduce_dim] = tid.x;
vec4 max_elements = texelFetch(tin, scan_pos, 0);
for (int i = tid.x; i < in_meta.sizes[reduce_dim];
for (int i = tid.x; i < safe_idx(in_meta.sizes, reduce_dim);
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
max_elements = max(max_elements, texelFetch(tin, scan_pos, 0));
}
Expand All @@ -71,7 +71,7 @@ void softmax_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {

scan_pos[reduce_dim] = tid.x;
vec4 denominators = vec4(0);
for (int i = tid.x; i < in_meta.sizes[reduce_dim];
for (int i = tid.x; i < safe_idx(in_meta.sizes, reduce_dim);
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
denominators += exp(texelFetch(tin, scan_pos, 0) - max_elements);
}
Expand All @@ -83,12 +83,12 @@ void softmax_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
denominators += shared_sum[group_i];
}

const int nspill = mod_4(in_meta.sizes[packed_dim]);
const int nspill = mod_4(safe_idx(in_meta.sizes, packed_dim));
const bool is_last_texel =
scan_pos[packed_dim] == (out_meta.limits[packed_dim] - 1);
scan_pos[packed_dim] == (safe_idx(out_meta.limits, packed_dim) - 1);

scan_pos[reduce_dim] = tid.x;
for (int i = tid.x; i < in_meta.sizes[reduce_dim];
for (int i = tid.x; i < safe_idx(in_meta.sizes, reduce_dim);
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
const vec4 numerators = op1(texelFetch(tin, scan_pos, 0) - max_elements);
const vec4 safe_denom = max(denominators, vec4(1e-37));
Expand Down Expand Up @@ -124,16 +124,16 @@ void softmax_packed_dim(const ivec2 tid, ivec3 scan_pos) {
const int smi = tid_to_smi(tid);
int group_i;

const int nspill = mod_4(in_meta.sizes[packed_dim]);
const int reduce_len = in_meta.sizes[packed_dim] - nspill;
const int nspill = mod_4(safe_idx(in_meta.sizes, packed_dim));
const int reduce_len = safe_idx(in_meta.sizes, packed_dim) - nspill;

scan_pos[reduce_dim] = tid.x;
vec4 max_elements = vec4(-3.402823e+38);
for (int i = tid.x * 4; i < reduce_len;
i += NWORKERS * 4, scan_pos[reduce_dim] += NWORKERS) {
max_elements = max(max_elements, texelFetch(tin, scan_pos, 0));
}
if (scan_pos[reduce_dim] == out_meta.limits[reduce_dim] - 1 && nspill > 0) {
if (scan_pos[reduce_dim] == safe_idx(out_meta.limits, reduce_dim) - 1 && nspill > 0) {
const vec4 intex = texelFetch(tin, scan_pos, 0);
for (int i = 0; i < nspill; ++i) {
max_elements.x = max(intex[i], max_elements.x);
Expand All @@ -157,7 +157,7 @@ void softmax_packed_dim(const ivec2 tid, ivec3 scan_pos) {
i += NWORKERS * 4, scan_pos[reduce_dim] += NWORKERS) {
denominators += exp(texelFetch(tin, scan_pos, 0) - max_element);
}
if (nspill > 0 && scan_pos[reduce_dim] == out_meta.limits[reduce_dim] - 1) {
if (nspill > 0 && scan_pos[reduce_dim] == safe_idx(out_meta.limits, reduce_dim) - 1) {
const vec4 intex = texelFetch(tin, scan_pos, 0);
for (int i = 0; i < nspill; ++i) {
denominators.x += exp(intex[i] - max_element);
Expand All @@ -182,7 +182,7 @@ void softmax_packed_dim(const ivec2 tid, ivec3 scan_pos) {
const vec4 numerators = op1(texelFetch(tin, scan_pos, 0) - max_element);
imageStore(tout, scan_pos, op2(numerators, safe_denominator));
}
if (nspill > 0 && scan_pos[reduce_dim] == out_meta.limits[reduce_dim] - 1) {
if (nspill > 0 && scan_pos[reduce_dim] == safe_idx(out_meta.limits, reduce_dim) - 1) {
const vec4 numerator = op1(texelFetch(tin, scan_pos, 0) - max_element);
vec4 outtex = op2(numerator, safe_denominator);
[[unroll]] for (int i = nspill; i < 4; ++i) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

int limit = min(
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);

TensorIndex4D input_tidx = out_tidx;
input_tidx.data[split_dim] += split_offset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void main() {
VEC4_T out_texel = VEC4_T(0);

int limit = min(
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
for (int comp = 0; comp < limit; comp++) {
TensorIndex4D in_tidx = out_tidx_to_in_tidx(out_tidx);

Expand Down
13 changes: 7 additions & 6 deletions backends/vulkan/runtime/graph/ops/glsl/var_texture3d.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ shared VEC4_T shared_sum_sq[MAX_NTHREADS];
shared int shared_count[MAX_NTHREADS];

#include "indexing_utils.h"
#include "indexing.glslh"

int tid_to_smi(const ivec2 tid) {
return tid.x + tid.y * NWORKERS;
Expand All @@ -73,7 +74,7 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {
int count = 0;

scan_pos[reduce_dim] = tid.x;
for (int i = tid.x; i < tin_sizes[reduce_dim];
for (int i = tid.x; i < safe_idx(tin_sizes, reduce_dim);
i += NWORKERS, scan_pos[reduce_dim] += NWORKERS) {
VEC4_T val = load_texel(tin, scan_pos);
sum += val;
Expand Down Expand Up @@ -103,11 +104,11 @@ void reduce_nonpacked_dim(const ivec2 tid, ivec3 scan_pos) {

// Determine if there are any padding elements in the final texel of the
// packed dimension
const int nspill = mod4(tin_sizes[packed_dim]);
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
// Detect if this thread is working on the final texels of the packed
// dimension, which may have padding elements
const bool is_last_texel =
scan_pos[packed_dim] == (tin_limits[packed_dim] - 1);
scan_pos[packed_dim] == (safe_idx(tin_limits, packed_dim) - 1);

VEC4_T variance = calculate_variance(sum, sum_sq, count);

Expand Down Expand Up @@ -136,10 +137,10 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
const int smi = tid_to_smi(tid);

// Number of non-padding elements in the last texel in the reduction row
const int nspill = mod4(tin_sizes[packed_dim]);
const int nspill = mod4(safe_idx(tin_sizes, packed_dim));
// Only reduce up to the last "complete" texel. The last texel will need to be
// handled specially if it has padding elements.
const int reduce_len = tin_sizes[packed_dim] - nspill;
const int reduce_len = safe_idx(tin_sizes, packed_dim) - nspill;

VEC4_T sum = VEC4_T(0);
VEC4_T sum_sq = VEC4_T(0);
Expand All @@ -158,7 +159,7 @@ void reduce_packed_dim(const ivec2 tid, ivec3 scan_pos) {
// For the last texel in the dim, if there are padding elements then each
// element of the texel needs to be processed individually such that the
// padding elements are ignored
if (scan_pos[reduce_dim] == tin_limits[reduce_dim] - 1 && nspill > 0) {
if (scan_pos[reduce_dim] == safe_idx(tin_limits, reduce_dim) - 1 && nspill > 0) {
const VEC4_T val = load_texel(tin, scan_pos);
for (int i = 0; i < nspill; i++) {
sum.x += val[i];
Expand Down
2 changes: 1 addition & 1 deletion backends/vulkan/runtime/graph/ops/glsl/where.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ void main() {
VEC4_T outtex = VEC4_T(0);

int limit = min(
4, outp.sizes[out_packed_dim] - out_tidx.data[out_packed_dim]);
4, safe_idx(outp.sizes, out_packed_dim) - out_tidx.data[out_packed_dim]);
for (int comp = 0; comp < limit; comp++) {
TensorIndex4D cond_tidx;
cond_tidx.data = min(out_tidx.data, condp.sizes - 1);
Expand Down
1 change: 0 additions & 1 deletion backends/vulkan/runtime/graph/ops/impl/Slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>

namespace vkcompute {

Expand Down
Loading