From 6fc849f1945e31b1362b2e0aabd2bbd01da1680c Mon Sep 17 00:00:00 2001 From: ssjia Date: Tue, 10 Mar 2026 16:08:46 -0700 Subject: [PATCH 1/2] [ET-VK] Fix fp16 inference: clamp constants and sync view_convert shader dtype combos Two fixes for force_fp16 inference mode: 1. Clamp constant tensors to the representable range of the effective dtype before converting them in VkGraphBuilder. When force_fp16 converts fp32 constants to fp16, values like -100000 (used as attention mask fill values) overflow to -inf. A subsequent `0 * (-inf)` produces NaN per IEEE 754, which propagates through the entire model. Clamping to finfo(fp16).min/max (-65504/65504) prevents the overflow. 2. Sync dtype combos across view_convert_buffer.yaml and view_convert_texture.yaml so both define the same set of conversion variants. Each combo is a directional [IN_DTYPE, OUT_DTYPE] pair. Added float->half, half->float, and half->int32 to the buffer YAML, and int32->half, uint8->float/half/int32, float->half, half->float, and half->int32 to the texture YAML. Both files now cover all 9 dtype combos: int32->float, int32->half, uint8->float/half/int32, float->int32, float->half, half->float, half->int32. The float<->half variants are needed by the _to_dim_order_copy operator when running fp16 models (e.g. SceneX). Verified on edgeTAM first frame: mask IoU improved from 0.18 (NaN-corrupted) to 0.9991, matching the fp32 baseline range of 0.9926-0.9995. Also verified SceneX fp16 model (scenex_v9_512_vulkan_fp16_v3.et) no longer crashes with missing view_convert_buffer_float_half shader. 
Differential Revision: [D96036512](https://our.internmc.facebook.com/intern/diff/D96036512/) ghstack-source-id: 350221317 Pull Request resolved: https://github.com/pytorch/executorch/pull/18076 --- .../runtime/graph/ops/glsl/view_convert_buffer.yaml | 3 +++ .../runtime/graph/ops/glsl/view_convert_texture.yaml | 7 +++++++ backends/vulkan/serialization/vulkan_graph_builder.py | 8 ++++++++ 3 files changed, 18 insertions(+) diff --git a/backends/vulkan/runtime/graph/ops/glsl/view_convert_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/view_convert_buffer.yaml index b001df4024d..200d58e1217 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/view_convert_buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/view_convert_buffer.yaml @@ -19,5 +19,8 @@ view_convert_buffer: - parameter_values: [uint8, half] - parameter_values: [uint8, int32] - parameter_values: [float, int32] + - parameter_values: [float, half] + - parameter_values: [half, float] + - parameter_values: [half, int32] shader_variants: - NAME: view_convert_buffer diff --git a/backends/vulkan/runtime/graph/ops/glsl/view_convert_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/view_convert_texture.yaml index 2aec8322dfe..47e9c43ee24 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/view_convert_texture.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/view_convert_texture.yaml @@ -14,6 +14,13 @@ view_convert_texture: parameter_names: [IN_DTYPE, OUT_DTYPE] combos: - parameter_values: [int32, float] + - parameter_values: [int32, half] + - parameter_values: [uint8, float] + - parameter_values: [uint8, half] + - parameter_values: [uint8, int32] - parameter_values: [float, int32] + - parameter_values: [float, half] + - parameter_values: [half, float] + - parameter_values: [half, int32] shader_variants: - NAME: view_convert_texture diff --git a/backends/vulkan/serialization/vulkan_graph_builder.py b/backends/vulkan/serialization/vulkan_graph_builder.py index 128015af4f2..ca5ab196dd2 100644 --- 
a/backends/vulkan/serialization/vulkan_graph_builder.py +++ b/backends/vulkan/serialization/vulkan_graph_builder.py @@ -118,6 +118,14 @@ def maybe_add_constant_tensor(self, node: Node) -> int: # Convert the tensor dtype if needed if tensor.dtype != effective_dtype: + # Clamp float tensors to the representable range of the effective + # dtype to avoid infinities. This is needed when force_fp16 converts + # fp32 constants (e.g. -100000 attention mask values) to fp16 where + # the max representable value is ~65504. + if effective_dtype.is_floating_point: + dtype_info = torch.finfo(effective_dtype) + tensor = tensor.clamp(min=dtype_info.min, max=dtype_info.max) + tensor = tensor.to(effective_dtype) # Serialize tensor data to bytes From 3c651090babec0341c32d53568d4341154f44230 Mon Sep 17 00:00:00 2001 From: ssjia Date: Tue, 10 Mar 2026 16:08:52 -0700 Subject: [PATCH 2/2] [ET-VK][qconv] Use ivec4 reads in pack_q8_conv2d_weights to fix Adreno 740 bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Adreno 740 (Quest 3S), scalar int[] SSBO reads from host-visible staging buffers in the pack_q8_conv2d_weights shader returned incorrect data — specifically t_int8_weight[N] returned the value of t_int8_weight[N-1] for N>0. This caused corrupted conv2d weights for all kernels with kx>0, resulting in 18/45 head-case failures on the SceneX V9 model. Switching the input buffer declaration from scalar int[] to ivec4[] changes the GPU load instruction from scalar loads to vec4 loads, which sidesteps the driver bug. The indexing is updated accordingly: t_int8_weight[word_idx] becomes t_int8_weight[word_idx >> 2][word_idx & 3]. Authored with Claude. 
Differential Revision: [D96036513](https://our.internmc.facebook.com/intern/diff/D96036513/) ghstack-source-id: 350221316 Pull Request resolved: https://github.com/pytorch/executorch/pull/18077 --- .../runtime/graph/ops/glsl/pack_q8_conv2d_weights.glsl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/pack_q8_conv2d_weights.glsl b/backends/vulkan/runtime/graph/ops/glsl/pack_q8_conv2d_weights.glsl index 91763ad5fa3..5682f044b1d 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/pack_q8_conv2d_weights.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/pack_q8_conv2d_weights.glsl @@ -17,7 +17,7 @@ ${define_active_storage_type(STORAGE)} layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_packed_int8_weight", "int", STORAGE, is_scalar_array=False)} -${layout_declare_tensor(B, "r", "t_int8_weight", "int", "buffer")} +${layout_declare_tensor(B, "r", "t_int8_weight", "int", "buffer", is_scalar_array=False)} layout(push_constant) uniform restrict Block { ivec4 qmat2_sizes; @@ -65,7 +65,8 @@ void main() { if (ic + col < orig_sizes.w) { const int byte_idx = buf_idx + col; const int byte_pos = byte_idx & 3; - weight_vals[col] = (t_int8_weight[byte_idx >> 2] >> (byte_pos * 8)) & 0xFF; + const int word_idx = byte_idx >> 2; + weight_vals[col] = (t_int8_weight[word_idx >> 2][word_idx & 3] >> (byte_pos * 8)) & 0xFF; } } packed_block[row] = pack_into_int32(weight_vals);