From 8e1640fbb2ae0f2dd826d5ecaa028b4e53d3d929 Mon Sep 17 00:00:00 2001 From: ssjia Date: Wed, 8 Apr 2026 07:08:24 -0700 Subject: [PATCH 1/2] [ET-VK] Fix force_fp16 texture bias being silently rejected for CONTIGUOUS_ANY ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `force_fp16` path in `TagMemoryMetaPass` applies `ANY_TEXTURE` to bias ops toward texture storage. However, `try_constrain_with_arg_repset` has a packed-dim compatibility check that requires ALL of the source repset's PDIs to exist in the output repset. `ANY_TEXTURE` has 3 texture layouts (WP, HP, CP) but `CONTIGUOUS_ANY` outputs only support WP, so the check fails and the texture bias is silently dropped. Without the bias, buffer storage cascades from ops that must use buffer (e.g. embedding with vocab exceeding texture limits) into downstream ops that could use texture, causing unnecessary buffer↔texture transitions. Fix: check PDI compatibility against the intersection of arg and source repsets (what would actually be applied) rather than the raw source. The intersection of `ANY_TEXTURE ∩ CONTIGUOUS_ANY` = `WIDTH_PACKED_TEXTURE`, which IS compatible with the output. Authored by Claude. Differential Revision: [D100004702](https://our.internmc.facebook.com/intern/diff/D100004702/) [ghstack-poisoned] --- backends/vulkan/utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/utils.py b/backends/vulkan/utils.py index c17f9332e0c..f93fec167eb 100644 --- a/backends/vulkan/utils.py +++ b/backends/vulkan/utils.py @@ -1511,14 +1511,19 @@ def try_constrain_with_arg_repset( if not arg_current_repset.any_in_common(source_repset): return False + # Compute the narrowed repset (intersection of current arg and source). 
+ narrowed = arg_current_repset.make_intersect(source_repset) + if self.sync_primary_io_repr: - if not self.get_out_repset(0).has_compatible_packed_dim_info_set( - source_repset - ): + # Check that the narrowed result is compatible with the output. + # Using the intersection rather than the raw source_repset avoids + # rejecting valid constraints where the source has extra layouts + # (e.g. ANY_TEXTURE includes HP/CP) that don't exist in the output + # but also don't appear in the intersection. + if not self.get_out_repset(0).has_compatible_packed_dim_info_set(narrowed): return False # If this point is reached, then it is possible to constrain - narrowed = arg_current_repset.make_intersect(source_repset) self.args_repset_list[arg_i] = narrowed # Propagate to other synced args via packed-dim compatibility From f8c5861a8ff236ceb09ae08cdd115d2e82fce756 Mon Sep 17 00:00:00 2001 From: ssjia Date: Wed, 8 Apr 2026 08:51:53 -0700 Subject: [PATCH 2/2] Update on "[ET-VK] Fix force_fp16 texture bias being silently rejected for CONTIGUOUS_ANY ops" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `force_fp16` path in `TagMemoryMetaPass` applies `ANY_TEXTURE` to bias ops toward texture storage. However, `try_constrain_with_arg_repset` has a packed-dim compatibility check that requires ALL of the source repset's PDIs to exist in the output repset. `ANY_TEXTURE` has 3 texture layouts (WP, HP, CP) but `CONTIGUOUS_ANY` outputs only support WP, so the check fails and the texture bias is silently dropped. Without the bias, buffer storage cascades from ops that must use buffer (e.g. embedding with vocab exceeding texture limits) into downstream ops that could use texture, causing unnecessary buffer↔texture transitions. Fix: check PDI compatibility against the intersection of arg and source repsets (what would actually be applied) rather than the raw source. 
The intersection `ANY_TEXTURE ∩ CONTIGUOUS_ANY` is `WIDTH_PACKED_TEXTURE`, which IS compatible with the output. Authored by Claude. Differential Revision: [D100004702](https://our.internmc.facebook.com/intern/diff/D100004702/) [ghstack-poisoned]