From c3baa77db6118a246fcbc945c72db57e41de158b Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:40:43 -0700 Subject: [PATCH] [ET-VK] Adding push constant and UBO versions of select and slice ops to improve memory and performance. Adding push constant and UBO versions of select and slice ops to improve memory and performance. * Updated `transfer_buffer.yaml` and `transfer_texture.yaml` to include `UBO_PARAMS` parameter and generate variants for `select` and `slice` ops with UBO parameters. * Updated `transfer_buffer.glsl` and `transfer_texture.glsl` to generate UBO and push constant versions of `select` and `slice` ops with UBO parameters. Differential Revision: [D78095262](https://our.internmc.facebook.com/intern/diff/D78095262/) [ghstack-poisoned] --- .../graph/ops/glsl/transfer_buffer.glsl | 19 ++++-- .../graph/ops/glsl/transfer_buffer.yaml | 7 +++ .../graph/ops/glsl/transfer_texture.glsl | 19 ++++-- .../graph/ops/glsl/transfer_texture.yaml | 7 +++ .../runtime/graph/ops/impl/Transfer.cpp | 63 ++++++++++++------- 5 files changed, 84 insertions(+), 31 deletions(-) diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl index 7e95b52d8f4..7605c59c72f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.glsl @@ -9,6 +9,7 @@ #version 450 core #define PRECISION ${PRECISION} +#define UBO_PARAMS ${UBO_PARAMS} #define VEC4_T ${texel_type(DTYPE)} #define T ${buffer_scalar_type(DTYPE)} @@ -22,12 +23,13 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")} -$if OP_NAME == "slice": - ${layout_declare_ubo(B, "int", "start")} - ${layout_declare_ubo(B, "int", "step")} +$if UBO_PARAMS: + $if OP_NAME == "slice": + ${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} -$if
OP_NAME == "select": - ${layout_declare_ubo(B, "int", "index")} + $if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} layout(push_constant) uniform restrict Block { ivec4 in_sizes; @@ -35,6 +37,13 @@ layout(push_constant) uniform restrict Block { ivec4 in_strides; int out_numel; int selected_dim; + $if not UBO_PARAMS: + $if OP_NAME == "slice": + int start; + int step; + + $if OP_NAME == "select": + int index; }; ${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml index bdde613c8ce..47440cd0a13 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_buffer.yaml @@ -2,6 +2,7 @@ transfer_buffer: parameter_names_with_default_values: DTYPE: float OP_NAME: select + UBO_PARAMS: False generate_variant_forall: DTYPE: - VALUE: half @@ -11,3 +12,9 @@ transfer_buffer: OP_NAME: select - NAME: slice_buffer OP_NAME: slice + - NAME: select_ubo_buffer + OP_NAME: select + UBO_PARAMS: True + - NAME: slice_ubo_buffer + OP_NAME: slice + UBO_PARAMS: True diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl index d3e25436c04..0f34713cb43 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.glsl @@ -9,6 +9,7 @@ #version 450 core #define PRECISION ${PRECISION} +#define UBO_PARAMS ${UBO_PARAMS} #define VEC4_T ${texel_type(DTYPE)} #define T ${buffer_scalar_type(DTYPE)} @@ -23,17 +24,25 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")} ${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")} -$if OP_NAME == "slice": - ${layout_declare_ubo(B, "int", "start")} - ${layout_declare_ubo(B, "int", "step")} +$if UBO_PARAMS: + $if OP_NAME == "slice": + 
${layout_declare_ubo(B, "int", "start")} + ${layout_declare_ubo(B, "int", "step")} -$if OP_NAME == "select": - ${layout_declare_ubo(B, "int", "index")} + $if OP_NAME == "select": + ${layout_declare_ubo(B, "int", "index")} layout(push_constant) uniform restrict Block { ivec4 out_sizes; ivec4 in_sizes; int selected_dim; + $if not UBO_PARAMS: + $if OP_NAME == "slice": + int start; + int step; + + $if OP_NAME == "select": + int index; }; ${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_LAYOUT")} diff --git a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml index f877ee036e4..7484697f097 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml +++ b/backends/vulkan/runtime/graph/ops/glsl/transfer_texture.yaml @@ -2,6 +2,7 @@ transfer_texture: parameter_names_with_default_values: DTYPE: float OP_NAME: select + UBO_PARAMS: False generate_variant_forall: DTYPE: - VALUE: half @@ -11,3 +12,9 @@ transfer_texture: OP_NAME: select - NAME: slice_texture3d OP_NAME: slice + - NAME: select_ubo_texture3d + OP_NAME: select + UBO_PARAMS: True + - NAME: slice_ubo_texture3d + OP_NAME: slice + UBO_PARAMS: True diff --git a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp index 7b5fad57483..938996e26f1 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Transfer.cpp @@ -40,34 +40,52 @@ void add_transfer_copy_node( int64_t dim_whcn = nchw_dim_to_whcn_dim(dim, ndim); + struct TransferParams { + int32_t dim; + int32_t index_or_start_ref; + int32_t step_ref; + } transfer_params{static_cast(dim_whcn), 0, 0}; + + const bool param_is_scalar = graph.is_scalar(index_or_start_ref) && + (transfer_type == TransferType::SELECT || graph.is_scalar(step_ref)); + vkapi::ParamsBindList param_buffers; - if (transfer_type == TransferType::SELECT) { - param_buffers = { - 
graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; - } else { // TransferType::SLICE - param_buffers = { - graph.get_or_create_int_param_buffer(index_or_start_ref, 0), - graph.get_or_create_int_param_buffer(step_ref, 1)}; + if (!param_is_scalar) { + if (transfer_type == TransferType::SELECT) { + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0)}; + } else { // TransferType::SLICE + param_buffers = { + graph.get_or_create_int_param_buffer(index_or_start_ref, 0), + graph.get_or_create_int_param_buffer(step_ref, 1)}; + } + } else { + transfer_params.index_or_start_ref = + graph.get_or_create_int(index_or_start_ref, 0); + if (transfer_type != TransferType::SELECT) { + transfer_params.step_ref = graph.get_or_create_int(step_ref, 1); + } } - const struct TransferParams { - const int32_t dim; - } transfer_params{static_cast(dim_whcn)}; - std::vector push_constants; + push_constants.reserve(graph.is_buffer_storage(out) ? 5 : 3); if (graph.is_buffer_storage(out)) { - push_constants = { - graph.sizes_pc_of(in), - graph.strides_pc_of(out), - graph.strides_pc_of(in), - graph.numel_pc_of(out), - PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + push_constants.emplace_back(graph.sizes_pc_of(in)); + push_constants.emplace_back(graph.strides_pc_of(out)); + push_constants.emplace_back(graph.strides_pc_of(in)); + push_constants.emplace_back(graph.numel_pc_of(out)); } else { - push_constants = { - graph.sizes_pc_of(out), - graph.sizes_pc_of(in), - PushConstantDataInfo(&transfer_params, sizeof(transfer_params))}; + push_constants.emplace_back(graph.sizes_pc_of(out)); + push_constants.emplace_back(graph.sizes_pc_of(in)); + } + + if (param_is_scalar) { + push_constants.emplace_back( + PushConstantDataInfo(&transfer_params, sizeof(transfer_params))); + } else { + push_constants.emplace_back(PushConstantDataInfo( + &transfer_params.dim, sizeof(transfer_params.dim))); } vkapi::SpecVarList spec_vars = { @@ -82,6 +100,9 @@ void 
add_transfer_copy_node( } else { // TransferType::SLICE kernel_name = "slice"; } + if (!param_is_scalar) { + kernel_name += "_ubo"; + } add_storage_type_suffix(kernel_name, graph.storage_type_of(out)); add_dtype_suffix(kernel_name, graph.dtype_of(out));