diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index 59fd561a2c5..1214c89e00a 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -612,6 +612,11 @@ void ComputeGraph::prepare() { if (config_.enable_querypool) { context_->initialize_querypool(); } + + for (SharedObject& shared_object : shared_objects_) { + shared_object.allocate(this); + shared_object.bind_users(this); + } } void ComputeGraph::encode_prepack() { @@ -636,11 +641,6 @@ void ComputeGraph::encode_execute() { context_->cmd_reset_querypool(); - for (SharedObject& shared_object : shared_objects_) { - shared_object.allocate(this); - shared_object.bind_users(this); - } - for (std::unique_ptr& node : execute_nodes_) { node->encode(this); } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index d09597ad778..32763417fc0 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index 172ab49a98a..c45f0a741fd 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -22,7 +22,7 @@ class ComputeGraph; /* * Represents a single shader execution op in a ML model. */ -class DispatchNode final : public ExecuteNode { +class DispatchNode : public ExecuteNode { friend class ComputeGraph; public: @@ -43,9 +43,9 @@ class DispatchNode final : public ExecuteNode { void encode(ComputeGraph* graph) override; protected: - const vkapi::ShaderInfo shader_; - const utils::uvec3 global_workgroup_size_; - const utils::WorkgroupSize local_workgroup_size_; + vkapi::ShaderInfo shader_; + utils::uvec3 global_workgroup_size_; + utils::WorkgroupSize local_workgroup_size_; const vkapi::ParamsBindList params_; const vkapi::SpecVarList spec_vars_; const std::vector push_constants_; diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp new file mode 100644 index 00000000000..ac84916c6fa --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include + +namespace vkcompute { + +DynamicDispatchNode::DynamicDispatchNode( + ComputeGraph& graph, + const PickShaderFn& pick_shader_fn, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn) + : DispatchNode( + graph, + pick_shader_fn(&graph, args, resize_args), + pick_global_wg_fn(&graph, args, resize_args), + pick_local_wg_fn(&graph, args, resize_args), + args, + params, + push_constants, + spec_vars, + resize_args, + resize_fn), + pick_shader_fn_(pick_shader_fn), + pick_global_wg_fn_(pick_global_wg_fn), + pick_local_wg_fn_(pick_local_wg_fn) {} + +void DynamicDispatchNode::encode(ComputeGraph* graph) { + shader_ = pick_shader_fn_(graph, args_, resize_args_); + global_workgroup_size_ = pick_global_wg_fn_(graph, args_, resize_args_); + local_workgroup_size_ = + utils::WorkgroupSize(pick_local_wg_fn_(graph, args_, resize_args_)); + DispatchNode::encode(graph); +} + +} // namespace vkcompute diff --git a/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h new file mode 100644 index 00000000000..ede50941415 --- /dev/null +++ b/backends/vulkan/runtime/graph/ops/DynamicDispatchNode.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +#include +#include + +#include + +namespace vkcompute { + +class ComputeGraph; + +/* + * Represents a single shader execution op in a ML model. + */ +class DynamicDispatchNode final : public DispatchNode { + friend class ComputeGraph; + + public: + using PickShaderFn = const std::function&, + const std::vector&)>; + using PickGlobalFn = const std::function&, + const std::vector&)>; + using PickLocalFn = const std::function&, + const std::vector&)>; + + explicit DynamicDispatchNode( + ComputeGraph& graph, + const PickShaderFn& pick_shader_fn, + const PickGlobalFn& pick_global_wg_fn, + const PickLocalFn& pick_local_wg_fn, + const std::vector& args, + const vkapi::ParamsBindList& params, + const std::vector& push_constants, + const vkapi::SpecVarList& spec_vars, + const std::vector& resize_args, + const ResizeFunction& resize_fn = nullptr); + + ~DynamicDispatchNode() override = default; + + void encode(ComputeGraph* graph) override; + + protected: + const PickShaderFn pick_shader_fn_; + const PickGlobalFn pick_global_wg_fn_; + const PickLocalFn pick_local_wg_fn_; + + public: + operator bool() const { + return shader_; + } +}; + +} // namespace vkcompute diff --git a/backends/vulkan/test/glsl/dynamic_dispatch_test.glsl b/backends/vulkan/test/glsl/dynamic_dispatch_test.glsl new file mode 100644 index 00000000000..341da3eeacd --- /dev/null +++ b/backends/vulkan/test/glsl/dynamic_dispatch_test.glsl @@ -0,0 +1,45 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#version 450 core + +#define PRECISION ${PRECISION} + +layout(std430) buffer; + +${layout_declare_tensor(0, "w", "t_out", "float", "texture3d")} +${layout_declare_tensor(1, "r", "t_in1", "float", "texture3d")} +${layout_declare_tensor(2, "r", "t_in2", "float", "texture3d")} + +layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 in1_sizes; + ivec4 in2_sizes; +}; + +layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; + +void main() { + const ivec3 pos = ivec3(gl_GlobalInvocationID); + + if (any(greaterThanEqual(pos, out_sizes.xyz))) { + return; + } + + + vec4 out_texel = vec4(0.0); + for (int row = 0; row < in1_sizes.y; ++row) { + ivec3 in_pos = ivec3(pos.x, row, pos.z); + vec4 in1_texel = texelFetch(t_in1, in_pos, 0); + vec4 in2_texel = texelFetch(t_in2, in_pos, 0); + + out_texel += in1_texel * in2_texel; + } + + imageStore(t_out, pos, out_texel + ${OFFSET}); +} diff --git a/backends/vulkan/test/glsl/dynamic_dispatch_test.yaml b/backends/vulkan/test/glsl/dynamic_dispatch_test.yaml new file mode 100644 index 00000000000..0f0f5f51685 --- /dev/null +++ b/backends/vulkan/test/glsl/dynamic_dispatch_test.yaml @@ -0,0 +1,7 @@ +dynamic_dispatch_test: + parameter_names_with_default_values: + OFFSET: 2.25 + shader_variants: + - NAME: dynamic_dispatch_test_var1 + - NAME: dynamic_dispatch_test_var2 + OFFSET: 5.5 diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp index 3b6195a5c26..c4acb41b7b0 100644 --- a/backends/vulkan/test/utils/test_utils.cpp +++ b/backends/vulkan/test/utils/test_utils.cpp @@ -537,6 +537,59 @@ void execute_graph_and_check_output( } } +vkcompute::ComputeGraph build_mm_graph( + int B, + int M, + int K, + int N, + vkcompute::vkapi::ScalarType dtype, + vkcompute::utils::StorageType in_out_stype, + vkcompute::utils::GPUMemoryLayout memory_layout, + const bool prepack_mat2, + const float mat2_val) { + using namespace vkcompute; + GraphConfig config; + ComputeGraph graph(config); + + std::vector mat1_size = {M, K}; + std::vector mat2_size = {K, N}; + std::vector out_size = {M, N}; + if (B > 1) { + mat1_size.resize(3); + mat1_size = {B, M, K}; + mat2_size.resize(3); + mat2_size = {B, K, N}; + out_size.resize(3); + out_size = {B, M, N}; + } + + IOValueRef mat1 = + graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout); + IOValueRef mat2{}; + + CREATE_RAND_WEIGHT_TENSOR(mat2_w, mat2_size, dtype); + if (mat2_val != 0.0f) { + std::fill(data_mat2_w.begin(), data_mat2_w.end(), mat2_val); + } + + if (prepack_mat2) { + mat2.value = mat2_w; + } else { + mat2.value = + graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout); + mat2.staging = graph.set_input_tensor(mat2.value); + } + + IOValueRef out; + out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout); + + VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value}); + + out.staging = graph.set_output_tensor(out.value); + + return graph; +} + bool check_close(float a, float b, float atol, float rtol) { float max = std::max(std::abs(a), std::abs(b)); float diff = std::abs(a - b); diff --git a/backends/vulkan/test/utils/test_utils.h b/backends/vulkan/test/utils/test_utils.h index f3ee2a717a5..71d6d0bc0de 100644 --- a/backends/vulkan/test/utils/test_utils.h +++ b/backends/vulkan/test/utils/test_utils.h @@ -8,6 +8,8 @@ #pragma once +#include + #include #include @@ -16,6 +18,8 @@ #include #include +#include + #define CREATE_FLOAT_TEXTURE(sizes, allocate_memory) \ vkcompute::api::vTensor( \ vkcompute::api::context(), \ @@ -135,6 +139,22 @@ void record_matmul_texture3d( // Input & Output Utilities // +inline std::vector create_random_float_vector( + const size_t numel, + const float min = 0.0f, + const float max = 1.0f) { + std::vector result(numel); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(min, max); + + for (size_t i = 0; i < numel; ++i) { + result[i] = dis(gen); + } + + return result; +} + inline void fill_staging( vkcompute::api::StagingBuffer& staging, float val, @@ -232,6 +252,22 @@ void execute_graph_and_check_output( std::vector input_vals, std::vector expected_outputs); +#define CREATE_RAND_WEIGHT_TENSOR(name, sizes, dtype) \ + std::vector data_##name = \ + create_random_float_buffer(utils::multiply_integers(sizes)); \ + ValueRef name = graph.add_tensorref(sizes, dtype, data_##name.data()); + +vkcompute::ComputeGraph build_mm_graph( + int B, + int M, + int K, + int N, + vkcompute::vkapi::ScalarType dtype, + vkcompute::utils::StorageType in_out_stype, + vkcompute::utils::GPUMemoryLayout memory_layout, + const bool prepack_mat2 = false, + const float mat2_val = 0.0f); + // // Debugging Utilities // diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 143e6704889..a6475d95d07 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -2753,43 +2753,8 @@ void test_mm( utils::StorageType storage_type, utils::GPUMemoryLayout memory_layout, bool prepack = true) { - GraphConfig config; - config.set_storage_type_override(storage_type); - ComputeGraph graph(config); - - std::vector mat1_size = {M, K}; - std::vector mat2_size = {K, N}; - std::vector out_size = {M, N}; - if (B > 1) { - mat1_size.resize(3); - mat1_size = {B, M, K}; - mat2_size.resize(3); - mat2_size = {B, K, N}; - out_size.resize(3); - out_size = {B, M, N}; - } - - IOValueRef mat2{}; - - CREATE_WEIGHT_TENSOR(mat2_w, mat2_size, dtype, 2.0f); - - // Build graph - - IOValueRef mat1 = graph.add_input_tensor(mat1_size, dtype, memory_layout); - - if (prepack) { - mat2.value = mat2_w; - } else { - mat2.value = graph.add_tensor(mat2_size, dtype, memory_layout); - mat2.staging = graph.set_input_tensor(mat2.value); - } - - IOValueRef out; - out.value = graph.add_tensor(out_size, dtype, memory_layout); - - VK_GET_OP_FN("aten.mm.default")(graph, {mat1.value, mat2.value, out.value}); - - out.staging = graph.set_output_tensor(out.value); + ComputeGraph graph = build_mm_graph( + B, M, K, N, dtype, storage_type, memory_layout, prepack, 2.0f); graph.prepare(); graph.encode_prepack(); @@ -2855,6 +2820,60 @@ TEST(VulkanComputeGraphOpsTest, mm_smoke_test) { #undef RUN_TESTS } +void test_mm_with_resize_reencode( + int B, + int M, + int K, + int N, + vkapi::ScalarType dtype, + utils::StorageType storage_type, + utils::GPUMemoryLayout memory_layout) { + ASSERT_TRUE(M > 1); + + ComputeGraph graph = build_mm_graph( + B, M, K, N, dtype, storage_type, memory_layout, false, 2.0f); + + graph.prepare(); + graph.encode_prepack(); + graph.prepack(); + graph.encode_execute(); + + for (int i = 1; i < 4; i++) { + float val_mat1 = i; + float val_mat2 = i + 1; + float val_out = K * (val_mat1 * val_mat2); + execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out}); + } + + // Switch to GEMV mode + int new_K = K / 2; + std::vector new_mat1_size = {1, new_K}; + std::vector new_mat2_size = {new_K, N}; + graph.resize_input(0, new_mat1_size); + graph.resize_input(1, new_mat2_size); + graph.propagate_resize(); + + graph.encode_execute(); + + for (int i = 1; i < 4; i++) { + float val_mat1 = i; + float val_mat2 = i + 1; + float val_out = new_K * (val_mat1 * val_mat2); + execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out}); + } +} + +TEST(VulkanComputeGraphOpsTest, test_graph_resize_reencode) { + test_mm_with_resize_reencode( + /*B = */ 1, + /*M = */ 31, + /*K = */ 127, + /*N = */ 23, + vkapi::kFloat, + utils::kTexture3D, + utils::kWidthPacked); +} + void test_max_pool2d( const std::vector& in_size, const int64_t base_val, @@ -3278,3 +3297,140 @@ TEST(VulkanComputeGraphOpsTest, test_to_copy) { test_to_copy(); } } + +vkapi::ShaderInfo pick_dynamic_dispatch_shader( + ComputeGraph* graph, + const std::vector& args, + const std::vector& additional_args) { + const ValueRef mat1 = args[1].refs[0]; + + std::string kernel_name = "dynamic_dispatch_test"; + if (graph->size_at(-2, mat1) == 1) { + kernel_name += "_var1"; + } else { + kernel_name += "_var2"; + } + return VK_KERNEL_FROM_STR(kernel_name); +} + +utils::uvec3 pick_dynamic_dispatch_global_wg_size( + ComputeGraph* graph, + const std::vector& args, + const std::vector& additional_args) { + const ValueRef out = args[0].refs[0]; + + return graph->logical_limits_of(out); +} + +utils::uvec3 pick_dynamic_dispatch_local_wg_size( + ComputeGraph* graph, + const std::vector& args, + const std::vector& additional_args) { + return {64, 1, 1}; +} + +void resize_dynamic_dispatch_node( + ComputeGraph* graph, + const std::vector& args, + const std::vector& additional_args) { + const ValueRef out = args[0].refs[0]; + const ValueRef mat1 = args[1].refs[0]; + + std::vector out_sizes = graph->sizes_of(mat1); + out_sizes.at(out_sizes.size() - 2) = 1; + + graph->get_tensor(out)->virtual_resize(out_sizes); +} + +void add_dynamic_dispatch_test_node( + ComputeGraph& graph, + const ValueRef mat1, + const ValueRef mat2, + const ValueRef out) { + graph.execute_nodes().emplace_back(new DynamicDispatchNode( + graph, + pick_dynamic_dispatch_shader, + pick_dynamic_dispatch_global_wg_size, + pick_dynamic_dispatch_local_wg_size, + // Inputs and Outputs + {{out, vkapi::kWrite}, {{mat1, mat2}, vkapi::kRead}}, + // Shader params buffers + {}, + // Push Constants + {graph.sizes_pc_of(out), + graph.sizes_pc_of(mat1), + graph.sizes_pc_of(mat2)}, + // Specialization constants + {}, + // Resize Logic + {}, + resize_dynamic_dispatch_node)); +} + +vkcompute::ComputeGraph build_dynamic_dispatch_test_graph(int M, int N) { + using namespace vkcompute; + GraphConfig config; + ComputeGraph graph(config); + + vkapi::ScalarType dtype = vkapi::kFloat; + utils::StorageType in_out_stype = utils::kTexture3D; + utils::GPUMemoryLayout memory_layout = utils::kWidthPacked; + + std::vector mat1_size = {M, N}; + std::vector mat2_size = {M, N}; + std::vector out_size = {1, N}; + + IOValueRef mat1 = + graph.add_input_tensor(mat1_size, dtype, in_out_stype, memory_layout); + IOValueRef mat2{}; + + mat2.value = graph.add_tensor(mat2_size, dtype, in_out_stype, memory_layout); + mat2.staging = graph.set_input_tensor(mat2.value); + + IOValueRef out; + out.value = graph.add_tensor(out_size, dtype, in_out_stype, memory_layout); + + add_dynamic_dispatch_test_node(graph, mat1, mat2, out); + + out.staging = graph.set_output_tensor(out.value); + + return graph; +} + +void test_dynamic_dispatch(int M, int N) { + ComputeGraph graph = build_dynamic_dispatch_test_graph(M, N); + + graph.prepare(); + graph.encode_prepack(); + graph.prepack(); + graph.encode_execute(); + + for (int i = 1; i < 4; i++) { + float val_mat1 = i; + float val_mat2 = i + 1; + // 5.3 is a hardcoded offset in the compute shader + float val_out = M * (val_mat1 * val_mat2) + 5.5; + execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out}); + } + + // Switch to GEMV mode + int new_N = N / 2; + std::vector new_mat1_size = {1, new_N}; + std::vector new_mat2_size = {1, new_N}; + graph.resize_input(0, new_mat1_size); + graph.resize_input(1, new_mat2_size); + graph.propagate_resize(); + + graph.encode_execute(); + + for (int i = 1; i < 4; i++) { + float val_mat1 = i; + float val_mat2 = i + 1; + float val_out = (val_mat1 * val_mat2) + 2.25; + execute_graph_and_check_output(graph, {val_mat1, val_mat2}, {val_out}); + } +} + +TEST(VulkanComputeGraphOpsTest, test_dynamic_dispatch_graph) { + test_dynamic_dispatch(128, 128); +}