From d0d965b828a9b39c14aa6549a02b118528f846fd Mon Sep 17 00:00:00 2001
From: jorgep31415 <jorgepineda140@gmail.com>
Date: Wed, 4 Jun 2025 10:01:06 -0700
Subject: [PATCH 1/3] [ET-VK] Consolidate shader compilation into one
 vkCreateComputePipelines call

Pull Request resolved: https://github.com/pytorch/executorch/pull/11345

We target the QC Adreno driver implementation of Vulkan. The Vulkan API does not enforce how QC actually uses the cache. As the plural naming of `vkCreateComputePipelines` suggests, we observed that the `createInfoCount`, `pCreateInfos` and `pPipelines` arguments above allow construction of multiple compute pipelines in one invocation. We refactor ET-VK to accumulate metadata necessary for pipeline construction and invoke vkCreateComputePipelines only once. QC's implementation maximizes the cache if we create the same number of compute pipelines in fewer invocations of vkCreateComputePipelines. This decreases model load for a sample model from 1.7s to 200ms.
ghstack-source-id: 288170368

Differential Revision: [D75763660](https://our.internmc.facebook.com/intern/diff/D75763660/)
---
 backends/vulkan/runtime/VulkanBackend.cpp     |  1 +
 .../vulkan/runtime/graph/ComputeGraph.cpp     | 50 +++++++++++
 backends/vulkan/runtime/graph/ComputeGraph.h  | 13 +++
 .../vulkan/runtime/graph/ops/DispatchNode.cpp |  5 ++
 .../vulkan/runtime/graph/ops/DispatchNode.h   |  2 +
 .../vulkan/runtime/graph/ops/ExecuteNode.h    |  4 +
 .../vulkan/runtime/graph/ops/PrepackNode.cpp  |  7 ++
 .../vulkan/runtime/graph/ops/PrepackNode.h    |  2 +
 backends/vulkan/runtime/vk_api/Pipeline.cpp   | 82 ++++++++++++++++++-
 backends/vulkan/runtime/vk_api/Pipeline.h     |  9 ++
 10 files changed, 173 insertions(+), 2 deletions(-)
diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
index 02df85c33e8..75726ae0892 100644
--- a/backends/vulkan/runtime/VulkanBackend.cpp
+++ b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -495,6 +495,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     builder.build_graph();
 
     compute_graph->prepare();
+    compute_graph->prepare_pipelines();
 
     compute_graph->encode_prepack();
     compute_graph->prepack();
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
index 68935e63123..8e498d5f2d1 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.cpp
+++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -561,6 +561,42 @@ void ComputeGraph::update_descriptor_counts(
   }
 }
 
+void ComputeGraph::register_pipeline_to_create(
+    const vkapi::ShaderInfo& shader_info,
+    const utils::WorkgroupSize& local_workgroup_size,
+    const vkapi::SpecVarList& spec_vars,
+    const std::vector<PushConstantDataInfo>& push_constants) {
+  VkDescriptorSetLayout shader_layout =
+      context()->shader_layout_cache().retrieve(shader_info.kernel_layout);
+
+  uint32_t pc_offset = 0;
+  std::array<uint8_t, kMaxPushConstantSize> pc_data;
+  for (const auto& pc : push_constants) {
+    pc_offset += pc.write(pc_data.data(), pc_offset, kMaxPushConstantSize);
+  }
+
+  vkapi::SpecVarList spec_constants = {
+      SV(local_workgroup_size[0u]),
+      SV(local_workgroup_size[1u]),
+      SV(local_workgroup_size[2u])};
+
+  spec_constants.append(spec_vars);
+
+  const vkapi::ComputePipelineCache::Key desc = {
+      context()->pipeline_layout_cache().retrieve(shader_layout, pc_offset),
+      context()->shader_cache().retrieve(shader_info),
+      spec_constants};
+
+  if (context_->pipeline_cache().contains(desc)) {
+    return;
+  }
+  auto it = pipeline_descriptors_.find(desc);
+  if (it != pipeline_descriptors_.cend()) {
+    return;
+  }
+  pipeline_descriptors_.insert(desc);
+}
+
 utils::uvec3 ComputeGraph::create_global_wg_size(const ValueRef idx) {
   if (is_buffer_storage(idx)) {
     return {uint32_t(numel_of(idx)), 1u, 1u};
@@ -670,6 +706,20 @@ void ComputeGraph::prepare() {
   }
 }
 
+void ComputeGraph::prepare_pipelines() {
+  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
+    node->prepare_pipelines(this);
+  }
+  for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
+    node->prepare_pipelines(this);
+  }
+  context_->pipeline_cache().create_pipelines(pipeline_descriptors_);
+
+  pipeline_descriptors_ = std::unordered_set<
+      vkapi::ComputePipelineCache::Key,
+      vkapi::ComputePipelineCache::Hasher>();
+}
+
 void ComputeGraph::encode_prepack() {
   for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
     node->encode(this);
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 0e8a5eba51f..90f89ea18d6 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -185,6 +185,11 @@ class ComputeGraph final {
   std::vector<IOValueRef> inputs_;
   std::vector<IOValueRef> outputs_;
 
+  std::unordered_set<
+      vkapi::ComputePipelineCache::Key,
+      vkapi::ComputePipelineCache::Hasher>
+      pipeline_descriptors_;
+
  protected:
   size_t values_in_use_ = 0;
   size_t execute_count_ = 0;
@@ -711,8 +716,16 @@ class ComputeGraph final {
       const vkapi::ShaderInfo& shader_info,
       bool execute);
 
+  void register_pipeline_to_create(
+      const vkapi::ShaderInfo& shader_info,
+      const utils::WorkgroupSize& local_workgroup_size,
+      const vkapi::SpecVarList& spec_vars,
+      const std::vector<PushConstantDataInfo>& push_constants);
+
   void prepare();
 
+  void prepare_pipelines();
+
   //
   // Dispatch Utilities
   //
diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp
index a0d3a4c2e5c..b5644cf3dcd 100644
--- a/backends/vulkan/runtime/graph/ops/DispatchNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/DispatchNode.cpp
@@ -35,6 +35,11 @@ DispatchNode::DispatchNode(
   graph.update_descriptor_counts(shader, /*execute = */ true);
 }
 
+void DispatchNode::prepare_pipelines(ComputeGraph* graph) {
+  graph->register_pipeline_to_create(
+      shader_, local_workgroup_size_, spec_vars_, push_constants_);
+}
+
 void DispatchNode::encode(ComputeGraph* graph) {
   if (!shader_) {
     return;
diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h
index db95adfee39..b6eb8624c26 100644
--- a/backends/vulkan/runtime/graph/ops/DispatchNode.h
+++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h
@@ -40,6 +40,8 @@ class DispatchNode : public ExecuteNode {
 
   ~DispatchNode() override = default;
 
+  void prepare_pipelines(ComputeGraph* graph) override;
+
   void encode(ComputeGraph* graph) override;
 
  protected:
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
index 0731722e13a..6a815b246ef 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
@@ -61,6 +61,10 @@ class ExecuteNode {
 
   virtual ~ExecuteNode() = default;
 
+  virtual void prepare_pipelines(ComputeGraph* graph) {
+    (void)graph;
+  }
+
   virtual void encode(ComputeGraph* graph) {
     (void)graph;
   }
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
index d84d893540c..bdbecc866ab 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.cpp
@@ -67,6 +67,13 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   return staging;
 }
 
+void PrepackNode::prepare_pipelines(ComputeGraph* graph) {
+  graph->register_pipeline_to_create(
+      shader_, local_workgroup_size_, spec_vars_, push_constants_);
+  graph->register_pipeline_to_create(
+      noop_shader_, utils::WorkgroupSize(1, 1, 1), {}, {});
+}
+
 void PrepackNode::encode(ComputeGraph* graph) {
   api::Context* const context = graph->context();
 
diff --git a/backends/vulkan/runtime/graph/ops/PrepackNode.h b/backends/vulkan/runtime/graph/ops/PrepackNode.h
index a45deb9ff70..8ce8ac9f773 100644
--- a/backends/vulkan/runtime/graph/ops/PrepackNode.h
+++ b/backends/vulkan/runtime/graph/ops/PrepackNode.h
@@ -40,6 +40,8 @@ class PrepackNode final {
 
   ~PrepackNode() = default;
 
+  void prepare_pipelines(ComputeGraph* graph);
+
   void encode(ComputeGraph* graph);
 
   inline void set_node_id(uint32_t node_id) {
diff --git a/backends/vulkan/runtime/vk_api/Pipeline.cpp b/backends/vulkan/runtime/vk_api/Pipeline.cpp
index 5dcb00168b2..994b46b8c76 100644
--- a/backends/vulkan/runtime/vk_api/Pipeline.cpp
+++ b/backends/vulkan/runtime/vk_api/Pipeline.cpp
@@ -270,6 +270,9 @@ void swap(PipelineLayout& lhs, PipelineLayout& rhs) noexcept {
 // ComputePipeline
 //
 
+ComputePipeline::ComputePipeline(VkDevice device, VkPipeline handle)
+    : device_{device}, handle_{handle} {}
+
 ComputePipeline::ComputePipeline(
     VkDevice device,
     const ComputePipeline::Descriptor& descriptor,
@@ -444,19 +447,94 @@ ComputePipelineCache::~ComputePipelineCache() {
   pipeline_cache_ = VK_NULL_HANDLE;
 }
 
+bool ComputePipelineCache::contains(const ComputePipelineCache::Key& key) {
+  std::lock_guard<std::mutex> lock(cache_mutex_);
+
+  auto it = cache_.find(key);
+  return it != cache_.cend();
+}
+
+void ComputePipelineCache::create_pipelines(
+    const std::unordered_set<Key, Hasher>& descriptors) {
+  std::lock_guard<std::mutex> lock(cache_mutex_);
+
+  const auto num_pipelines = descriptors.size();
+  std::vector<VkPipeline> pipelines(num_pipelines);
+
+  std::vector<std::vector<VkSpecializationMapEntry>> map_entries;
+  map_entries.reserve(num_pipelines);
+
+  std::vector<VkSpecializationInfo> specialization_infos;
+  specialization_infos.reserve(num_pipelines);
+
+  std::vector<VkPipelineShaderStageCreateInfo> shader_stage_create_infos;
+  shader_stage_create_infos.reserve(num_pipelines);
+
+  std::vector<VkComputePipelineCreateInfo> create_infos;
+  create_infos.reserve(num_pipelines);
+
+  for (auto& key : descriptors) {
+    map_entries.push_back(key.specialization_constants.generate_map_entries());
+
+    specialization_infos.push_back(VkSpecializationInfo{
+        key.specialization_constants.size(), // mapEntryCount
+        map_entries.back().data(), // pMapEntries
+        key.specialization_constants.data_nbytes(), // dataSize
+        key.specialization_constants.data(), // pData
+    });
+
+    shader_stage_create_infos.push_back(VkPipelineShaderStageCreateInfo{
+        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // sType
+        nullptr, // pNext
+        0u, // flags
+        VK_SHADER_STAGE_COMPUTE_BIT, // stage
+        key.shader_module, // module
+        "main", // pName
+        &specialization_infos.back(), // pSpecializationInfo
+    });
+
+    create_infos.push_back(VkComputePipelineCreateInfo{
+        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // sType
+        nullptr, // pNext
+        0u, // flags
+        shader_stage_create_infos.back(), // stage
+        key.pipeline_layout, // layout
+        VK_NULL_HANDLE, // basePipelineHandle
+        0u, // basePipelineIndex
+    });
+  }
+
+  VK_CHECK(vkCreateComputePipelines(
+      device_,
+      pipeline_cache_,
+      create_infos.size(),
+      create_infos.data(),
+      nullptr,
+      pipelines.data()));
+
+  uint32_t i = 0;
+  for (auto& key : descriptors) {
+    auto it = cache_.find(key);
+    if (it != cache_.cend()) {
+      continue;
+    }
+    cache_.insert({key, ComputePipelineCache::Value(device_, pipelines[i])});
+    ++i;
+  }
+}
+
 VkPipeline ComputePipelineCache::retrieve(
     const ComputePipelineCache::Key& key) {
   std::lock_guard<std::mutex> lock(cache_mutex_);
 
   auto it = cache_.find(key);
-  if (cache_.cend() == it) {
+  if (it == cache_.cend()) {
     it = cache_
              .insert(
                  {key,
                   ComputePipelineCache::Value(device_, key, pipeline_cache_)})
              .first;
   }
-
   return it->second.handle();
 }
 
diff --git a/backends/vulkan/runtime/vk_api/Pipeline.h b/backends/vulkan/runtime/vk_api/Pipeline.h
index 1e0fc1e28aa..67dfaebe75b 100644
--- a/backends/vulkan/runtime/vk_api/Pipeline.h
+++ b/backends/vulkan/runtime/vk_api/Pipeline.h
@@ -19,6 +19,7 @@
 
 #include <mutex>
 #include <unordered_map>
+#include <unordered_set>
 
 #define SV(x) ::vkcompute::vkapi::SpecVar(x)
 
@@ -158,6 +159,8 @@ class ComputePipeline final {
     SpecVarList specialization_constants;
   };
 
+  explicit ComputePipeline(VkDevice device, VkPipeline handle);
+
   explicit ComputePipeline(
       VkDevice device,
       const Descriptor& descriptor,
@@ -185,6 +188,10 @@ class ComputePipeline final {
   // does not allow for move assignment. The swap function will
   // be used in the hash map.
   friend void swap(ComputePipeline& lhs, ComputePipeline& rhs) noexcept;
+
+  friend bool operator==(
+      const ComputePipeline::Descriptor& _1,
+      const ComputePipeline::Descriptor& _2);
 };
 
 class PipelineLayoutCache final {
@@ -293,6 +300,8 @@ class ComputePipelineCache final {
   const std::string cache_data_path_;
 
  public:
+  bool contains(const Key&);
+  void create_pipelines(const std::unordered_set<Key, Hasher>&);
   VkPipeline retrieve(const Key&);
   void purge();
 };

From 767b781ba68be96084184633f221e0b5c4fd2e56 Mon Sep 17 00:00:00 2001
From: jorgep31415 <jorgepineda140@gmail.com>
Date: Wed, 4 Jun 2025 10:45:57 -0700
Subject: [PATCH 2/3] [ET-VK] Support setting cache_data_path at run time

Pull Request resolved: https://github.com/pytorch/executorch/pull/11350
TSIA

ghstack-source-id: 288183826
@exported-using-ghexport

Differential Revision: [D75918105](https://our.internmc.facebook.com/intern/diff/D75918105/)
---
 backends/vulkan/runtime/vk_api/Runtime.cpp | 17 ++++++++++++++---
 backends/vulkan/runtime/vk_api/Runtime.h   |  2 ++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/backends/vulkan/runtime/vk_api/Runtime.cpp b/backends/vulkan/runtime/vk_api/Runtime.cpp
index 9b30aaacda8..2073a7364ba 100644
--- a/backends/vulkan/runtime/vk_api/Runtime.cpp
+++ b/backends/vulkan/runtime/vk_api/Runtime.cpp
@@ -258,7 +258,8 @@ uint32_t select_first(const std::vector<Runtime::DeviceMapping>& devices) {
 // Global runtime initialization
 //
 
-std::unique_ptr<Runtime> init_global_vulkan_runtime() {
+std::unique_ptr<Runtime> init_global_vulkan_runtime(
+    const std::string& cache_data_path) {
   // Load Vulkan drivers
 #if defined(USE_VULKAN_VOLK)
   if (VK_SUCCESS != volkInitialize()) {
@@ -278,7 +279,6 @@ std::unique_ptr<Runtime> init_global_vulkan_runtime() {
 #endif /* VULKAN_DEBUG */
   const bool init_default_device = true;
   const uint32_t num_requested_queues = 1; // TODO: raise this value
-  const std::string cache_data_path = ""; // TODO: expose to client
 
   const RuntimeConfig default_config{
       enable_validation_messages,
@@ -377,13 +377,24 @@ uint32_t Runtime::create_adapter(const Selector& selector) {
   return adapter_i;
 }
 
+std::string& set_and_get_pipeline_cache_data_path(
+    const std::string& file_path) {
+  // The global cache data path is declared as a static local variable for the
+  // same reasons as the global runtime below.
+  static std::string global_cache_data_path;
+  if (file_path.size() > 0) {
+    global_cache_data_path = file_path;
+  }
+  return global_cache_data_path;
+}
+
 Runtime* runtime() {
   // The global vulkan runtime is declared as a static local variable within a
   // non-static function to ensure it has external linkage. If it were a global
   // static variable there would be one copy per translation unit that includes
   // Runtime.h as it would have internal linkage.
   static const std::unique_ptr<Runtime> p_runtime =
-      init_global_vulkan_runtime();
+      init_global_vulkan_runtime(set_and_get_pipeline_cache_data_path(""));
 
   VK_CHECK_COND(
       p_runtime,
diff --git a/backends/vulkan/runtime/vk_api/Runtime.h b/backends/vulkan/runtime/vk_api/Runtime.h
index 16f1400021c..c1b67c0dbdc 100644
--- a/backends/vulkan/runtime/vk_api/Runtime.h
+++ b/backends/vulkan/runtime/vk_api/Runtime.h
@@ -100,6 +100,8 @@ class Runtime final {
   uint32_t create_adapter(const Selector&);
 };
 
+std::string& set_and_get_pipeline_cache_data_path(const std::string& file_path);
+
 // The global runtime is retrieved using this function, where it is declared as
 // a static local variable.
 Runtime* runtime();

From e7122c31a5722b15d5f77819fdebcac42d773ab1 Mon Sep 17 00:00:00 2001
From: jorgep31415 <jorgepineda140@gmail.com>
Date: Wed, 4 Jun 2025 10:45:59 -0700
Subject: [PATCH 3/3] [ET-VK] Support setting cache_data_path at build time

Pull Request resolved: https://github.com/pytorch/executorch/pull/11359
TSIA

ghstack-source-id: 288183827
@exported-using-ghexport

Differential Revision: [D75708084](https://our.internmc.facebook.com/intern/diff/D75708084/)
---
 backends/vulkan/runtime/vk_api/Runtime.cpp | 5 +++++
 backends/vulkan/targets.bzl                | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/backends/vulkan/runtime/vk_api/Runtime.cpp b/backends/vulkan/runtime/vk_api/Runtime.cpp
index 2073a7364ba..cadf1184a08 100644
--- a/backends/vulkan/runtime/vk_api/Runtime.cpp
+++ b/backends/vulkan/runtime/vk_api/Runtime.cpp
@@ -381,7 +381,12 @@ std::string& set_and_get_pipeline_cache_data_path(
     const std::string& file_path) {
   // The global cache data path is declared as a static local variable for the
   // same reasons as the global runtime below.
+#if defined(ETVK_DEFAULT_CACHE_PATH)
+  static std::string global_cache_data_path = ETVK_DEFAULT_CACHE_PATH;
+#else
   static std::string global_cache_data_path;
+#endif /* ETVK_DEFAULT_CACHE_PATH */
+
   if (file_path.size() > 0) {
     global_cache_data_path = file_path;
   }
diff --git a/backends/vulkan/targets.bzl b/backends/vulkan/targets.bzl
index 665fde103fc..29578aa177a 100644
--- a/backends/vulkan/targets.bzl
+++ b/backends/vulkan/targets.bzl
@@ -170,6 +170,10 @@ def define_common_targets(is_fbcode = False):
                 "ovr_config//os:macos": mac_flags,
             })
 
+            etvk_default_cache_path = read_config("etvk", "default_cache_path", "")
+            if etvk_default_cache_path != "":
+                VK_API_PREPROCESSOR_FLAGS += ["-DETVK_DEFAULT_CACHE_PATH={}".format(etvk_default_cache_path)]
+
             debug_mode = read_config("etvk", "debug", "0") == "1"
             if debug_mode:
                 VK_API_PREPROCESSOR_FLAGS += ["-DVULKAN_DEBUG"]