From 552a31f4a6d91bc766512e699260416a33d80ed9 Mon Sep 17 00:00:00 2001
From: ssjia
Date: Wed, 18 Mar 2026 07:24:28 -0700
Subject: [PATCH 1/2] [ET-VK] Fix staging buffer allocation to check all memory types for HOST_CACHED

`test_host_cached_available()` only checked `memoryTypes[0]` to determine if HOST_CACHED memory was available. On Pixel devices, `memoryTypes[0]` is DEVICE_LOCAL without HOST_CACHED, so the function incorrectly returned `SEQUENTIAL_WRITE_BIT`. This caused DEVICE_TO_HOST staging buffers to be allocated in write-combining (uncached) memory, making CPU reads during COPY_OUTPUTS ~170x slower than necessary (~40ms vs ~237us on S24).

The fix iterates over all memory types to correctly detect HOST_CACHED support.

On-device profiling of edgetam_first_frame_fp16_vulkan.pte confirms the fix:
- Pixel 8 Pro COPY_OUTPUTS: 40ms -> 6.3ms (-84%)
- Pixel 9 Pro XL COPY_OUTPUTS: 40ms -> 2.5ms (-94%)
- Pixel 8 Pro Method::execute: 492ms -> 464ms (-5.7%)
- Pixel 9 Pro XL Method::execute: 445ms -> 411ms (-7.6%)

Differential Revision: [D97058156](https://our.internmc.facebook.com/intern/diff/D97058156/)

[ghstack-poisoned]
---
 backends/vulkan/runtime/vk_api/memory/Allocator.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
index f5abf3e6d0c..f36b2b0c09e 100644
--- a/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
+++ b/backends/vulkan/runtime/vk_api/memory/Allocator.cpp
@@ -16,13 +16,12 @@ VmaAllocationCreateFlags test_host_cached_available(
   VkPhysicalDeviceMemoryProperties mem_props;
   vkGetPhysicalDeviceMemoryProperties(physical_device, &mem_props);
 
-  VkMemoryPropertyFlags const flags = mem_props.memoryTypes->propertyFlags;
-
-  bool const host_visible = flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
-  bool const host_cached = flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-
-  if (host_visible && host_cached) {
-    return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+  for (uint32_t i = 0; i < mem_props.memoryTypeCount; i++) {
+    VkMemoryPropertyFlags flags = mem_props.memoryTypes[i].propertyFlags;
+    if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
+        (flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT)) {
+      return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+    }
   }
 
   return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;

From c4106b4ea031f5d3f46a04b271d5316f1b422345 Mon Sep 17 00:00:00 2001
From: ssjia
Date: Wed, 18 Mar 2026 07:37:25 -0700
Subject: [PATCH 2/2] Update on "[ET-VK] Fix staging buffer allocation to check all memory types for HOST_CACHED"

`test_host_cached_available()` only checked `memoryTypes[0]` to determine if HOST_CACHED memory was available. On Pixel devices, `memoryTypes[0]` is DEVICE_LOCAL without HOST_CACHED, so the function incorrectly returned `SEQUENTIAL_WRITE_BIT`. This caused DEVICE_TO_HOST staging buffers to be allocated in write-combining (uncached) memory, making CPU reads during COPY_OUTPUTS ~170x slower than necessary (~40ms vs ~237us on S24).

The fix iterates over all memory types to correctly detect HOST_CACHED support.

On-device profiling of edgetam_first_frame_fp16_vulkan.pte confirms the fix:
- Pixel 8 Pro COPY_OUTPUTS: 40ms -> 6.3ms (-84%)
- Pixel 9 Pro XL COPY_OUTPUTS: 40ms -> 2.5ms (-94%)
- Pixel 8 Pro Method::execute: 492ms -> 464ms (-5.7%)
- Pixel 9 Pro XL Method::execute: 445ms -> 411ms (-7.6%)

Differential Revision: [D97058156](https://our.internmc.facebook.com/intern/diff/D97058156/)

[ghstack-poisoned]