From 76b5fd498caf54ad116b192a6ae3aed6a2a5e3f0 Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 00:00:52 -0300 Subject: [PATCH 1/8] [dxvk, config] backport dxvk.hideIntegratedGraphics --- src/dxvk/dxvk_instance.cpp | 16 +++++++++++++++- src/dxvk/dxvk_options.cpp | 1 + src/dxvk/dxvk_options.h | 9 +++++++++ src/util/config/config.cpp | 5 +++++ 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/dxvk/dxvk_instance.cpp b/src/dxvk/dxvk_instance.cpp index 43768fb1007..e7ebb0920c3 100644 --- a/src/dxvk/dxvk_instance.cpp +++ b/src/dxvk/dxvk_instance.cpp @@ -175,9 +175,18 @@ namespace dxvk { DxvkDeviceFilter filter(filterFlags); std::vector> result; + uint32_t numDGPU = 0; + uint32_t numIGPU = 0; + for (uint32_t i = 0; i < numAdapters; i++) { - if (filter.testAdapter(deviceProperties[i])) + if (filter.testAdapter(deviceProperties[i])) { + if (deviceProperties[i].deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) + numDGPU += 1; + else if (deviceProperties[i].deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) + numIGPU += 1; + result.push_back(new DxvkAdapter(m_vki, adapters[i])); + } } std::stable_sort(result.begin(), result.end(), @@ -199,6 +208,11 @@ namespace dxvk { return aRank < bRank; }); + if (m_options.hideIntegratedGraphics && numDGPU > 0 && numIGPU > 0) { + result.resize(numDGPU); + numIGPU = 0; + } + if (result.size() == 0) { Logger::warn("DXVK: No adapters found. 
Please check your " "device filter settings and Vulkan setup."); diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index 656cb7cff10..0c2d02758b2 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -9,6 +9,7 @@ namespace dxvk { useRawSsbo = config.getOption("dxvk.useRawSsbo", Tristate::Auto); shrinkNvidiaHvvHeap = config.getOption("dxvk.shrinkNvidiaHvvHeap", Tristate::Auto); hud = config.getOption("dxvk.hud", ""); + hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); enableDyasync = config.getOption ("dxvk.enableDyasync", true); numDyasyncThreads = config.getOption ("dxvk.numDyasyncThreads", 0); } diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index da80edeb196..d6da4f9c2fc 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -18,7 +18,16 @@ namespace dxvk { /// when using the state cache int32_t numCompilerThreads; + // Hides integrated GPUs if dedicated GPUs are + // present. May be necessary for some games that + // incorrectly assume monitor layouts. + bool hideIntegratedGraphics; + + // Enable or disable Dyasync bool enableDyasync; + + // Number of compiler threads + // when using Dyasync int32_t numDyasyncThreads; /// Shader-related options diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index d5b0517bc2f..99c1a68f5ba 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -470,6 +470,11 @@ namespace dxvk { { R"(\\Kena-Win64-Shipping\.exe$)", {{ { "dxgi.hideIntelGpu", "True" }, }} }, + /* Warcraft 3 Reforged * + * Bugs out on some multi-gpu systems. 
*/ + { R"(\\x86_64\\Warcraft III\.exe$)", {{ + { "dxvk.hideIntegratedGraphics", "True" }, + }} }, /* Earth Defense Force 5 */ { R"(\\EDF5\.exe$)", {{ { "dxgi.tearFree", "False" }, From 2bcebdf5ca8ad71bf0f9d1b61b34bb634063733d Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 00:15:55 -0300 Subject: [PATCH 2/8] [dxvk, config] backport dxvk.deviceFilter --- src/dxvk/dxvk_device_filter.cpp | 23 ++++++++++++-------- src/dxvk/dxvk_device_filter.h | 37 ++++++++++++++++++--------------- src/dxvk/dxvk_instance.cpp | 2 +- src/dxvk/dxvk_options.cpp | 1 + src/dxvk/dxvk_options.h | 3 +++ 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/dxvk/dxvk_device_filter.cpp b/src/dxvk/dxvk_device_filter.cpp index dfb5edaf4e8..8d140c2be4e 100644 --- a/src/dxvk/dxvk_device_filter.cpp +++ b/src/dxvk/dxvk_device_filter.cpp @@ -1,21 +1,26 @@ #include "dxvk_device_filter.h" namespace dxvk { - - DxvkDeviceFilter::DxvkDeviceFilter(DxvkDeviceFilterFlags flags) + + DxvkDeviceFilter::DxvkDeviceFilter( + DxvkDeviceFilterFlags flags, + const DxvkOptions& options) : m_flags(flags) { m_matchDeviceName = env::getEnvVar("DXVK_FILTER_DEVICE_NAME"); - + + if (m_matchDeviceName.empty()) + m_matchDeviceName = options.deviceFilter; + if (m_matchDeviceName.size() != 0) m_flags.set(DxvkDeviceFilterFlag::MatchDeviceName); } - - + + DxvkDeviceFilter::~DxvkDeviceFilter() { - + } - - + + bool DxvkDeviceFilter::testAdapter(const VkPhysicalDeviceProperties& properties) const { if (properties.apiVersion < VK_MAKE_VERSION(1, 1, 0)) { Logger::warn(str::format("Skipping Vulkan 1.0 adapter: ", properties.deviceName)); @@ -34,5 +39,5 @@ namespace dxvk { return true; } - + } diff --git a/src/dxvk/dxvk_device_filter.h b/src/dxvk/dxvk_device_filter.h index 7b411e6ad29..9cf50a7ec1c 100644 --- a/src/dxvk/dxvk_device_filter.h +++ b/src/dxvk/dxvk_device_filter.h @@ -1,12 +1,13 @@ #pragma once #include "dxvk_adapter.h" +#include "dxvk_options.h" namespace dxvk { - + /** * \brief Device filter 
flags - * + * * The device filter flags specify which device * properties are considered when testing adapters. * If no flags are set, all devices pass the test. @@ -15,40 +16,42 @@ namespace dxvk { MatchDeviceName = 0, SkipCpuDevices = 1, }; - + using DxvkDeviceFilterFlags = Flags; - - + + /** * \brief DXVK device filter - * + * * Used to select specific Vulkan devices to use * with DXVK. This may be useful for games which * do not offer an option to select the correct * device. */ class DxvkDeviceFilter { - + public: - - DxvkDeviceFilter(DxvkDeviceFilterFlags flags); + + DxvkDeviceFilter( + DxvkDeviceFilterFlags flags, + const DxvkOptions& options); ~DxvkDeviceFilter(); - + /** * \brief Tests an adapter - * + * * \param [in] properties Adapter properties * \returns \c true if the test passes */ bool testAdapter( const VkPhysicalDeviceProperties& properties) const; - + private: - + DxvkDeviceFilterFlags m_flags; - + std::string m_matchDeviceName; - + }; - -} \ No newline at end of file + +} diff --git a/src/dxvk/dxvk_instance.cpp b/src/dxvk/dxvk_instance.cpp index e7ebb0920c3..7b1a9d4b62e 100644 --- a/src/dxvk/dxvk_instance.cpp +++ b/src/dxvk/dxvk_instance.cpp @@ -172,7 +172,7 @@ namespace dxvk { filterFlags.set(DxvkDeviceFilterFlag::SkipCpuDevices); } - DxvkDeviceFilter filter(filterFlags); + DxvkDeviceFilter filter(filterFlags, m_options); std::vector> result; uint32_t numDGPU = 0; diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index 0c2d02758b2..6ada7274083 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -10,6 +10,7 @@ namespace dxvk { shrinkNvidiaHvvHeap = config.getOption("dxvk.shrinkNvidiaHvvHeap", Tristate::Auto); hud = config.getOption("dxvk.hud", ""); hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); + deviceFilter = config.getOption("dxvk.deviceFilter", ""); enableDyasync = config.getOption ("dxvk.enableDyasync", true); numDyasyncThreads = config.getOption 
("dxvk.numDyasyncThreads", 0); } diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index d6da4f9c2fc..7d83ff68cb9 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -23,6 +23,9 @@ namespace dxvk { // incorrectly assume monitor layouts. bool hideIntegratedGraphics; + /// Device name + std::string deviceFilter; + // Enable or disable Dyasync bool enableDyasync; From aa24d71015f3b84b40b4d027d29b56bbd53c5a1f Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 00:27:32 -0300 Subject: [PATCH 3/8] [dxvk, config] backport dxvk.maxMemoryBudget --- src/dxvk/dxvk_memory.cpp | 111 ++++++++++++++++++++------------------ src/dxvk/dxvk_options.cpp | 2 + src/dxvk/dxvk_options.h | 3 ++ 3 files changed, 63 insertions(+), 53 deletions(-) diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index 2fe01c1d73a..d6f607846d0 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -4,7 +4,7 @@ #include "dxvk_memory.h" namespace dxvk { - + DxvkMemory::DxvkMemory() { } DxvkMemory::DxvkMemory( DxvkMemoryAllocator* alloc, @@ -21,8 +21,8 @@ namespace dxvk { m_offset (offset), m_length (length), m_mapPtr (mapPtr) { } - - + + DxvkMemory::DxvkMemory(DxvkMemory&& other) : m_alloc (std::exchange(other.m_alloc, nullptr)), m_chunk (std::exchange(other.m_chunk, nullptr)), @@ -31,8 +31,8 @@ namespace dxvk { m_offset (std::exchange(other.m_offset, 0)), m_length (std::exchange(other.m_length, 0)), m_mapPtr (std::exchange(other.m_mapPtr, nullptr)) { } - - + + DxvkMemory& DxvkMemory::operator = (DxvkMemory&& other) { this->free(); m_alloc = std::exchange(other.m_alloc, nullptr); @@ -44,18 +44,18 @@ namespace dxvk { m_mapPtr = std::exchange(other.m_mapPtr, nullptr); return *this; } - - + + DxvkMemory::~DxvkMemory() { this->free(); } - - + + void DxvkMemory::free() { if (m_alloc != nullptr) m_alloc->free(*this); } - + DxvkMemoryChunk::DxvkMemoryChunk( DxvkMemoryAllocator* alloc, @@ -66,15 +66,15 @@ namespace dxvk { // Mark the entire 
chunk as free m_freeList.push_back(FreeSlice { 0, memory.memSize }); } - - + + DxvkMemoryChunk::~DxvkMemoryChunk() { // This call is technically not thread-safe, but it // doesn't need to be since we don't free chunks m_alloc->freeDeviceMemory(m_type, m_memory); } - - + + DxvkMemory DxvkMemoryChunk::alloc( VkMemoryPropertyFlags flags, VkDeviceSize size, @@ -84,15 +84,15 @@ namespace dxvk { // be refined a bit in the future if necessary. if (m_memory.memFlags != flags || !checkHints(hints)) return DxvkMemory(); - + // If the chunk is full, return if (m_freeList.size() == 0) return DxvkMemory(); - + // Select the slice to allocate from in a worst-fit // manner. This may help keep fragmentation low. auto bestSlice = m_freeList.begin(); - + for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) { if (slice->length == size) { bestSlice = slice; @@ -101,34 +101,34 @@ namespace dxvk { bestSlice = slice; } } - + // We need to align the allocation to the requested alignment const VkDeviceSize sliceStart = bestSlice->offset; const VkDeviceSize sliceEnd = bestSlice->offset + bestSlice->length; - + const VkDeviceSize allocStart = dxvk::align(sliceStart, align); const VkDeviceSize allocEnd = dxvk::align(allocStart + size, align); - + if (allocEnd > sliceEnd) return DxvkMemory(); - + // We can use this slice, but we'll have to add // the unused parts of it back to the free list. m_freeList.erase(bestSlice); - + if (allocStart != sliceStart) m_freeList.push_back({ sliceStart, allocStart - sliceStart }); - + if (allocEnd != sliceEnd) m_freeList.push_back({ allocEnd, sliceEnd - allocEnd }); - + // Create the memory object with the aligned slice return DxvkMemory(m_alloc, this, m_type, m_memory.memHandle, allocStart, allocEnd - allocStart, reinterpret_cast(m_memory.memPointer) + allocStart); } - - + + void DxvkMemoryChunk::free( VkDeviceSize offset, VkDeviceSize length) { @@ -136,7 +136,7 @@ namespace dxvk { // a new slice that covers all those entries. 
Without doing // so, the slice could not be reused for larger allocations. auto curr = m_freeList.begin(); - + while (curr != m_freeList.end()) { if (curr->offset == offset + length) { length += curr->length; @@ -149,11 +149,11 @@ namespace dxvk { curr++; } } - + m_freeList.push_back({ offset, length }); } - - + + bool DxvkMemoryChunk::isEmpty() const { return m_freeList.size() == 1 && m_freeList[0].length == m_memory.memSize; @@ -184,6 +184,8 @@ namespace dxvk { m_device (device), m_devProps (device->adapter()->deviceProperties()), m_memProps (device->adapter()->memoryProperties()) { + VkDeviceSize maxBudget = m_device->config().maxMemoryBudget; + for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { m_memHeaps[i].properties = m_memProps.memoryHeaps[i]; m_memHeaps[i].stats = DxvkMemoryStats { 0, 0 }; @@ -194,8 +196,11 @@ namespace dxvk { if ((m_memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) && (m_device->isUnifiedMemoryArchitecture())) m_memHeaps[i].budget = (8 * m_memProps.memoryHeaps[i].size) / 10; + + if (maxBudget && (!m_memHeaps[i].budget || m_memHeaps[i].budget > maxBudget)) + m_memHeaps[i].budget = maxBudget; } - + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { m_memTypes[i].heap = &m_memHeaps[m_memProps.memoryTypes[i].heapIndex]; m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; @@ -233,13 +238,13 @@ namespace dxvk { } } } - - + + DxvkMemoryAllocator::~DxvkMemoryAllocator() { - + } - - + + DxvkMemory DxvkMemoryAllocator::alloc( const VkMemoryRequirements* req, const VkMemoryDedicatedRequirements& dedAllocReq, @@ -281,14 +286,14 @@ namespace dxvk { VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; VkMemoryPropertyFlags remFlags = 0; - + while (!result && (flags & optFlags)) { remFlags |= optFlags & -optFlags; optFlags &= ~remFlags; result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, hints); } - + if (!result) { DxvkAdapterMemoryInfo memHeapInfo = 
m_device->adapter()->getMemoryHeapInfo(); @@ -314,11 +319,11 @@ namespace dxvk { throw DxvkError("DxvkMemoryAllocator: Memory allocation failed"); } - + return result; } - - + + DxvkMemory DxvkMemoryAllocator::tryAlloc( const VkMemoryRequirements* req, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, @@ -329,17 +334,17 @@ namespace dxvk { for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { const bool supported = (req->memoryTypeBits & (1u << i)) != 0; const bool adequate = (m_memTypes[i].memType.propertyFlags & flags) == flags; - + if (supported && adequate) { result = this->tryAllocFromType(&m_memTypes[i], flags, req->size, req->alignment, hints, dedAllocInfo); } } - + return result; } - - + + DxvkMemory DxvkMemoryAllocator::tryAllocFromType( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -363,10 +368,10 @@ namespace dxvk { } else { for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) memory = type->chunks[i]->alloc(flags, size, align, hints); - + if (!memory) { DxvkDeviceMemory devMem; - + if (this->shouldFreeEmptyChunks(type->heap, chunkSize)) this->freeEmptyChunks(type->heap); @@ -387,8 +392,8 @@ namespace dxvk { return memory; } - - + + DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -397,7 +402,7 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo* dedAllocInfo) { bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && (m_device->features().extMemoryPriority.memoryPriority); - + if (type->heap->budget && type->heap->stats.memoryAllocated + size > type->heap->budget) return DxvkDeviceMemory(); @@ -426,7 +431,7 @@ namespace dxvk { if (m_vkd->vkAllocateMemory(m_vkd->device(), &info, nullptr, &result.memHandle) != VK_SUCCESS) return DxvkDeviceMemory(); - + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VkResult status = m_vkd->vkMapMemory(m_vkd->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer); @@ -463,7 +468,7 @@ 
namespace dxvk { } } - + void DxvkMemoryAllocator::freeChunkMemory( DxvkMemoryType* type, DxvkMemoryChunk* chunk, @@ -483,7 +488,7 @@ namespace dxvk { type->chunks.push_back(std::move(chunkRef)); } } - + void DxvkMemoryAllocator::freeDeviceMemory( DxvkMemoryType* type, diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index 6ada7274083..49d90b52e8c 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -11,6 +11,8 @@ namespace dxvk { hud = config.getOption("dxvk.hud", ""); hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); deviceFilter = config.getOption("dxvk.deviceFilter", ""); + auto budget = config.getOption("dxvk.maxMemoryBudget", 0); + maxMemoryBudget = VkDeviceSize(std::max(budget, 0)) << 20u; enableDyasync = config.getOption ("dxvk.enableDyasync", true); numDyasyncThreads = config.getOption ("dxvk.numDyasyncThreads", 0); } diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index 7d83ff68cb9..f135beecbd8 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -26,6 +26,9 @@ namespace dxvk { /// Device name std::string deviceFilter; + /// Memory budget in bytes + VkDeviceSize maxMemoryBudget; + // Enable or disable Dyasync bool enableDyasync; From 420b68034bb6d3031a49b401686d5f7adfbf6b93 Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 00:58:07 -0300 Subject: [PATCH 4/8] [dxvk, config] backport dxvk.tilerMode tilerMode is only partially active. preferCachedMemory biases host-visible allocations toward cached memory on tilers and works. preferRenderPassOps is detected and set but not yet consumed by Sarek render-pass code, which diverges significantly from upstream; wiring it requires changes to the context/render-pass layer and on-device testing, left as a future todo. 
--- src/dxvk/dxvk_adapter.cpp | 6 ++ src/dxvk/dxvk_adapter.h | 93 ++++++++++++++------------- src/dxvk/dxvk_device.cpp | 106 +++++++++++++++++-------------- src/dxvk/dxvk_device.h | 128 +++++++++++++++++++------------------- src/dxvk/dxvk_memory.cpp | 17 ++++- src/dxvk/dxvk_options.cpp | 1 + src/dxvk/dxvk_options.h | 7 +++ 7 files changed, 207 insertions(+), 151 deletions(-) diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index b29d096e63f..265a333e7f1 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -530,6 +530,12 @@ namespace dxvk { } + bool DxvkAdapter::matchesDriver( + VkDriverIdKHR driver) const { + return driver == m_deviceInfo.khrDeviceDriverProperties.driverID; + } + + void DxvkAdapter::logAdapterInfo() const { VkPhysicalDeviceProperties deviceInfo = this->deviceProperties(); VkPhysicalDeviceMemoryProperties memoryInfo = this->memoryProperties(); diff --git a/src/dxvk/dxvk_adapter.h b/src/dxvk/dxvk_adapter.h index f2fb1f0af75..b954ca274c8 100644 --- a/src/dxvk/dxvk_adapter.h +++ b/src/dxvk/dxvk_adapter.h @@ -5,10 +5,10 @@ #include "dxvk_include.h" namespace dxvk { - + class DxvkDevice; class DxvkInstance; - + /** * \brief GPU vendors * Based on PCIe IDs. @@ -21,7 +21,7 @@ namespace dxvk { /** * \brief Adapter memory heap info - * + * * Stores info about a heap, and the amount * of memory allocated from it by the app. */ @@ -33,7 +33,7 @@ namespace dxvk { /** * \brief Adapter memory info - * + * * Stores properties and allocation * info of each available heap. */ @@ -49,23 +49,23 @@ namespace dxvk { uint32_t graphics; uint32_t transfer; }; - + /** * \brief DXVK adapter - * + * * Corresponds to a physical device in Vulkan. Provides * all kinds of information about the device itself and * the supported feature set. 
*/ class DxvkAdapter : public RcObject { - + public: - + DxvkAdapter( const Rc& vki, VkPhysicalDevice handle); ~DxvkAdapter(); - + /** * \brief Vulkan instance functions * \returns Vulkan instance functions @@ -73,7 +73,7 @@ namespace dxvk { Rc vki() const { return m_vki; } - + /** * \brief Physical device handle * \returns The adapter handle @@ -81,10 +81,10 @@ namespace dxvk { VkPhysicalDevice handle() const { return m_handle; } - + /** * \brief Physical device properties - * + * * Returns a read-only reference to the core * properties of the Vulkan physical device. * \returns Physical device core properties @@ -95,7 +95,7 @@ namespace dxvk { /** * \brief Device info - * + * * Returns a read-only reference to the full * device info structure, including extended * properties. @@ -104,20 +104,20 @@ namespace dxvk { const DxvkDeviceInfo& devicePropertiesExt() const { return m_deviceInfo; } - + /** * \brief Supportred device features - * + * * Queries the supported device features. * \returns Device features */ const DxvkDeviceFeatures& features() const { return m_deviceFeatures; } - + /** * \brief Retrieves memory heap info - * + * * Returns properties of all available memory heaps, * both device-local and non-local heaps, and the * amount of memory allocated from those heaps by @@ -125,10 +125,10 @@ namespace dxvk { * \returns Memory heap info */ DxvkAdapterMemoryInfo getMemoryHeapInfo() const; - + /** * \brief Memory properties - * + * * Queries the memory types and memory heaps of * the device. This is useful for memory allocators. 
* \returns Device memory properties @@ -137,16 +137,16 @@ namespace dxvk { /** * \brief Queries format support - * + * * \param [in] format The format to query * \returns Format support info */ VkFormatProperties formatProperties( VkFormat format) const; - + /** * \brief Queries image format support - * + * * \param [in] format Format to query * \param [in] type Image type * \param [in] tiling Image tiling @@ -162,22 +162,22 @@ namespace dxvk { VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties& properties) const; - + /** * \brief Retrieves queue family indices * \returns Indices for all queue families */ DxvkAdapterQueueIndices findQueueFamilies() const; - + /** * \brief Tests whether all required features are supported - * + * * \param [in] features Required device features * \returns \c true if all features are supported */ bool checkFeatureSupport( const DxvkDeviceFeatures& required) const; - + /** * \brief Enables extensions for this adapter * @@ -188,10 +188,10 @@ namespace dxvk { */ void enableExtensions( const DxvkNameSet& extensions); - + /** * \brief Creates a DXVK device - * + * * Creates a logical device for this adapter. * \param [in] instance Parent instance * \param [in] enabledFeatures Device features @@ -200,10 +200,10 @@ namespace dxvk { Rc createDevice( const Rc& instance, DxvkDeviceFeatures enabledFeatures); - + /** * \brief Registers memory allocation - * + * * Updates memory alloc info accordingly. * \param [in] heap Memory heap index * \param [in] bytes Allocation size @@ -211,10 +211,10 @@ namespace dxvk { void notifyHeapMemoryAlloc( uint32_t heap, VkDeviceSize bytes); - + /** * \brief Registers memory deallocation - * + * * Updates memory alloc info accordingly. 
* \param [in] heap Memory heap index * \param [in] bytes Allocation size @@ -222,7 +222,7 @@ namespace dxvk { void notifyHeapMemoryFree( uint32_t heap, VkDeviceSize bytes); - + /** * \brief Tests if the driver matches certain criteria * @@ -238,15 +238,24 @@ namespace dxvk { VkDriverIdKHR driver, uint32_t minVer, uint32_t maxVer) const; - + + /** + * \brief Tests whether the driver matches a given driver ID + * + * \param [in] driver Driver ID to match against + * \returns \c true if the driver ID matches + */ + bool matchesDriver( + VkDriverIdKHR driver) const; + /** * \brief Logs DXVK adapter info - * + * * May be useful for bug reports * and general troubleshooting. */ void logAdapterInfo() const; - + /** * \brief Checks whether this is a UMA system * @@ -255,9 +264,9 @@ namespace dxvk { * \returns \c true if the system has unified memory. */ bool isUnifiedMemoryArchitecture() const; - + private: - + Rc m_vki; VkPhysicalDevice m_handle; @@ -267,7 +276,7 @@ namespace dxvk { DxvkDeviceFeatures m_deviceFeatures; bool m_hasMemoryBudget; - + std::vector m_queueFamilies; std::array, VK_MAX_MEMORY_HEAPS> m_heapAlloc; @@ -281,11 +290,11 @@ namespace dxvk { uint32_t findQueueFamily( VkQueueFlags mask, VkQueueFlags flags) const; - + static void logNameList(const DxvkNameList& names); static void logFeatures(const DxvkDeviceFeatures& features); static void logQueueFamilies(const DxvkAdapterQueueIndices& queues); - + }; - + } diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index efe8354e27d..b57e6e0886a 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -2,7 +2,7 @@ #include "dxvk_instance.h" namespace dxvk { - + DxvkDevice::DxvkDevice( const Rc& instance, const Rc& adapter, @@ -23,8 +23,8 @@ namespace dxvk { m_queues.graphics = getQueue(queueFamilies.graphics, 0); m_queues.transfer = getQueue(queueFamilies.transfer, 0); } - - + + DxvkDevice::~DxvkDevice() { // If we are being destroyed during/after DLL process detachment // from 
TerminateProcess, etc, our CS threads are already destroyed @@ -60,10 +60,10 @@ namespace dxvk { VkPipelineStageFlags result = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - + if (m_features.core.features.geometryShader) result |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; - + if (m_features.core.features.tessellationShader) { result |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; @@ -79,14 +79,14 @@ namespace dxvk { options.maxNumDynamicStorageBuffers = m_properties.core.properties.limits.maxDescriptorSetStorageBuffersDynamic; return options; } - - + + Rc DxvkDevice::createCommandList() { Rc cmdList = m_recycledCommandLists.retrieveObject(); - + if (cmdList == nullptr) cmdList = new DxvkCommandList(this); - + return cmdList; } @@ -96,11 +96,11 @@ namespace dxvk { if (pool == nullptr) pool = new DxvkDescriptorPool(m_vkd); - + return pool; } - - + + Rc DxvkDevice::createContext() { return new DxvkContext(this); } @@ -117,63 +117,63 @@ namespace dxvk { uint32_t index) { return new DxvkGpuQuery(m_vkd, type, flags, index); } - - + + Rc DxvkDevice::createFence( const DxvkFenceCreateInfo& fenceInfo) { return new DxvkFence(this, fenceInfo); } - - + + Rc DxvkDevice::createFramebuffer( const DxvkFramebufferInfo& info) { return new DxvkFramebuffer(m_vkd, info); } - - + + Rc DxvkDevice::createBuffer( const DxvkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memoryType) { return new DxvkBuffer(this, createInfo, m_objects.memoryManager(), memoryType); } - - + + Rc DxvkDevice::createBufferView( const Rc& buffer, const DxvkBufferViewCreateInfo& createInfo) { return new DxvkBufferView(m_vkd, buffer, createInfo); } - - + + Rc DxvkDevice::createImage( const DxvkImageCreateInfo& createInfo, VkMemoryPropertyFlags memoryType) { return new DxvkImage(this, createInfo, m_objects.memoryManager(), memoryType); } - - + + Rc DxvkDevice::createImageFromVkImage( 
const DxvkImageCreateInfo& createInfo, VkImage image) { return new DxvkImage(this, createInfo, image); } - + Rc DxvkDevice::createImageView( const Rc& image, const DxvkImageViewCreateInfo& createInfo) { return new DxvkImageView(m_vkd, image, createInfo); } - - + + Rc DxvkDevice::createSampler( const DxvkSamplerCreateInfo& createInfo) { return new DxvkSampler(this, createInfo); } - - + + DxvkStatCounters DxvkDevice::getStatCounters() { DxvkPipelineCount pipe = m_objects.pipelineManager().getPipelineCount(); - + DxvkStatCounters result; result.setCtr(DxvkStatCounter::PipeCountGraphics, pipe.numGraphicsPipelines); result.setCtr(DxvkStatCounter::PipeCountCompute, pipe.numComputePipelines); @@ -184,8 +184,8 @@ namespace dxvk { result.merge(m_statCounters); return result; } - - + + DxvkMemoryStats DxvkDevice::getMemoryStats(uint32_t heap) { return m_objects.memoryManager().getMemoryStats(heap); } @@ -194,8 +194,8 @@ namespace dxvk { uint32_t DxvkDevice::getCurrentFrameId() const { return m_statCounters.getCtr(DxvkStatCounter::QueuePresentCount); } - - + + void DxvkDevice::initResources() { m_objects.dummyResources().clearResources(this); } @@ -204,8 +204,8 @@ namespace dxvk { void DxvkDevice::registerShader(const Rc& shader) { m_objects.pipelineManager().registerShader(shader); } - - + + void DxvkDevice::presentImage( const Rc& presenter, DxvkSubmitStatus* status) { @@ -214,7 +214,7 @@ namespace dxvk { DxvkPresentInfo presentInfo; presentInfo.presenter = presenter; m_submissionQueue.present(presentInfo, status); - + std::lock_guard statLock(m_statLock); m_statCounters.addCtr(DxvkStatCounter::QueuePresentCount, 1); } @@ -234,8 +234,8 @@ namespace dxvk { m_statCounters.merge(commandList->statCounters()); m_statCounters.addCtr(DxvkStatCounter::QueueSubmitCount, 1); } - - + + VkResult DxvkDevice::waitForSubmission(DxvkSubmitStatus* status) { VkResult result = status->result.load(); @@ -264,16 +264,16 @@ namespace dxvk { m_statCounters.addCtr(DxvkStatCounter::GpuSyncTicks, 
us.count()); } } - - + + void DxvkDevice::waitForIdle() { this->lockSubmission(); if (m_vkd->vkDeviceWaitIdle(m_vkd->device()) != VK_SUCCESS) Logger::err("DxvkDevice: waitForIdle: Operation failed"); this->unlockSubmission(); } - - + + DxvkDevicePerfHints DxvkDevice::getPerfHints() { DxvkDevicePerfHints hints; hints.preferFbDepthStencilCopy = m_extensions.extShaderStencilExport @@ -283,6 +283,25 @@ namespace dxvk { hints.preferFbResolve = m_extensions.amdShaderFragmentMask && (m_adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, 0, 0) || m_adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_AMD_PROPRIETARY_KHR, 0, 0)); + + // Detect tiler GPUs. Currently only used to prefer cached memory + // for host-visible allocations; render-pass handling is a todo. + bool tilerMode = m_adapter->matchesDriver(VK_DRIVER_ID_MESA_TURNIP) + || m_adapter->matchesDriver(VK_DRIVER_ID_QUALCOMM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP) + || m_adapter->matchesDriver(VK_DRIVER_ID_MOLTENVK) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_PANVK) + || m_adapter->matchesDriver(VK_DRIVER_ID_ARM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_V3DV) + || m_adapter->matchesDriver(VK_DRIVER_ID_BROADCOM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA) + || m_adapter->matchesDriver(VK_DRIVER_ID_IMAGINATION_PROPRIETARY); + + applyTristate(tilerMode, m_options.tilerMode); + hints.preferCachedMemory = tilerMode; + // Not consumed by the render-pass code yet; assigned anyway so the + // bitfield is never left indeterminate when the hints are copied. + hints.preferRenderPassOps = tilerMode; return hints; } @@ -290,7 +309,7 @@ namespace dxvk { void DxvkDevice::recycleCommandList(const Rc& cmdList) { m_recycledCommandLists.returnObject(cmdList); } - + void DxvkDevice::recycleDescriptorPool(const Rc& pool) { m_recycledDescriptorPools.returnObject(pool); @@ -304,5 +323,5 @@ namespace dxvk { m_vkd->vkGetDeviceQueue(m_vkd->device(), family, index, &queue); return DxvkDeviceQueue { queue, family, index }; } - + } diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index
e23f2c6f048..1e963161a87 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -27,7 +27,7 @@ #include "../vulkan/vulkan_presenter.h" namespace dxvk { - + class DxvkInstance; /** @@ -44,11 +44,13 @@ namespace dxvk { struct DxvkDevicePerfHints { VkBool32 preferFbDepthStencilCopy : 1; VkBool32 preferFbResolve : 1; + VkBool32 preferRenderPassOps : 1; + VkBool32 preferCachedMemory : 1; }; - + /** * \brief Device queue - * + * * Stores a Vulkan queue and the * queue family that it belongs to. */ @@ -65,10 +67,10 @@ namespace dxvk { DxvkDeviceQueue graphics; DxvkDeviceQueue transfer; }; - + /** * \brief DXVK device - * + * * Device object. This is responsible for resource creation, * memory allocation, command submission and state tracking. * Rendering commands are recorded into command lists using @@ -79,16 +81,16 @@ namespace dxvk { friend class DxvkSubmissionQueue; friend class DxvkDescriptorPoolTracker; public: - + DxvkDevice( const Rc& instance, const Rc& adapter, const Rc& vkd, const DxvkDeviceExtensions& extensions, const DxvkDeviceFeatures& features); - + ~DxvkDevice(); - + /** * \brief Vulkan device functions * \returns Vulkan device functions @@ -96,7 +98,7 @@ namespace dxvk { Rc vkd() const { return m_vkd; } - + /** * \brief Logical device handle * \returns The device handle @@ -112,10 +114,10 @@ namespace dxvk { const DxvkOptions& config() const { return m_options; } - + /** * \brief Queue handles - * + * * Handles and queue family indices * of all known device queues. * \returns Device queue infos @@ -132,10 +134,10 @@ namespace dxvk { return m_queues.transfer.queueHandle != m_queues.graphics.queueHandle; } - + /** * \brief The instance - * + * * The DXVK instance that created this device. * \returns Instance */ @@ -145,7 +147,7 @@ namespace dxvk { /** * \brief The adapter - * + * * The physical device that the * device has been created for. 
* \returns Adapter @@ -161,7 +163,7 @@ namespace dxvk { const DxvkDeviceExtensions& extensions() const { return m_extensions; } - + /** * \brief Enabled device features * \returns Enabled features @@ -180,7 +182,7 @@ namespace dxvk { /** * \brief Get device status - * + * * This may report device loss in * case a submission failed. * \returns Device status @@ -209,7 +211,7 @@ namespace dxvk { * \returns Supported shader pipeline stages */ VkPipelineStageFlags getShaderPipelineStages() const; - + /** * \brief Retrieves device options * \returns Device options @@ -223,26 +225,26 @@ namespace dxvk { DxvkDevicePerfHints perfHints() const { return m_perfHints; } - + /** * \brief Creates a command list * \returns The command list */ Rc createCommandList(); - + /** * \brief Creates a descriptor pool - * + * * Returns a previously recycled pool, or creates * a new one if necessary. The context should take * ownership of the returned pool. * \returns Descriptor pool */ Rc createDescriptorPool(); - + /** * \brief Creates a context - * + * * Creates a context object that can * be used to record command buffers. 
* \returns The context object @@ -257,7 +259,7 @@ namespace dxvk { /** * \brief Creates a query - * + * * \param [in] type Query type * \param [in] flags Query flags * \param [in] index Query index @@ -276,19 +278,19 @@ namespace dxvk { */ Rc createFence( const DxvkFenceCreateInfo& fenceInfo); - + /** * \brief Creates framebuffer for a set of render targets - * + * * \param [in] info Framebuffer info * \returns The framebuffer object */ Rc createFramebuffer( const DxvkFramebufferInfo& info); - + /** * \brief Creates a buffer object - * + * * \param [in] createInfo Buffer create info * \param [in] memoryType Memory type flags * \returns The buffer object @@ -296,10 +298,10 @@ namespace dxvk { Rc createBuffer( const DxvkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memoryType); - + /** * \brief Creates a buffer view - * + * * \param [in] buffer The buffer to view * \param [in] createInfo Buffer view properties * \returns The buffer view object @@ -307,10 +309,10 @@ namespace dxvk { Rc createBufferView( const Rc& buffer, const DxvkBufferViewCreateInfo& createInfo); - + /** * \brief Creates an image object - * + * * \param [in] createInfo Image create info * \param [in] memoryType Memory type flags * \returns The image object @@ -321,7 +323,7 @@ namespace dxvk { /** * \brief Creates an image object for an existing VkImage - * + * * \param [in] createInfo Image create info * \param [in] image Vulkan image to wrap * \returns The image object @@ -329,10 +331,10 @@ namespace dxvk { Rc createImageFromVkImage( const DxvkImageCreateInfo& createInfo, VkImage image); - + /** * \brief Creates an image view - * + * * \param [in] image The image to create a view for * \param [in] createInfo Image view create info * \returns The image view @@ -340,19 +342,19 @@ namespace dxvk { Rc createImageView( const Rc& image, const DxvkImageViewCreateInfo& createInfo); - + /** * \brief Creates a sampler object - * + * * \param [in] createInfo Sampler parameters * \returns Newly created 
sampler object */ Rc createSampler( const DxvkSamplerCreateInfo& createInfo); - + /** * \brief Retrieves stat counters - * + * * Can be used by the HUD to display some * internal information, such as memory * usage, draw calls, etc. @@ -372,26 +374,26 @@ namespace dxvk { * \returns Current frame ID */ uint32_t getCurrentFrameId() const; - + /** * \brief Initializes dummy resources - * + * * Should be called after creating the device in * case the device initialization was successful * and the device is usable. */ void initResources(); - + /** * \brief Registers a shader * \param [in] shader Newly compiled shader */ void registerShader( const Rc& shader); - + /** * \brief Presents a swap chain image - * + * * Invokes the presenter's \c presentImage method on * the submission thread. The status of this operation * can be retrieved with \ref waitForSubmission. @@ -401,10 +403,10 @@ namespace dxvk { void presentImage( const Rc& presenter, DxvkSubmitStatus* status); - + /** * \brief Submits a command list - * + * * Submits the given command list to the device using * the given set of optional synchronization primitives. * \param [in] commandList The command list to submit @@ -418,7 +420,7 @@ namespace dxvk { /** * \brief Locks submission queue - * + * * Since Vulkan queues are only meant to be accessed * from one thread at a time, external libraries need * to lock the queue before submitting command buffers. @@ -427,10 +429,10 @@ namespace dxvk { m_submissionQueue.synchronize(); m_submissionQueue.lockDeviceQueue(); } - + /** * \brief Unlocks submission queue - * + * * Releases the Vulkan queues again so that DXVK * itself can use them for submissions again. */ @@ -440,7 +442,7 @@ namespace dxvk { /** * \brief Number of pending submissions - * + * * A return value of 0 indicates * that the GPU is currently idle. 
* \returns Pending submission count @@ -462,7 +464,7 @@ namespace dxvk { /** * \brief Waits for a given submission - * + * * \param [in,out] status Submission status * \returns Result of the submission */ @@ -475,19 +477,19 @@ namespace dxvk { * \param [in] access Access mode to check */ void waitForResource(const Rc& resource, DxvkAccess access); - + /** * \brief Waits until the device becomes idle - * + * * Waits for the GPU to complete the execution of all * previously submitted command buffers. This may be * used to ensure that resources that were previously * used by the GPU can be safely destroyed. */ void waitForIdle(); - + private: - + DxvkOptions m_options; Rc m_instance; @@ -497,32 +499,32 @@ namespace dxvk { DxvkDeviceFeatures m_features; DxvkDeviceInfo m_properties; - + DxvkDevicePerfHints m_perfHints; DxvkObjects m_objects; sync::Spinlock m_statLock; DxvkStatCounters m_statCounters; - + DxvkDeviceQueueSet m_queues; - + DxvkRecycler m_recycledCommandLists; DxvkRecycler m_recycledDescriptorPools; - + DxvkSubmissionQueue m_submissionQueue; DxvkDevicePerfHints getPerfHints(); - + void recycleCommandList( const Rc& cmdList); - + void recycleDescriptorPool( const Rc& pool); - + DxvkDeviceQueue getQueue( uint32_t family, uint32_t index) const; - + }; - + } diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index d6f607846d0..f21516dd1ae 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -266,9 +266,24 @@ namespace dxvk { if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) hints = hints & DxvkMemoryFlag::Transient; + // On tiling GPUs, prefer cached memory for host-visible allocations + // to speed up readbacks. This is a preference only: if no suitable + // cached memory type exists, we fall back to uncached below. 
+ VkMemoryPropertyFlags cachedFlags = 0; + + if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + && m_device->perfHints().preferCachedMemory) + cachedFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + // Try to allocate from a memory type which supports the given flags exactly auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr; - DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints); + DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags | cachedFlags, hints); + + // If we asked for cached memory but none was available, retry uncached + if (!result && cachedFlags) { + cachedFlags = 0; + result = this->tryAlloc(req, dedAllocPtr, flags, hints); + } // If the first attempt failed, try ignoring the dedicated allocation if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) { diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index 49d90b52e8c..de042543122 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -11,6 +11,7 @@ namespace dxvk { hud = config.getOption("dxvk.hud", ""); hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); deviceFilter = config.getOption("dxvk.deviceFilter", ""); + tilerMode = config.getOption("dxvk.tilerMode", Tristate::Auto); auto budget = config.getOption("dxvk.maxMemoryBudget", 0); maxMemoryBudget = VkDeviceSize(std::max(budget, 0)) << 20u; enableDyasync = config.getOption ("dxvk.enableDyasync", true); diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index f135beecbd8..e69eac70f72 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -2,6 +2,8 @@ #include "../util/config/config.h" +#include "../vulkan/vulkan_loader.h" + namespace dxvk { struct DxvkOptions { @@ -26,6 +28,11 @@ namespace dxvk { /// Device name std::string deviceFilter; + // Tiler GPU tweaks. 
Currently biases host-visible + // allocations toward cached memory on tilers; the + // render-pass-op side is detected but not yet acted on. + Tristate tilerMode; + /// Memory budget in bytes VkDeviceSize maxMemoryBudget; From 82113a6aeb429be67a31eee4709bf395ab3fb015 Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 01:22:52 -0300 Subject: [PATCH 5/8] [dxvk, config] backport dxvk.zeroMappedMemory Zero initializes host-visible mapped memory to work around games that assume freshly mapped buffers are clean and break on stale data. Sarek static budget allocator has no per allocation flags or clear on free hook like upstream, so this clears on handout instead of on free. Memory is zeroed at every site that returns a mapped pointer: sub-allocated slices in DxvkMemoryChunk::alloc (covering reused slices from recycled chunks) and direct large/dedicated allocations in tryAllocFromType. Nonmapped memory is skipped via a null map-pointer guard. App visible guarantee matches upstream (no resource ever receives stale mapped data), but the mechanism differs. Off by default; adds one memset per host visible allocation when enabled. --- src/dxvk/dxvk_memory.cpp | 20 ++++++- src/dxvk/dxvk_memory.h | 118 ++++++++++++++++++++----------------- src/dxvk/dxvk_options.cpp | 1 + src/dxvk/dxvk_options.h | 4 ++ src/util/config/config.cpp | 15 ++++- 5 files changed, 98 insertions(+), 60 deletions(-) diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index f21516dd1ae..5f4625a4596 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -1,4 +1,5 @@ #include +#include #include "dxvk_device.h" #include "dxvk_memory.h" @@ -123,9 +124,18 @@ namespace dxvk { m_freeList.push_back({ allocEnd, sliceEnd - allocEnd }); // Create the memory object with the aligned slice + void* mapPtr = m_memory.memPointer + ? 
reinterpret_cast(m_memory.memPointer) + allocStart + : nullptr; + + // Some games assume freshly mapped buffers are zero-initialized and + // break on stale data. Clear the slice on hand-out if requested, which + // also covers reused slices from recycled chunks. + if (unlikely(mapPtr && m_alloc->zeroMappedMemory())) + std::memset(mapPtr, 0, allocEnd - allocStart); + return DxvkMemory(m_alloc, this, m_type, - m_memory.memHandle, allocStart, allocEnd - allocStart, - reinterpret_cast(m_memory.memPointer) + allocStart); + m_memory.memHandle, allocStart, allocEnd - allocStart, mapPtr); } @@ -378,8 +388,12 @@ namespace dxvk { DxvkDeviceMemory devMem = this->tryAllocDeviceMemory( type, flags, size, hints, dedAllocInfo); - if (devMem.memHandle != VK_NULL_HANDLE) + if (devMem.memHandle != VK_NULL_HANDLE) { + if (unlikely(devMem.memPointer && this->zeroMappedMemory())) + std::memset(devMem.memPointer, 0, size); + memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer); + } } else { for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) memory = type->chunks[i]->alloc(flags, size, align, hints); diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 795dd74d62a..b74e8d322b1 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -3,13 +3,13 @@ #include "dxvk_adapter.h" namespace dxvk { - + class DxvkMemoryAllocator; class DxvkMemoryChunk; - + /** * \brief Memory stats - * + * * Reports the amount of device memory * allocated and used by the application. */ @@ -46,7 +46,7 @@ namespace dxvk { /** * \brief Device memory object - * + * * Stores a Vulkan memory object. If the object * was allocated on host-visible memory, it will * be persistently mapped. @@ -59,10 +59,10 @@ namespace dxvk { float priority = 0.0f; }; - + /** * \brief Memory heap - * + * * Corresponds to a Vulkan memory heap and stores * its properties as well as allocation statistics. 
*/ @@ -75,7 +75,7 @@ namespace dxvk { /** * \brief Memory type - * + * * Corresponds to a Vulkan memory type and stores * memory chunks used to sub-allocate memory on * this memory type. @@ -89,18 +89,18 @@ namespace dxvk { std::vector> chunks; }; - - + + /** * \brief Memory slice - * + * * Represents a slice of memory that has * been sub-allocated from a bigger chunk. */ class DxvkMemory { friend class DxvkMemoryAllocator; public: - + DxvkMemory(); DxvkMemory( DxvkMemoryAllocator* alloc, @@ -113,10 +113,10 @@ namespace dxvk { DxvkMemory (DxvkMemory&& other); DxvkMemory& operator = (DxvkMemory&& other); ~DxvkMemory(); - + /** * \brief Memory object - * + * * This information is required when * binding memory to Vulkan objects. * \returns Memory object @@ -124,10 +124,10 @@ namespace dxvk { VkDeviceMemory memory() const { return m_memory; } - + /** * \brief Offset into device memory - * + * * This information is required when * binding memory to Vulkan objects. * \returns Offset into device memory @@ -135,10 +135,10 @@ namespace dxvk { VkDeviceSize offset() const { return m_offset; } - + /** * \brief Pointer to mapped data - * + * * \param [in] offset Byte offset * \returns Pointer to mapped data */ @@ -148,7 +148,7 @@ namespace dxvk { /** * \brief Returns length of memory allocated - * + * * \returns Memory size */ VkDeviceSize length() const { @@ -157,16 +157,16 @@ namespace dxvk { /** * \brief Checks whether the memory slice is defined - * + * * \returns \c true if this slice points to actual device * memory, and \c false if it is undefined. 
*/ operator bool () const { return m_memory != VK_NULL_HANDLE; } - + private: - + DxvkMemoryAllocator* m_alloc = nullptr; DxvkMemoryChunk* m_chunk = nullptr; DxvkMemoryType* m_type = nullptr; @@ -174,9 +174,9 @@ namespace dxvk { VkDeviceSize m_offset = 0; VkDeviceSize m_length = 0; void* m_mapPtr = nullptr; - + void free(); - + }; @@ -195,29 +195,29 @@ namespace dxvk { }; using DxvkMemoryFlags = Flags; - - + + /** * \brief Memory chunk - * + * * A single chunk of memory that provides a * sub-allocator. This is not thread-safe. */ class DxvkMemoryChunk : public RcObject { - + public: - + DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, DxvkDeviceMemory memory, DxvkMemoryFlags m_hints); - + ~DxvkMemoryChunk(); /** * \brief Allocates memory from the chunk - * + * * On failure, this returns a slice with * \c VK_NULL_HANDLE as the memory handle. * \param [in] flags Requested memory type flags @@ -231,10 +231,10 @@ namespace dxvk { VkDeviceSize size, VkDeviceSize align, DxvkMemoryFlags hints); - + /** * \brief Frees memory - * + * * Returns a slice back to the chunk. * Called automatically when a memory * slice runs out of scope. @@ -258,27 +258,27 @@ namespace dxvk { bool isCompatible(const Rc& other) const; private: - + struct FreeSlice { VkDeviceSize offset; VkDeviceSize length; }; - + DxvkMemoryAllocator* m_alloc; DxvkMemoryType* m_type; DxvkDeviceMemory m_memory; DxvkMemoryFlags m_hints; - + std::vector m_freeList; bool checkHints(DxvkMemoryFlags hints) const; - + }; - - + + /** * \brief Memory allocator - * + * * Allocates device memory for Vulkan resources. * Memory objects will be destroyed automatically. */ @@ -288,13 +288,13 @@ namespace dxvk { constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10; public: - + DxvkMemoryAllocator(const DxvkDevice* device); ~DxvkMemoryAllocator(); - + /** * \brief Buffer-image granularity - * + * * The granularity between linear and non-linear * resources in adjacent memory locations. 
See * section 11.6 of the Vulkan spec for details. @@ -303,10 +303,10 @@ namespace dxvk { VkDeviceSize bufferImageGranularity() const { return m_devProps.limits.bufferImageGranularity; } - + /** * \brief Allocates device memory - * + * * \param [in] req Memory requirements * \param [in] dedAllocReq Dedicated allocation requirements * \param [in] dedAllocInfo Dedicated allocation info @@ -320,10 +320,10 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo& dedAllocInfo, VkMemoryPropertyFlags flags, DxvkMemoryFlags hints); - + /** * \brief Queries memory stats - * + * * Returns the total amount of memory * allocated and used for a given heap. * \param [in] heap Heap index @@ -332,14 +332,22 @@ namespace dxvk { DxvkMemoryStats getMemoryStats(uint32_t heap) const { return m_memHeaps[heap].stats; } - + + /** + * \brief Whether mapped memory should be zero-initialized + * \returns \c true if zeroMappedMemory is enabled + */ + bool zeroMappedMemory() const { + return m_device->config().zeroMappedMemory; + } + private: const Rc m_vkd; const DxvkDevice* m_device; const VkPhysicalDeviceProperties m_devProps; const VkPhysicalDeviceMemoryProperties m_memProps; - + dxvk::mutex m_mutex; std::array m_memHeaps; std::array m_memTypes; @@ -349,7 +357,7 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo* dedAllocInfo, VkMemoryPropertyFlags flags, DxvkMemoryFlags hints); - + DxvkMemory tryAllocFromType( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -357,27 +365,27 @@ namespace dxvk { VkDeviceSize align, DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); - + DxvkDeviceMemory tryAllocDeviceMemory( DxvkMemoryType* type, VkMemoryPropertyFlags flags, VkDeviceSize size, DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); - + void free( const DxvkMemory& memory); - + void freeChunkMemory( DxvkMemoryType* type, DxvkMemoryChunk* chunk, VkDeviceSize offset, VkDeviceSize length); - + void freeDeviceMemory( DxvkMemoryType* type, 
DxvkDeviceMemory memory); - + VkDeviceSize pickChunkSize( uint32_t memTypeId, DxvkMemoryFlags hints) const; @@ -394,5 +402,5 @@ namespace dxvk { const DxvkMemoryHeap* heap); }; - + } diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index de042543122..f18a1ee96bc 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -12,6 +12,7 @@ namespace dxvk { hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); deviceFilter = config.getOption("dxvk.deviceFilter", ""); tilerMode = config.getOption("dxvk.tilerMode", Tristate::Auto); + zeroMappedMemory = config.getOption("dxvk.zeroMappedMemory", false); auto budget = config.getOption("dxvk.maxMemoryBudget", 0); maxMemoryBudget = VkDeviceSize(std::max(budget, 0)) << 20u; enableDyasync = config.getOption ("dxvk.enableDyasync", true); diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index e69eac70f72..e34c74b140c 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -33,6 +33,10 @@ namespace dxvk { // render-pass-op side is detected but not yet acted on. Tristate tilerMode; + // Zero-initialize host-visible mapped memory on allocation. + // Works around games that assume freshly mapped buffers are clean. 
+ bool zeroMappedMemory; + /// Memory budget in bytes VkDeviceSize maxMemoryBudget; diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index 99c1a68f5ba..de871da8053 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -278,9 +278,10 @@ namespace dxvk { { "d3d11.invariantPosition", "False" }, { "d3d11.floatControls", "False" }, }} }, - /* Nioh 2 */ + /* Nioh 2 - Fixes some fires looking glitchy */ { R"(\\nioh2\.exe$)", {{ { "dxgi.deferSurfaceCreation", "True" }, + { "dxvk.zeroMappedMemory", "True" }, }} }, /* Crazy Machines 3 - crashes on long device * * descriptions */ @@ -440,7 +441,8 @@ namespace dxvk { * around the game not properly initializing * * some of its constant buffers after discard */ { R"(\\Vindictus(_x64)?\.exe$)", {{ - { "d3d11.cachedDynamicResources", "cr" }, + { "d3d11.cachedDynamicResources", "cr" }, + { "dxvk.zeroMappedMemory", "True" }, }} }, /* Riders Republic - Statically linked AMDAGS */ { R"(\\RidersRepublic(_BE)?\.exe$)", {{ @@ -877,6 +879,15 @@ namespace dxvk { { "d3d9.memoryTrackTest", "True" }, { "d3d9.maxAvailableMemory", "2048" }, }} }, + /* Ridge Racer Unbounded - Vertex explosions */ + { R"(\\RRU(_demo)?\.exe$)", {{ + { "dxvk.zeroMappedMemory", "True" }, + }} }, + /* Warhammer 40,000: Dawn of War DE * + * Fixes occasional vertex explosions */ + { R"(\\W40k(_gog)?\.exe$)", {{ + { "dxvk.zeroMappedMemory", "True" }, + }} }, /* The Ship (2004) */ { R"(\\ship\.exe$)", {{ { "d3d9.memoryTrackTest", "True" }, From 03d772f6d73e0d920ee12a92d1e807ff2c41cd8e Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Thu, 11 Jun 2026 01:27:18 -0300 Subject: [PATCH 6/8] [dxvk] move DxvkMemoryAllocator::zeroMappedMemory out of line --- src/dxvk/dxvk_memory.cpp | 5 +++++ src/dxvk/dxvk_memory.h | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index 5f4625a4596..6e678c0ecbf 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ 
-255,6 +255,11 @@ namespace dxvk { } + bool DxvkMemoryAllocator::zeroMappedMemory() const { + return m_device->config().zeroMappedMemory; + } + + DxvkMemory DxvkMemoryAllocator::alloc( const VkMemoryRequirements* req, const VkMemoryDedicatedRequirements& dedAllocReq, diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index b74e8d322b1..ac877672331 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -337,9 +337,7 @@ namespace dxvk { * \brief Whether mapped memory should be zero-initialized * \returns \c true if zeroMappedMemory is enabled */ - bool zeroMappedMemory() const { - return m_device->config().zeroMappedMemory; - } + bool zeroMappedMemory() const; private: From 15dc0e274eddd3f4a7a3231b9484bd084cf4a943 Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Mon, 15 Jun 2026 00:43:33 -0300 Subject: [PATCH 7/8] [dxvk, config] backport dxvk.lowerSinCos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces native SPIR‑V Sin/Cos with a Taylor series approximation on Intel GPUs to fix rendering bugs caused by hardware inaccuracy near x ≈ ±π/2 (dxvk #4861). 
--- src/dxbc/dxbc_compiler.cpp | 77 ++++++++++++++++++++++++----------- src/dxbc/dxbc_options.cpp | 15 ++++--- src/dxbc/dxbc_options.h | 9 ++++- src/dxso/dxso_compiler.cpp | 15 +++++-- src/dxso/dxso_options.cpp | 7 +++- src/dxso/dxso_options.h | 4 ++ src/dxvk/dxvk_options.cpp | 1 + src/dxvk/dxvk_options.h | 3 ++ src/spirv/spirv_module.cpp | 82 ++++++++++++++++++++++++++++++++++++++ src/spirv/spirv_module.h | 11 +++++ 10 files changed, 189 insertions(+), 35 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index e87ba3c8996..de77448cf11 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -2228,32 +2228,61 @@ namespace dxvk { const DxbcRegisterValue srcValue = emitRegisterLoad( ins.src[0], DxbcRegMask(true, true, true, true)); - // Either output may be DxbcOperandType::Null, in - // which case we don't have to generate any code. - if (ins.dst[0].type != DxbcOperandType::Null) { - const DxbcRegisterValue sinInput = - emitRegisterExtract(srcValue, ins.dst[0].mask); - - DxbcRegisterValue sin; - sin.type = sinInput.type; - sin.id = m_module.opSin( - getVectorTypeId(sin.type), - sinInput.id); - - emitRegisterStore(ins.dst[0], sin); - } - - if (ins.dst[1].type != DxbcOperandType::Null) { - const DxbcRegisterValue cosInput = - emitRegisterExtract(srcValue, ins.dst[1].mask); + // Compute sin and cos together; either output may be null. + const bool useBuiltIn = !m_moduleInfo.options.sincosEmulation; + + if (ins.dst[0].type != DxbcOperandType::Null || + ins.dst[1].type != DxbcOperandType::Null) { + + // Determine component count from whichever destination is non‑null + DxbcRegMask sharedMask = ins.dst[0].type != DxbcOperandType::Null + ? 
ins.dst[0].mask : ins.dst[1].mask; + const DxbcRegisterValue srcInput = + emitRegisterExtract(srcValue, sharedMask); + + uint32_t componentCount = srcInput.type.ccount; + std::array sinIds = {}; + std::array cosIds = {}; + + uint32_t floatType = m_module.defFloatType(32); + + for (uint32_t i = 0; i < componentCount; i++) { + uint32_t scalarX = componentCount > 1 + ? m_module.opVectorExtractDynamic( + floatType, srcInput.id, + m_module.constu32(i)) + : srcInput.id; + + uint32_t sincos = m_module.opSinCos(scalarX, useBuiltIn); + + // opSinCos returns vec2: index 0 = sin, index 1 = cos + sinIds[i] = m_module.opCompositeExtract( + floatType, sincos, 1u, &(uint32_t){0u}); + cosIds[i] = m_module.opCompositeExtract( + floatType, sincos, 1u, &(uint32_t){1u}); + } - DxbcRegisterValue cos; - cos.type = cosInput.type; - cos.id = m_module.opCos( - getVectorTypeId(cos.type), - cosInput.id); + if (ins.dst[0].type != DxbcOperandType::Null) { + DxbcRegisterValue sin; + sin.type = srcInput.type; + sin.id = componentCount > 1 + ? m_module.opCompositeConstruct( + getVectorTypeId(sin.type), + componentCount, sinIds.data()) + : sinIds[0]; + emitRegisterStore(ins.dst[0], sin); + } - emitRegisterStore(ins.dst[1], cos); + if (ins.dst[1].type != DxbcOperandType::Null) { + DxbcRegisterValue cos; + cos.type = srcInput.type; + cos.id = componentCount > 1 + ? 
m_module.opCompositeConstruct( + getVectorTypeId(cos.type), + componentCount, cosIds.data()) + : cosIds[0]; + emitRegisterStore(ins.dst[1], cos); + } } } diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index ef4b4992acf..6193922d64c 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -3,7 +3,7 @@ #include "dxbc_options.h" namespace dxvk { - + DxbcOptions::DxbcOptions() { } @@ -19,6 +19,11 @@ namespace dxvk { // Disable unbound texture optimization on Mali GPUs due to black screen issues disableUnboundTextureOptimization = (devProps.vendorID == 0x13B5); // ARM Mali + // Use software sin/cos approximation on Intel iGPUs by default + sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA) + || adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS); + applyTristate(sincosEmulation, device->config().lowerSinCos); + useDepthClipWorkaround = !devFeatures.extDepthClipEnable.depthClipEnable; useStorageImageReadWithoutFormat @@ -38,13 +43,13 @@ namespace dxvk { && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT); useSdivForBufferIndex = adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0); - + switch (device->config().useRawSsbo) { case Tristate::Auto: minSsboAlignment = devInfo.core.properties.limits.minStorageBufferOffsetAlignment; break; case Tristate::True: minSsboAlignment = 4u; break; case Tristate::False: minSsboAlignment = ~0u; break; } - + invariantPosition = options.invariantPosition; enableRtOutputNanFixup = options.enableRtOutputNanFixup; zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory; @@ -55,7 +60,7 @@ namespace dxvk { // Disable subgroup early discard on Nvidia because it may hurt performance if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0)) useSubgroupOpsForEarlyDiscard = false; - + // Figure out float control flags to match D3D11 rules if (options.floatControls) { if 
(devInfo.khrShaderFloatControls.shaderSignedZeroInfNanPreserveFloat32) @@ -75,5 +80,5 @@ namespace dxvk { || adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_MESA_RADV_KHR, 0, VK_MAKE_VERSION(20, 3, 0))) enableRtOutputNanFixup = true; } - + } diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 36891de2d55..5d1147a2ef9 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -59,7 +59,12 @@ namespace dxvk { /// Disable unbound texture optimization on Mali GPUs /// to prevent black screen issues due to strict binding validation bool disableUnboundTextureOptimization = false; - + + /// Use a Taylor approximation for sin/cos instead of the + /// native GLSL.std.450 Sin/Cos instructions. Required for + /// correct results on Intel iGPUs (dxvk #4866). + bool sincosEmulation = false; + /// Clear thread-group shared memory to zero bool zeroInitWorkgroupMemory = false; @@ -78,5 +83,5 @@ namespace dxvk { /// Minimum storage buffer alignment VkDeviceSize minSsboAlignment = 0; }; - + } diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index 91cbb4a217f..640d7dd49cc 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -2196,14 +2196,23 @@ namespace dxvk { DxsoRegMask srcMask(true, false, false, false); uint32_t src0 = emitRegisterLoad(src[0], srcMask).id; - std::array sincosVectorIndices = { 0, 0, 0, 0 }; + uint32_t sincos = m_module.opSinCos(src0, + !m_moduleInfo.options.sincosEmulation); + std::array sincosVectorIndices = { 0, 0, 0, 0 }; uint32_t index = 0; + + uint32_t cosIdx = 1u, sinIdx = 0u; + + // Original order: mask[0] = cos, mask[1] = sin + // opSinCos returns vec2(sin, cos) if (mask[0]) - sincosVectorIndices[index++] = m_module.opCos(scalarTypeId, src0); + sincosVectorIndices[index++] = m_module.opCompositeExtract( + scalarTypeId, sincos, 1u, &cosIdx); if (mask[1]) - sincosVectorIndices[index++] = m_module.opSin(scalarTypeId, src0); + sincosVectorIndices[index++] = 
m_module.opCompositeExtract( + scalarTypeId, sincos, 1u, &sinIdx); for (; index < result.type.ccount; index++) { if (sincosVectorIndices[index] == 0) diff --git a/src/dxso/dxso_options.cpp b/src/dxso/dxso_options.cpp index 03ee70a0a2f..fa201ab221d 100644 --- a/src/dxso/dxso_options.cpp +++ b/src/dxso/dxso_options.cpp @@ -14,6 +14,11 @@ namespace dxvk { const DxvkDeviceFeatures& devFeatures = device->features(); const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt(); + // Use software sin/cos approximation on Intel iGPUs by default + sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA) + || adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS); + applyTristate(sincosEmulation, device->config().lowerSinCos); + useDemoteToHelperInvocation = (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation); @@ -25,7 +30,7 @@ namespace dxvk { // Disable early discard on Nvidia because it may hurt performance if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0)) useSubgroupOpsForEarlyDiscard = false; - + // Apply shader-related options strictConstantCopies = options.strictConstantCopies; diff --git a/src/dxso/dxso_options.h b/src/dxso/dxso_options.h index b6cb3a4ad62..6d0fb465a03 100644 --- a/src/dxso/dxso_options.h +++ b/src/dxso/dxso_options.h @@ -63,6 +63,10 @@ namespace dxvk { /// Whether vertex shaders may emit ClipDistance builtins. bool enableClipDistance = false; + + /// Use a Taylor approximation for sin/cos instead of the + /// native GLSL.std.450 Sin/Cos instructions. 
+ bool sincosEmulation = false; }; } diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index f18a1ee96bc..3f674fe636e 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -13,6 +13,7 @@ namespace dxvk { deviceFilter = config.getOption("dxvk.deviceFilter", ""); tilerMode = config.getOption("dxvk.tilerMode", Tristate::Auto); zeroMappedMemory = config.getOption("dxvk.zeroMappedMemory", false); + lowerSinCos = config.getOption("dxvk.lowerSinCos", Tristate::Auto); auto budget = config.getOption("dxvk.maxMemoryBudget", 0); maxMemoryBudget = VkDeviceSize(std::max(budget, 0)) << 20u; enableDyasync = config.getOption ("dxvk.enableDyasync", true); diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index e34c74b140c..d225ce5e6fa 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -37,6 +37,9 @@ namespace dxvk { // Works around games that assume freshly mapped buffers are clean. bool zeroMappedMemory; + /// Whether to use custom sin/cos approximation + Tristate lowerSinCos = Tristate::Auto; + /// Memory budget in bytes VkDeviceSize maxMemoryBudget; diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp index 7cccdd135dc..0c02acd95e2 100644 --- a/src/spirv/spirv_module.cpp +++ b/src/spirv/spirv_module.cpp @@ -2647,6 +2647,88 @@ namespace dxvk { return resultId; } + uint32_t SpirvModule::opSinCos( + uint32_t x, + bool useBuiltIn) { + uint32_t floatType = defFloatType(32); + uint32_t resultType = defVectorType(floatType, 2u); + + if (useBuiltIn) { + std::array members = { + opSin(floatType, x), + opCos(floatType, x), + }; + return opCompositeConstruct(resultType, members.size(), members.data()); + } + + uint32_t uintType = defIntType(32, false); + uint32_t boolType = defBoolType(); + + uint32_t xNorm = opFMul(floatType, opFAbs(floatType, x), + constf32(4.0f / pi)); + uint32_t xTrunc = opTrunc(floatType, xNorm); + uint32_t xFract = opFSub(floatType, xNorm, xTrunc); + uint32_t xInt = 
opConvertFtoU(uintType, xTrunc); + + uint32_t mirror = opINotEqual(boolType, + opBitwiseAnd(uintType, xInt, constu32(1u)), constu32(0u)); + xFract = opSelect(floatType, mirror, + opFSub(floatType, constf32(1.0f), xFract), xFract); + + uint32_t xFract_2 = opFMul(floatType, xFract, xFract); + uint32_t xFract_4 = opFMul(floatType, xFract_2, xFract_2); + uint32_t xFract_6 = opFMul(floatType, xFract_4, xFract_2); + + uint32_t taylor = opFMul(floatType, xFract_6, + constf32(static_cast(-sincosTaylorFactor(7)))); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFFma(floatType, xFract_4, + constf32(static_cast(sincosTaylorFactor(5))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFFma(floatType, xFract_2, + constf32(static_cast(-sincosTaylorFactor(3))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFAdd(floatType, + constf32(static_cast(sincosTaylorFactor(1))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFMul(floatType, taylor, xFract); + decorate(taylor, spv::DecorationNoContraction); + + uint32_t coFunc = opSqrt(floatType, + opFSub(floatType, constf32(1.0f), + opFMul(floatType, taylor, taylor))); + + uint32_t funcIsSin = opIEqual(boolType, + opBitwiseAnd(uintType, + opIAdd(uintType, xInt, constu32(1u)), constu32(2u)), + constu32(0u)); + + uint32_t sin = opSelect(floatType, funcIsSin, taylor, coFunc); + uint32_t cos = opSelect(floatType, funcIsSin, coFunc, taylor); + + uint32_t sinNeg = opINotEqual(boolType, + opBitwiseAnd(uintType, xInt, constu32(4u)), constu32(0u)); + sinNeg = opLogicalNotEqual(boolType, sinNeg, + opFOrdLessThan(boolType, x, constf32(0.0f))); + + uint32_t cosNeg = opINotEqual(boolType, + opBitwiseAnd(uintType, + opIAdd(uintType, xInt, constu32(2u)), constu32(4u)), + constu32(0u)); + + sin = opSelect(floatType, sinNeg, + opFNegate(floatType, sin), sin); + cos = opSelect(floatType, cosNeg, + opFNegate(floatType, cos), cos); + + std::array members = { sin, 
cos }; + return opCompositeConstruct(resultType, members.size(), members.data()); + } + uint32_t SpirvModule::opSqrt( uint32_t resultType, diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h index a1d027ea62c..f1381fbd1a5 100644 --- a/src/spirv/spirv_module.h +++ b/src/spirv/spirv_module.h @@ -927,6 +927,10 @@ namespace dxvk { uint32_t resultType, uint32_t vector); + uint32_t opSinCos( + uint32_t x, + bool useBuiltIn); + uint32_t opSqrt( uint32_t resultType, uint32_t operand); @@ -1269,6 +1273,13 @@ namespace dxvk { void instImportGlsl450(); + static constexpr double sincosTaylorFactor(uint32_t power) { + double r = 1.0; + for (uint32_t i = 1; i <= power; i++) + r *= pi * 0.25 / double(i); + return r; + } + uint32_t getImageOperandWordCount( const SpirvImageOperands& op) const; From 3b526f832806a784fa9c202f320b6fad95bc26b1 Mon Sep 17 00:00:00 2001 From: pythonlover02 Date: Mon, 15 Jun 2026 00:48:29 -0300 Subject: [PATCH 8/8] [dxbc] fix invalid compound literal in emitVectorSinCos --- src/dxbc/dxbc_compiler.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index de77448cf11..745c020f974 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -2256,10 +2256,11 @@ namespace dxvk { uint32_t sincos = m_module.opSinCos(scalarX, useBuiltIn); // opSinCos returns vec2: index 0 = sin, index 1 = cos + uint32_t sinIndex = 0u, cosIndex = 1u; sinIds[i] = m_module.opCompositeExtract( - floatType, sincos, 1u, &(uint32_t){0u}); + floatType, sincos, 1u, &sinIndex); cosIds[i] = m_module.opCompositeExtract( - floatType, sincos, 1u, &(uint32_t){1u}); + floatType, sincos, 1u, &cosIndex); } if (ins.dst[0].type != DxbcOperandType::Null) {