diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index e87ba3c8996..745c020f974 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -2228,32 +2228,41 @@ namespace dxvk { const DxbcRegisterValue srcValue = emitRegisterLoad( ins.src[0], DxbcRegMask(true, true, true, true)); - // Either output may be DxbcOperandType::Null, in - // which case we don't have to generate any code. - if (ins.dst[0].type != DxbcOperandType::Null) { - const DxbcRegisterValue sinInput = - emitRegisterExtract(srcValue, ins.dst[0].mask); - - DxbcRegisterValue sin; - sin.type = sinInput.type; - sin.id = m_module.opSin( - getVectorTypeId(sin.type), - sinInput.id); - - emitRegisterStore(ins.dst[0], sin); - } - - if (ins.dst[1].type != DxbcOperandType::Null) { - const DxbcRegisterValue cosInput = - emitRegisterExtract(srcValue, ins.dst[1].mask); + // Compute each destination from its own write mask: dst0 (sin) and + // dst1 (cos) may select different components, and either may be null. + const bool useBuiltIn = !m_moduleInfo.options.sincosEmulation; + const uint32_t floatType = m_module.defFloatType(32); + + for (uint32_t dstIdx = 0; dstIdx < 2; dstIdx++) { + if (ins.dst[dstIdx].type == DxbcOperandType::Null) + continue; + + const DxbcRegisterValue srcInput = + emitRegisterExtract(srcValue, ins.dst[dstIdx].mask); + const uint32_t componentCount = srcInput.type.ccount; + std::array<uint32_t, 4> ids = {}; + + for (uint32_t i = 0; i < componentCount; i++) { + const uint32_t scalarX = componentCount > 1 + ? m_module.opCompositeExtract(floatType, srcInput.id, 1u, &i) + : srcInput.id; + + // opSinCos returns vec2(sin, cos), so the destination index + // doubles as the component index: 0 = sin, 1 = cos + const uint32_t sincos = m_module.opSinCos(scalarX, useBuiltIn); + ids[i] = m_module.opCompositeExtract( + floatType, sincos, 1u, &dstIdx); + } - DxbcRegisterValue cos; - cos.type = cosInput.type; - cos.id = m_module.opCos( - getVectorTypeId(cos.type), - cosInput.id); + DxbcRegisterValue result; + result.type = srcInput.type; + result.id = componentCount > 1 + ? m_module.opCompositeConstruct( + getVectorTypeId(result.type), + componentCount, ids.data()) + : ids[0]; - emitRegisterStore(ins.dst[1], cos); + emitRegisterStore(ins.dst[dstIdx], result); } }
diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp index ef4b4992acf..6193922d64c 100644 --- a/src/dxbc/dxbc_options.cpp +++ b/src/dxbc/dxbc_options.cpp @@ -3,7 +3,7 @@ #include "dxbc_options.h" namespace dxvk { - + DxbcOptions::DxbcOptions() { } @@ -19,6 +19,11 @@ namespace dxvk { // Disable unbound texture optimization on Mali GPUs due to black screen issues disableUnboundTextureOptimization = (devProps.vendorID == 0x13B5); // ARM Mali + // Use software sin/cos approximation on Intel iGPUs by default + sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA) + || adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS); + applyTristate(sincosEmulation, device->config().lowerSinCos); + useDepthClipWorkaround = !devFeatures.extDepthClipEnable.depthClipEnable; useStorageImageReadWithoutFormat @@ -38,13 +43,13 @@ namespace dxvk { && (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT); useSdivForBufferIndex = adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0); - + switch (device->config().useRawSsbo) { case Tristate::Auto: minSsboAlignment = devInfo.core.properties.limits.minStorageBufferOffsetAlignment; break; case Tristate::True: minSsboAlignment = 4u; break; case Tristate::False: minSsboAlignment = ~0u; break; } - + invariantPosition = options.invariantPosition; enableRtOutputNanFixup = options.enableRtOutputNanFixup; zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory; @@ -55,7 +60,7 @@ namespace dxvk { // Disable subgroup early discard on Nvidia because it may hurt performance if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0)) useSubgroupOpsForEarlyDiscard = false; - + // Figure out float control flags to match D3D11 rules if (options.floatControls) { if 
(devInfo.khrShaderFloatControls.shaderSignedZeroInfNanPreserveFloat32) @@ -75,5 +80,5 @@ namespace dxvk { || adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_MESA_RADV_KHR, 0, VK_MAKE_VERSION(20, 3, 0))) enableRtOutputNanFixup = true; } - + } diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h index 36891de2d55..5d1147a2ef9 100644 --- a/src/dxbc/dxbc_options.h +++ b/src/dxbc/dxbc_options.h @@ -59,7 +59,12 @@ namespace dxvk { /// Disable unbound texture optimization on Mali GPUs /// to prevent black screen issues due to strict binding validation bool disableUnboundTextureOptimization = false; - + + /// Use a Taylor approximation for sin/cos instead of the + /// native GLSL.std.450 Sin/Cos instructions. Required for + /// correct results on Intel iGPUs (dxvk #4866). + bool sincosEmulation = false; + /// Clear thread-group shared memory to zero bool zeroInitWorkgroupMemory = false; @@ -78,5 +83,5 @@ namespace dxvk { /// Minimum storage buffer alignment VkDeviceSize minSsboAlignment = 0; }; - + } diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index 91cbb4a217f..640d7dd49cc 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -2196,14 +2196,23 @@ namespace dxvk { DxsoRegMask srcMask(true, false, false, false); uint32_t src0 = emitRegisterLoad(src[0], srcMask).id; - std::array sincosVectorIndices = { 0, 0, 0, 0 }; + uint32_t sincos = m_module.opSinCos(src0, + !m_moduleInfo.options.sincosEmulation); + std::array sincosVectorIndices = { 0, 0, 0, 0 }; uint32_t index = 0; + + uint32_t cosIdx = 1u, sinIdx = 0u; + + // Original order: mask[0] = cos, mask[1] = sin + // opSinCos returns vec2(sin, cos) if (mask[0]) - sincosVectorIndices[index++] = m_module.opCos(scalarTypeId, src0); + sincosVectorIndices[index++] = m_module.opCompositeExtract( + scalarTypeId, sincos, 1u, &cosIdx); if (mask[1]) - sincosVectorIndices[index++] = m_module.opSin(scalarTypeId, src0); + sincosVectorIndices[index++] = 
m_module.opCompositeExtract( + scalarTypeId, sincos, 1u, &sinIdx); for (; index < result.type.ccount; index++) { if (sincosVectorIndices[index] == 0) diff --git a/src/dxso/dxso_options.cpp b/src/dxso/dxso_options.cpp index 03ee70a0a2f..fa201ab221d 100644 --- a/src/dxso/dxso_options.cpp +++ b/src/dxso/dxso_options.cpp @@ -14,6 +14,11 @@ namespace dxvk { const DxvkDeviceFeatures& devFeatures = device->features(); const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt(); + // Use software sin/cos approximation on Intel iGPUs by default + sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA) + || adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS); + applyTristate(sincosEmulation, device->config().lowerSinCos); + useDemoteToHelperInvocation = (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation); @@ -25,7 +30,7 @@ namespace dxvk { // Disable early discard on Nvidia because it may hurt performance if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0)) useSubgroupOpsForEarlyDiscard = false; - + // Apply shader-related options strictConstantCopies = options.strictConstantCopies; diff --git a/src/dxso/dxso_options.h b/src/dxso/dxso_options.h index b6cb3a4ad62..6d0fb465a03 100644 --- a/src/dxso/dxso_options.h +++ b/src/dxso/dxso_options.h @@ -63,6 +63,10 @@ namespace dxvk { /// Whether vertex shaders may emit ClipDistance builtins. bool enableClipDistance = false; + + /// Use a Taylor approximation for sin/cos instead of the + /// native GLSL.std.450 Sin/Cos instructions. 
+ bool sincosEmulation = false; }; } diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index b29d096e63f..265a333e7f1 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -530,6 +530,12 @@ namespace dxvk { } + bool DxvkAdapter::matchesDriver( + VkDriverIdKHR driver) const { + return driver == m_deviceInfo.khrDeviceDriverProperties.driverID; + } + + void DxvkAdapter::logAdapterInfo() const { VkPhysicalDeviceProperties deviceInfo = this->deviceProperties(); VkPhysicalDeviceMemoryProperties memoryInfo = this->memoryProperties(); diff --git a/src/dxvk/dxvk_adapter.h b/src/dxvk/dxvk_adapter.h index f2fb1f0af75..b954ca274c8 100644 --- a/src/dxvk/dxvk_adapter.h +++ b/src/dxvk/dxvk_adapter.h @@ -5,10 +5,10 @@ #include "dxvk_include.h" namespace dxvk { - + class DxvkDevice; class DxvkInstance; - + /** * \brief GPU vendors * Based on PCIe IDs. @@ -21,7 +21,7 @@ namespace dxvk { /** * \brief Adapter memory heap info - * + * * Stores info about a heap, and the amount * of memory allocated from it by the app. */ @@ -33,7 +33,7 @@ namespace dxvk { /** * \brief Adapter memory info - * + * * Stores properties and allocation * info of each available heap. */ @@ -49,23 +49,23 @@ namespace dxvk { uint32_t graphics; uint32_t transfer; }; - + /** * \brief DXVK adapter - * + * * Corresponds to a physical device in Vulkan. Provides * all kinds of information about the device itself and * the supported feature set. 
*/ class DxvkAdapter : public RcObject { - + public: - + DxvkAdapter( const Rc& vki, VkPhysicalDevice handle); ~DxvkAdapter(); - + /** * \brief Vulkan instance functions * \returns Vulkan instance functions @@ -73,7 +73,7 @@ namespace dxvk { Rc vki() const { return m_vki; } - + /** * \brief Physical device handle * \returns The adapter handle @@ -81,10 +81,10 @@ namespace dxvk { VkPhysicalDevice handle() const { return m_handle; } - + /** * \brief Physical device properties - * + * * Returns a read-only reference to the core * properties of the Vulkan physical device. * \returns Physical device core properties @@ -95,7 +95,7 @@ namespace dxvk { /** * \brief Device info - * + * * Returns a read-only reference to the full * device info structure, including extended * properties. @@ -104,20 +104,20 @@ namespace dxvk { const DxvkDeviceInfo& devicePropertiesExt() const { return m_deviceInfo; } - + /** * \brief Supportred device features - * + * * Queries the supported device features. * \returns Device features */ const DxvkDeviceFeatures& features() const { return m_deviceFeatures; } - + /** * \brief Retrieves memory heap info - * + * * Returns properties of all available memory heaps, * both device-local and non-local heaps, and the * amount of memory allocated from those heaps by @@ -125,10 +125,10 @@ namespace dxvk { * \returns Memory heap info */ DxvkAdapterMemoryInfo getMemoryHeapInfo() const; - + /** * \brief Memory properties - * + * * Queries the memory types and memory heaps of * the device. This is useful for memory allocators. 
* \returns Device memory properties @@ -137,16 +137,16 @@ namespace dxvk { /** * \brief Queries format support - * + * * \param [in] format The format to query * \returns Format support info */ VkFormatProperties formatProperties( VkFormat format) const; - + /** * \brief Queries image format support - * + * * \param [in] format Format to query * \param [in] type Image type * \param [in] tiling Image tiling @@ -162,22 +162,22 @@ namespace dxvk { VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties& properties) const; - + /** * \brief Retrieves queue family indices * \returns Indices for all queue families */ DxvkAdapterQueueIndices findQueueFamilies() const; - + /** * \brief Tests whether all required features are supported - * + * * \param [in] features Required device features * \returns \c true if all features are supported */ bool checkFeatureSupport( const DxvkDeviceFeatures& required) const; - + /** * \brief Enables extensions for this adapter * @@ -188,10 +188,10 @@ namespace dxvk { */ void enableExtensions( const DxvkNameSet& extensions); - + /** * \brief Creates a DXVK device - * + * * Creates a logical device for this adapter. * \param [in] instance Parent instance * \param [in] enabledFeatures Device features @@ -200,10 +200,10 @@ namespace dxvk { Rc createDevice( const Rc& instance, DxvkDeviceFeatures enabledFeatures); - + /** * \brief Registers memory allocation - * + * * Updates memory alloc info accordingly. * \param [in] heap Memory heap index * \param [in] bytes Allocation size @@ -211,10 +211,10 @@ namespace dxvk { void notifyHeapMemoryAlloc( uint32_t heap, VkDeviceSize bytes); - + /** * \brief Registers memory deallocation - * + * * Updates memory alloc info accordingly. 
* \param [in] heap Memory heap index * \param [in] bytes Allocation size @@ -222,7 +222,7 @@ namespace dxvk { void notifyHeapMemoryFree( uint32_t heap, VkDeviceSize bytes); - + /** * \brief Tests if the driver matches certain criteria * @@ -238,15 +238,24 @@ namespace dxvk { VkDriverIdKHR driver, uint32_t minVer, uint32_t maxVer) const; - + + /** + * \brief Tests whether the driver matches a given driver ID + * + * \param [in] driver Driver ID to match against + * \returns \c true if the driver ID matches + */ + bool matchesDriver( + VkDriverIdKHR driver) const; + /** * \brief Logs DXVK adapter info - * + * * May be useful for bug reports * and general troubleshooting. */ void logAdapterInfo() const; - + /** * \brief Checks whether this is a UMA system * @@ -255,9 +264,9 @@ namespace dxvk { * \returns \c true if the system has unified memory. */ bool isUnifiedMemoryArchitecture() const; - + private: - + Rc m_vki; VkPhysicalDevice m_handle; @@ -267,7 +276,7 @@ namespace dxvk { DxvkDeviceFeatures m_deviceFeatures; bool m_hasMemoryBudget; - + std::vector m_queueFamilies; std::array, VK_MAX_MEMORY_HEAPS> m_heapAlloc; @@ -281,11 +290,11 @@ namespace dxvk { uint32_t findQueueFamily( VkQueueFlags mask, VkQueueFlags flags) const; - + static void logNameList(const DxvkNameList& names); static void logFeatures(const DxvkDeviceFeatures& features); static void logQueueFamilies(const DxvkAdapterQueueIndices& queues); - + }; - + } diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp index efe8354e27d..b57e6e0886a 100644 --- a/src/dxvk/dxvk_device.cpp +++ b/src/dxvk/dxvk_device.cpp @@ -2,7 +2,7 @@ #include "dxvk_instance.h" namespace dxvk { - + DxvkDevice::DxvkDevice( const Rc& instance, const Rc& adapter, @@ -23,8 +23,8 @@ namespace dxvk { m_queues.graphics = getQueue(queueFamilies.graphics, 0); m_queues.transfer = getQueue(queueFamilies.transfer, 0); } - - + + DxvkDevice::~DxvkDevice() { // If we are being destroyed during/after DLL process detachment // from 
TerminateProcess, etc, our CS threads are already destroyed @@ -60,10 +60,10 @@ namespace dxvk { VkPipelineStageFlags result = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - + if (m_features.core.features.geometryShader) result |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; - + if (m_features.core.features.tessellationShader) { result |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; @@ -79,14 +79,14 @@ namespace dxvk { options.maxNumDynamicStorageBuffers = m_properties.core.properties.limits.maxDescriptorSetStorageBuffersDynamic; return options; } - - + + Rc DxvkDevice::createCommandList() { Rc cmdList = m_recycledCommandLists.retrieveObject(); - + if (cmdList == nullptr) cmdList = new DxvkCommandList(this); - + return cmdList; } @@ -96,11 +96,11 @@ namespace dxvk { if (pool == nullptr) pool = new DxvkDescriptorPool(m_vkd); - + return pool; } - - + + Rc DxvkDevice::createContext() { return new DxvkContext(this); } @@ -117,63 +117,63 @@ namespace dxvk { uint32_t index) { return new DxvkGpuQuery(m_vkd, type, flags, index); } - - + + Rc DxvkDevice::createFence( const DxvkFenceCreateInfo& fenceInfo) { return new DxvkFence(this, fenceInfo); } - - + + Rc DxvkDevice::createFramebuffer( const DxvkFramebufferInfo& info) { return new DxvkFramebuffer(m_vkd, info); } - - + + Rc DxvkDevice::createBuffer( const DxvkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memoryType) { return new DxvkBuffer(this, createInfo, m_objects.memoryManager(), memoryType); } - - + + Rc DxvkDevice::createBufferView( const Rc& buffer, const DxvkBufferViewCreateInfo& createInfo) { return new DxvkBufferView(m_vkd, buffer, createInfo); } - - + + Rc DxvkDevice::createImage( const DxvkImageCreateInfo& createInfo, VkMemoryPropertyFlags memoryType) { return new DxvkImage(this, createInfo, m_objects.memoryManager(), memoryType); } - - + + Rc DxvkDevice::createImageFromVkImage( 
const DxvkImageCreateInfo& createInfo, VkImage image) { return new DxvkImage(this, createInfo, image); } - + Rc DxvkDevice::createImageView( const Rc& image, const DxvkImageViewCreateInfo& createInfo) { return new DxvkImageView(m_vkd, image, createInfo); } - - + + Rc DxvkDevice::createSampler( const DxvkSamplerCreateInfo& createInfo) { return new DxvkSampler(this, createInfo); } - - + + DxvkStatCounters DxvkDevice::getStatCounters() { DxvkPipelineCount pipe = m_objects.pipelineManager().getPipelineCount(); - + DxvkStatCounters result; result.setCtr(DxvkStatCounter::PipeCountGraphics, pipe.numGraphicsPipelines); result.setCtr(DxvkStatCounter::PipeCountCompute, pipe.numComputePipelines); @@ -184,8 +184,8 @@ namespace dxvk { result.merge(m_statCounters); return result; } - - + + DxvkMemoryStats DxvkDevice::getMemoryStats(uint32_t heap) { return m_objects.memoryManager().getMemoryStats(heap); } @@ -194,8 +194,8 @@ namespace dxvk { uint32_t DxvkDevice::getCurrentFrameId() const { return m_statCounters.getCtr(DxvkStatCounter::QueuePresentCount); } - - + + void DxvkDevice::initResources() { m_objects.dummyResources().clearResources(this); } @@ -204,8 +204,8 @@ namespace dxvk { void DxvkDevice::registerShader(const Rc& shader) { m_objects.pipelineManager().registerShader(shader); } - - + + void DxvkDevice::presentImage( const Rc& presenter, DxvkSubmitStatus* status) { @@ -214,7 +214,7 @@ namespace dxvk { DxvkPresentInfo presentInfo; presentInfo.presenter = presenter; m_submissionQueue.present(presentInfo, status); - + std::lock_guard statLock(m_statLock); m_statCounters.addCtr(DxvkStatCounter::QueuePresentCount, 1); } @@ -234,8 +234,8 @@ namespace dxvk { m_statCounters.merge(commandList->statCounters()); m_statCounters.addCtr(DxvkStatCounter::QueueSubmitCount, 1); } - - + + VkResult DxvkDevice::waitForSubmission(DxvkSubmitStatus* status) { VkResult result = status->result.load(); @@ -264,16 +264,16 @@ namespace dxvk { m_statCounters.addCtr(DxvkStatCounter::GpuSyncTicks, 
us.count()); } } - - + + void DxvkDevice::waitForIdle() { this->lockSubmission(); if (m_vkd->vkDeviceWaitIdle(m_vkd->device()) != VK_SUCCESS) Logger::err("DxvkDevice: waitForIdle: Operation failed"); this->unlockSubmission(); } - - + + DxvkDevicePerfHints DxvkDevice::getPerfHints() { DxvkDevicePerfHints hints; hints.preferFbDepthStencilCopy = m_extensions.extShaderStencilExport @@ -283,6 +283,24 @@ namespace dxvk { hints.preferFbResolve = m_extensions.amdShaderFragmentMask && (m_adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, 0, 0) || m_adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_AMD_PROPRIETARY_KHR, 0, 0)); + + // Detect tiler GPUs. Currently only used to prefer cached memory + // for host-visible allocations; render-pass handling is a todo. + bool tilerMode = m_adapter->matchesDriver(VK_DRIVER_ID_MESA_TURNIP) + || m_adapter->matchesDriver(VK_DRIVER_ID_QUALCOMM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_HONEYKRISP) + || m_adapter->matchesDriver(VK_DRIVER_ID_MOLTENVK) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_PANVK) + || m_adapter->matchesDriver(VK_DRIVER_ID_ARM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_MESA_V3DV) + || m_adapter->matchesDriver(VK_DRIVER_ID_BROADCOM_PROPRIETARY) + || m_adapter->matchesDriver(VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA) + || m_adapter->matchesDriver(VK_DRIVER_ID_IMAGINATION_PROPRIETARY); + + applyTristate(tilerMode, m_options.tilerMode); + hints.preferCachedMemory = tilerMode; + // Render-pass handling is not wired up yet; clear the hint so it is never indeterminate. + hints.preferRenderPassOps = VK_FALSE; return hints; } @@ -290,7 +308,7 @@ namespace dxvk { void DxvkDevice::recycleCommandList(const Rc& cmdList) { m_recycledCommandLists.returnObject(cmdList); } - + void DxvkDevice::recycleDescriptorPool(const Rc& pool) { m_recycledDescriptorPools.returnObject(pool); @@ -304,5 +322,5 @@ namespace dxvk { m_vkd->vkGetDeviceQueue(m_vkd->device(), family, index, &queue); return DxvkDeviceQueue { queue, family, index }; } - + } diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index 
e23f2c6f048..1e963161a87 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -27,7 +27,7 @@ #include "../vulkan/vulkan_presenter.h" namespace dxvk { - + class DxvkInstance; /** @@ -44,11 +44,13 @@ namespace dxvk { struct DxvkDevicePerfHints { VkBool32 preferFbDepthStencilCopy : 1; VkBool32 preferFbResolve : 1; + VkBool32 preferRenderPassOps : 1; + VkBool32 preferCachedMemory : 1; }; - + /** * \brief Device queue - * + * * Stores a Vulkan queue and the * queue family that it belongs to. */ @@ -65,10 +67,10 @@ namespace dxvk { DxvkDeviceQueue graphics; DxvkDeviceQueue transfer; }; - + /** * \brief DXVK device - * + * * Device object. This is responsible for resource creation, * memory allocation, command submission and state tracking. * Rendering commands are recorded into command lists using @@ -79,16 +81,16 @@ namespace dxvk { friend class DxvkSubmissionQueue; friend class DxvkDescriptorPoolTracker; public: - + DxvkDevice( const Rc& instance, const Rc& adapter, const Rc& vkd, const DxvkDeviceExtensions& extensions, const DxvkDeviceFeatures& features); - + ~DxvkDevice(); - + /** * \brief Vulkan device functions * \returns Vulkan device functions @@ -96,7 +98,7 @@ namespace dxvk { Rc vkd() const { return m_vkd; } - + /** * \brief Logical device handle * \returns The device handle @@ -112,10 +114,10 @@ namespace dxvk { const DxvkOptions& config() const { return m_options; } - + /** * \brief Queue handles - * + * * Handles and queue family indices * of all known device queues. * \returns Device queue infos @@ -132,10 +134,10 @@ namespace dxvk { return m_queues.transfer.queueHandle != m_queues.graphics.queueHandle; } - + /** * \brief The instance - * + * * The DXVK instance that created this device. * \returns Instance */ @@ -145,7 +147,7 @@ namespace dxvk { /** * \brief The adapter - * + * * The physical device that the * device has been created for. 
* \returns Adapter @@ -161,7 +163,7 @@ namespace dxvk { const DxvkDeviceExtensions& extensions() const { return m_extensions; } - + /** * \brief Enabled device features * \returns Enabled features @@ -180,7 +182,7 @@ namespace dxvk { /** * \brief Get device status - * + * * This may report device loss in * case a submission failed. * \returns Device status @@ -209,7 +211,7 @@ namespace dxvk { * \returns Supported shader pipeline stages */ VkPipelineStageFlags getShaderPipelineStages() const; - + /** * \brief Retrieves device options * \returns Device options @@ -223,26 +225,26 @@ namespace dxvk { DxvkDevicePerfHints perfHints() const { return m_perfHints; } - + /** * \brief Creates a command list * \returns The command list */ Rc createCommandList(); - + /** * \brief Creates a descriptor pool - * + * * Returns a previously recycled pool, or creates * a new one if necessary. The context should take * ownership of the returned pool. * \returns Descriptor pool */ Rc createDescriptorPool(); - + /** * \brief Creates a context - * + * * Creates a context object that can * be used to record command buffers. 
* \returns The context object @@ -257,7 +259,7 @@ namespace dxvk { /** * \brief Creates a query - * + * * \param [in] type Query type * \param [in] flags Query flags * \param [in] index Query index @@ -276,19 +278,19 @@ namespace dxvk { */ Rc createFence( const DxvkFenceCreateInfo& fenceInfo); - + /** * \brief Creates framebuffer for a set of render targets - * + * * \param [in] info Framebuffer info * \returns The framebuffer object */ Rc createFramebuffer( const DxvkFramebufferInfo& info); - + /** * \brief Creates a buffer object - * + * * \param [in] createInfo Buffer create info * \param [in] memoryType Memory type flags * \returns The buffer object @@ -296,10 +298,10 @@ namespace dxvk { Rc createBuffer( const DxvkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memoryType); - + /** * \brief Creates a buffer view - * + * * \param [in] buffer The buffer to view * \param [in] createInfo Buffer view properties * \returns The buffer view object @@ -307,10 +309,10 @@ namespace dxvk { Rc createBufferView( const Rc& buffer, const DxvkBufferViewCreateInfo& createInfo); - + /** * \brief Creates an image object - * + * * \param [in] createInfo Image create info * \param [in] memoryType Memory type flags * \returns The image object @@ -321,7 +323,7 @@ namespace dxvk { /** * \brief Creates an image object for an existing VkImage - * + * * \param [in] createInfo Image create info * \param [in] image Vulkan image to wrap * \returns The image object @@ -329,10 +331,10 @@ namespace dxvk { Rc createImageFromVkImage( const DxvkImageCreateInfo& createInfo, VkImage image); - + /** * \brief Creates an image view - * + * * \param [in] image The image to create a view for * \param [in] createInfo Image view create info * \returns The image view @@ -340,19 +342,19 @@ namespace dxvk { Rc createImageView( const Rc& image, const DxvkImageViewCreateInfo& createInfo); - + /** * \brief Creates a sampler object - * + * * \param [in] createInfo Sampler parameters * \returns Newly created 
sampler object */ Rc createSampler( const DxvkSamplerCreateInfo& createInfo); - + /** * \brief Retrieves stat counters - * + * * Can be used by the HUD to display some * internal information, such as memory * usage, draw calls, etc. @@ -372,26 +374,26 @@ namespace dxvk { * \returns Current frame ID */ uint32_t getCurrentFrameId() const; - + /** * \brief Initializes dummy resources - * + * * Should be called after creating the device in * case the device initialization was successful * and the device is usable. */ void initResources(); - + /** * \brief Registers a shader * \param [in] shader Newly compiled shader */ void registerShader( const Rc& shader); - + /** * \brief Presents a swap chain image - * + * * Invokes the presenter's \c presentImage method on * the submission thread. The status of this operation * can be retrieved with \ref waitForSubmission. @@ -401,10 +403,10 @@ namespace dxvk { void presentImage( const Rc& presenter, DxvkSubmitStatus* status); - + /** * \brief Submits a command list - * + * * Submits the given command list to the device using * the given set of optional synchronization primitives. * \param [in] commandList The command list to submit @@ -418,7 +420,7 @@ namespace dxvk { /** * \brief Locks submission queue - * + * * Since Vulkan queues are only meant to be accessed * from one thread at a time, external libraries need * to lock the queue before submitting command buffers. @@ -427,10 +429,10 @@ namespace dxvk { m_submissionQueue.synchronize(); m_submissionQueue.lockDeviceQueue(); } - + /** * \brief Unlocks submission queue - * + * * Releases the Vulkan queues again so that DXVK * itself can use them for submissions again. */ @@ -440,7 +442,7 @@ namespace dxvk { /** * \brief Number of pending submissions - * + * * A return value of 0 indicates * that the GPU is currently idle. 
* \returns Pending submission count @@ -462,7 +464,7 @@ namespace dxvk { /** * \brief Waits for a given submission - * + * * \param [in,out] status Submission status * \returns Result of the submission */ @@ -475,19 +477,19 @@ namespace dxvk { * \param [in] access Access mode to check */ void waitForResource(const Rc& resource, DxvkAccess access); - + /** * \brief Waits until the device becomes idle - * + * * Waits for the GPU to complete the execution of all * previously submitted command buffers. This may be * used to ensure that resources that were previously * used by the GPU can be safely destroyed. */ void waitForIdle(); - + private: - + DxvkOptions m_options; Rc m_instance; @@ -497,32 +499,32 @@ namespace dxvk { DxvkDeviceFeatures m_features; DxvkDeviceInfo m_properties; - + DxvkDevicePerfHints m_perfHints; DxvkObjects m_objects; sync::Spinlock m_statLock; DxvkStatCounters m_statCounters; - + DxvkDeviceQueueSet m_queues; - + DxvkRecycler m_recycledCommandLists; DxvkRecycler m_recycledDescriptorPools; - + DxvkSubmissionQueue m_submissionQueue; DxvkDevicePerfHints getPerfHints(); - + void recycleCommandList( const Rc& cmdList); - + void recycleDescriptorPool( const Rc& pool); - + DxvkDeviceQueue getQueue( uint32_t family, uint32_t index) const; - + }; - + } diff --git a/src/dxvk/dxvk_device_filter.cpp b/src/dxvk/dxvk_device_filter.cpp index dfb5edaf4e8..8d140c2be4e 100644 --- a/src/dxvk/dxvk_device_filter.cpp +++ b/src/dxvk/dxvk_device_filter.cpp @@ -1,21 +1,26 @@ #include "dxvk_device_filter.h" namespace dxvk { - - DxvkDeviceFilter::DxvkDeviceFilter(DxvkDeviceFilterFlags flags) + + DxvkDeviceFilter::DxvkDeviceFilter( + DxvkDeviceFilterFlags flags, + const DxvkOptions& options) : m_flags(flags) { m_matchDeviceName = env::getEnvVar("DXVK_FILTER_DEVICE_NAME"); - + + if (m_matchDeviceName.empty()) + m_matchDeviceName = options.deviceFilter; + if (m_matchDeviceName.size() != 0) m_flags.set(DxvkDeviceFilterFlag::MatchDeviceName); } - - + + 
DxvkDeviceFilter::~DxvkDeviceFilter() { - + } - - + + bool DxvkDeviceFilter::testAdapter(const VkPhysicalDeviceProperties& properties) const { if (properties.apiVersion < VK_MAKE_VERSION(1, 1, 0)) { Logger::warn(str::format("Skipping Vulkan 1.0 adapter: ", properties.deviceName)); @@ -34,5 +39,5 @@ namespace dxvk { return true; } - + } diff --git a/src/dxvk/dxvk_device_filter.h b/src/dxvk/dxvk_device_filter.h index 7b411e6ad29..9cf50a7ec1c 100644 --- a/src/dxvk/dxvk_device_filter.h +++ b/src/dxvk/dxvk_device_filter.h @@ -1,12 +1,13 @@ #pragma once #include "dxvk_adapter.h" +#include "dxvk_options.h" namespace dxvk { - + /** * \brief Device filter flags - * + * * The device filter flags specify which device * properties are considered when testing adapters. * If no flags are set, all devices pass the test. @@ -15,40 +16,42 @@ namespace dxvk { MatchDeviceName = 0, SkipCpuDevices = 1, }; - + using DxvkDeviceFilterFlags = Flags; - - + + /** * \brief DXVK device filter - * + * * Used to select specific Vulkan devices to use * with DXVK. This may be useful for games which * do not offer an option to select the correct * device. 
*/ class DxvkDeviceFilter { - + public: - - DxvkDeviceFilter(DxvkDeviceFilterFlags flags); + + DxvkDeviceFilter( + DxvkDeviceFilterFlags flags, + const DxvkOptions& options); ~DxvkDeviceFilter(); - + /** * \brief Tests an adapter - * + * * \param [in] properties Adapter properties * \returns \c true if the test passes */ bool testAdapter( const VkPhysicalDeviceProperties& properties) const; - + private: - + DxvkDeviceFilterFlags m_flags; - + std::string m_matchDeviceName; - + }; - -} \ No newline at end of file + +} diff --git a/src/dxvk/dxvk_instance.cpp b/src/dxvk/dxvk_instance.cpp index 43768fb1007..7b1a9d4b62e 100644 --- a/src/dxvk/dxvk_instance.cpp +++ b/src/dxvk/dxvk_instance.cpp @@ -172,12 +172,21 @@ namespace dxvk { filterFlags.set(DxvkDeviceFilterFlag::SkipCpuDevices); } - DxvkDeviceFilter filter(filterFlags); + DxvkDeviceFilter filter(filterFlags, m_options); std::vector> result; + uint32_t numDGPU = 0; + uint32_t numIGPU = 0; + for (uint32_t i = 0; i < numAdapters; i++) { - if (filter.testAdapter(deviceProperties[i])) + if (filter.testAdapter(deviceProperties[i])) { + if (deviceProperties[i].deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) + numDGPU += 1; + else if (deviceProperties[i].deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) + numIGPU += 1; + result.push_back(new DxvkAdapter(m_vki, adapters[i])); + } } std::stable_sort(result.begin(), result.end(), @@ -199,6 +208,11 @@ namespace dxvk { return aRank < bRank; }); + if (m_options.hideIntegratedGraphics && numDGPU > 0 && numIGPU > 0) { + result.resize(numDGPU); + numIGPU = 0; + } + if (result.size() == 0) { Logger::warn("DXVK: No adapters found. 
Please check your " "device filter settings and Vulkan setup."); diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index 2fe01c1d73a..6e678c0ecbf 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -1,10 +1,11 @@ #include +#include #include "dxvk_device.h" #include "dxvk_memory.h" namespace dxvk { - + DxvkMemory::DxvkMemory() { } DxvkMemory::DxvkMemory( DxvkMemoryAllocator* alloc, @@ -21,8 +22,8 @@ namespace dxvk { m_offset (offset), m_length (length), m_mapPtr (mapPtr) { } - - + + DxvkMemory::DxvkMemory(DxvkMemory&& other) : m_alloc (std::exchange(other.m_alloc, nullptr)), m_chunk (std::exchange(other.m_chunk, nullptr)), @@ -31,8 +32,8 @@ namespace dxvk { m_offset (std::exchange(other.m_offset, 0)), m_length (std::exchange(other.m_length, 0)), m_mapPtr (std::exchange(other.m_mapPtr, nullptr)) { } - - + + DxvkMemory& DxvkMemory::operator = (DxvkMemory&& other) { this->free(); m_alloc = std::exchange(other.m_alloc, nullptr); @@ -44,18 +45,18 @@ namespace dxvk { m_mapPtr = std::exchange(other.m_mapPtr, nullptr); return *this; } - - + + DxvkMemory::~DxvkMemory() { this->free(); } - - + + void DxvkMemory::free() { if (m_alloc != nullptr) m_alloc->free(*this); } - + DxvkMemoryChunk::DxvkMemoryChunk( DxvkMemoryAllocator* alloc, @@ -66,15 +67,15 @@ namespace dxvk { // Mark the entire chunk as free m_freeList.push_back(FreeSlice { 0, memory.memSize }); } - - + + DxvkMemoryChunk::~DxvkMemoryChunk() { // This call is technically not thread-safe, but it // doesn't need to be since we don't free chunks m_alloc->freeDeviceMemory(m_type, m_memory); } - - + + DxvkMemory DxvkMemoryChunk::alloc( VkMemoryPropertyFlags flags, VkDeviceSize size, @@ -84,15 +85,15 @@ namespace dxvk { // be refined a bit in the future if necessary. 
if (m_memory.memFlags != flags || !checkHints(hints)) return DxvkMemory(); - + // If the chunk is full, return if (m_freeList.size() == 0) return DxvkMemory(); - + // Select the slice to allocate from in a worst-fit // manner. This may help keep fragmentation low. auto bestSlice = m_freeList.begin(); - + for (auto slice = m_freeList.begin(); slice != m_freeList.end(); slice++) { if (slice->length == size) { bestSlice = slice; @@ -101,34 +102,43 @@ namespace dxvk { bestSlice = slice; } } - + // We need to align the allocation to the requested alignment const VkDeviceSize sliceStart = bestSlice->offset; const VkDeviceSize sliceEnd = bestSlice->offset + bestSlice->length; - + const VkDeviceSize allocStart = dxvk::align(sliceStart, align); const VkDeviceSize allocEnd = dxvk::align(allocStart + size, align); - + if (allocEnd > sliceEnd) return DxvkMemory(); - + // We can use this slice, but we'll have to add // the unused parts of it back to the free list. m_freeList.erase(bestSlice); - + if (allocStart != sliceStart) m_freeList.push_back({ sliceStart, allocStart - sliceStart }); - + if (allocEnd != sliceEnd) m_freeList.push_back({ allocEnd, sliceEnd - allocEnd }); - + // Create the memory object with the aligned slice + void* mapPtr = m_memory.memPointer + ? reinterpret_cast(m_memory.memPointer) + allocStart + : nullptr; + + // Some games assume freshly mapped buffers are zero-initialized and + // break on stale data. Clear the slice on hand-out if requested, which + // also covers reused slices from recycled chunks. 
+ if (unlikely(mapPtr && m_alloc->zeroMappedMemory())) + std::memset(mapPtr, 0, allocEnd - allocStart); + return DxvkMemory(m_alloc, this, m_type, - m_memory.memHandle, allocStart, allocEnd - allocStart, - reinterpret_cast(m_memory.memPointer) + allocStart); + m_memory.memHandle, allocStart, allocEnd - allocStart, mapPtr); } - - + + void DxvkMemoryChunk::free( VkDeviceSize offset, VkDeviceSize length) { @@ -136,7 +146,7 @@ namespace dxvk { // a new slice that covers all those entries. Without doing // so, the slice could not be reused for larger allocations. auto curr = m_freeList.begin(); - + while (curr != m_freeList.end()) { if (curr->offset == offset + length) { length += curr->length; @@ -149,11 +159,11 @@ namespace dxvk { curr++; } } - + m_freeList.push_back({ offset, length }); } - - + + bool DxvkMemoryChunk::isEmpty() const { return m_freeList.size() == 1 && m_freeList[0].length == m_memory.memSize; @@ -184,6 +194,8 @@ namespace dxvk { m_device (device), m_devProps (device->adapter()->deviceProperties()), m_memProps (device->adapter()->memoryProperties()) { + VkDeviceSize maxBudget = m_device->config().maxMemoryBudget; + for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { m_memHeaps[i].properties = m_memProps.memoryHeaps[i]; m_memHeaps[i].stats = DxvkMemoryStats { 0, 0 }; @@ -194,8 +206,11 @@ namespace dxvk { if ((m_memProps.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) && (m_device->isUnifiedMemoryArchitecture())) m_memHeaps[i].budget = (8 * m_memProps.memoryHeaps[i].size) / 10; + + if (maxBudget && (!m_memHeaps[i].budget || m_memHeaps[i].budget > maxBudget)) + m_memHeaps[i].budget = maxBudget; } - + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { m_memTypes[i].heap = &m_memHeaps[m_memProps.memoryTypes[i].heapIndex]; m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; @@ -233,13 +248,18 @@ namespace dxvk { } } } - - + + DxvkMemoryAllocator::~DxvkMemoryAllocator() { - + } - - + + + bool 
DxvkMemoryAllocator::zeroMappedMemory() const { + return m_device->config().zeroMappedMemory; + } + + DxvkMemory DxvkMemoryAllocator::alloc( const VkMemoryRequirements* req, const VkMemoryDedicatedRequirements& dedAllocReq, @@ -261,9 +281,24 @@ namespace dxvk { if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) hints = hints & DxvkMemoryFlag::Transient; + // On tiling GPUs, prefer cached memory for host-visible allocations + // to speed up readbacks. This is a preference only: if no suitable + // cached memory type exists, we fall back to uncached below. + VkMemoryPropertyFlags cachedFlags = 0; + + if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + && m_device->perfHints().preferCachedMemory) + cachedFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + // Try to allocate from a memory type which supports the given flags exactly auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr; - DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints); + DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags | cachedFlags, hints); + + // If we asked for cached memory but none was available, retry uncached + if (!result && cachedFlags) { + cachedFlags = 0; + result = this->tryAlloc(req, dedAllocPtr, flags, hints); + } // If the first attempt failed, try ignoring the dedicated allocation if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) { @@ -281,14 +316,14 @@ namespace dxvk { VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; VkMemoryPropertyFlags remFlags = 0; - + while (!result && (flags & optFlags)) { remFlags |= optFlags & -optFlags; optFlags &= ~remFlags; result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, hints); } - + if (!result) { DxvkAdapterMemoryInfo memHeapInfo = m_device->adapter()->getMemoryHeapInfo(); @@ -314,11 +349,11 @@ namespace dxvk { throw DxvkError("DxvkMemoryAllocator: Memory allocation failed"); } - + return result; } - - + + 
DxvkMemory DxvkMemoryAllocator::tryAlloc( const VkMemoryRequirements* req, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, @@ -329,17 +364,17 @@ namespace dxvk { for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { const bool supported = (req->memoryTypeBits & (1u << i)) != 0; const bool adequate = (m_memTypes[i].memType.propertyFlags & flags) == flags; - + if (supported && adequate) { result = this->tryAllocFromType(&m_memTypes[i], flags, req->size, req->alignment, hints, dedAllocInfo); } } - + return result; } - - + + DxvkMemory DxvkMemoryAllocator::tryAllocFromType( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -358,15 +393,19 @@ namespace dxvk { DxvkDeviceMemory devMem = this->tryAllocDeviceMemory( type, flags, size, hints, dedAllocInfo); - if (devMem.memHandle != VK_NULL_HANDLE) + if (devMem.memHandle != VK_NULL_HANDLE) { + if (unlikely(devMem.memPointer && this->zeroMappedMemory())) + std::memset(devMem.memPointer, 0, size); + memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer); + } } else { for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) memory = type->chunks[i]->alloc(flags, size, align, hints); - + if (!memory) { DxvkDeviceMemory devMem; - + if (this->shouldFreeEmptyChunks(type->heap, chunkSize)) this->freeEmptyChunks(type->heap); @@ -387,8 +426,8 @@ namespace dxvk { return memory; } - - + + DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -397,7 +436,7 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo* dedAllocInfo) { bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && (m_device->features().extMemoryPriority.memoryPriority); - + if (type->heap->budget && type->heap->stats.memoryAllocated + size > type->heap->budget) return DxvkDeviceMemory(); @@ -426,7 +465,7 @@ namespace dxvk { if (m_vkd->vkAllocateMemory(m_vkd->device(), &info, nullptr, &result.memHandle) != VK_SUCCESS) return 
DxvkDeviceMemory(); - + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VkResult status = m_vkd->vkMapMemory(m_vkd->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer); @@ -463,7 +502,7 @@ namespace dxvk { } } - + void DxvkMemoryAllocator::freeChunkMemory( DxvkMemoryType* type, DxvkMemoryChunk* chunk, @@ -483,7 +522,7 @@ namespace dxvk { type->chunks.push_back(std::move(chunkRef)); } } - + void DxvkMemoryAllocator::freeDeviceMemory( DxvkMemoryType* type, diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 795dd74d62a..ac877672331 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -3,13 +3,13 @@ #include "dxvk_adapter.h" namespace dxvk { - + class DxvkMemoryAllocator; class DxvkMemoryChunk; - + /** * \brief Memory stats - * + * * Reports the amount of device memory * allocated and used by the application. */ @@ -46,7 +46,7 @@ namespace dxvk { /** * \brief Device memory object - * + * * Stores a Vulkan memory object. If the object * was allocated on host-visible memory, it will * be persistently mapped. @@ -59,10 +59,10 @@ namespace dxvk { float priority = 0.0f; }; - + /** * \brief Memory heap - * + * * Corresponds to a Vulkan memory heap and stores * its properties as well as allocation statistics. */ @@ -75,7 +75,7 @@ namespace dxvk { /** * \brief Memory type - * + * * Corresponds to a Vulkan memory type and stores * memory chunks used to sub-allocate memory on * this memory type. @@ -89,18 +89,18 @@ namespace dxvk { std::vector> chunks; }; - - + + /** * \brief Memory slice - * + * * Represents a slice of memory that has * been sub-allocated from a bigger chunk. 
*/ class DxvkMemory { friend class DxvkMemoryAllocator; public: - + DxvkMemory(); DxvkMemory( DxvkMemoryAllocator* alloc, @@ -113,10 +113,10 @@ namespace dxvk { DxvkMemory (DxvkMemory&& other); DxvkMemory& operator = (DxvkMemory&& other); ~DxvkMemory(); - + /** * \brief Memory object - * + * * This information is required when * binding memory to Vulkan objects. * \returns Memory object @@ -124,10 +124,10 @@ namespace dxvk { VkDeviceMemory memory() const { return m_memory; } - + /** * \brief Offset into device memory - * + * * This information is required when * binding memory to Vulkan objects. * \returns Offset into device memory @@ -135,10 +135,10 @@ namespace dxvk { VkDeviceSize offset() const { return m_offset; } - + /** * \brief Pointer to mapped data - * + * * \param [in] offset Byte offset * \returns Pointer to mapped data */ @@ -148,7 +148,7 @@ namespace dxvk { /** * \brief Returns length of memory allocated - * + * * \returns Memory size */ VkDeviceSize length() const { @@ -157,16 +157,16 @@ namespace dxvk { /** * \brief Checks whether the memory slice is defined - * + * * \returns \c true if this slice points to actual device * memory, and \c false if it is undefined. */ operator bool () const { return m_memory != VK_NULL_HANDLE; } - + private: - + DxvkMemoryAllocator* m_alloc = nullptr; DxvkMemoryChunk* m_chunk = nullptr; DxvkMemoryType* m_type = nullptr; @@ -174,9 +174,9 @@ namespace dxvk { VkDeviceSize m_offset = 0; VkDeviceSize m_length = 0; void* m_mapPtr = nullptr; - + void free(); - + }; @@ -195,29 +195,29 @@ namespace dxvk { }; using DxvkMemoryFlags = Flags; - - + + /** * \brief Memory chunk - * + * * A single chunk of memory that provides a * sub-allocator. This is not thread-safe. 
*/ class DxvkMemoryChunk : public RcObject { - + public: - + DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, DxvkDeviceMemory memory, DxvkMemoryFlags m_hints); - + ~DxvkMemoryChunk(); /** * \brief Allocates memory from the chunk - * + * * On failure, this returns a slice with * \c VK_NULL_HANDLE as the memory handle. * \param [in] flags Requested memory type flags @@ -231,10 +231,10 @@ namespace dxvk { VkDeviceSize size, VkDeviceSize align, DxvkMemoryFlags hints); - + /** * \brief Frees memory - * + * * Returns a slice back to the chunk. * Called automatically when a memory * slice runs out of scope. @@ -258,27 +258,27 @@ namespace dxvk { bool isCompatible(const Rc& other) const; private: - + struct FreeSlice { VkDeviceSize offset; VkDeviceSize length; }; - + DxvkMemoryAllocator* m_alloc; DxvkMemoryType* m_type; DxvkDeviceMemory m_memory; DxvkMemoryFlags m_hints; - + std::vector m_freeList; bool checkHints(DxvkMemoryFlags hints) const; - + }; - - + + /** * \brief Memory allocator - * + * * Allocates device memory for Vulkan resources. * Memory objects will be destroyed automatically. */ @@ -288,13 +288,13 @@ namespace dxvk { constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10; public: - + DxvkMemoryAllocator(const DxvkDevice* device); ~DxvkMemoryAllocator(); - + /** * \brief Buffer-image granularity - * + * * The granularity between linear and non-linear * resources in adjacent memory locations. See * section 11.6 of the Vulkan spec for details. 
@@ -303,10 +303,10 @@ namespace dxvk { VkDeviceSize bufferImageGranularity() const { return m_devProps.limits.bufferImageGranularity; } - + /** * \brief Allocates device memory - * + * * \param [in] req Memory requirements * \param [in] dedAllocReq Dedicated allocation requirements * \param [in] dedAllocInfo Dedicated allocation info @@ -320,10 +320,10 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo& dedAllocInfo, VkMemoryPropertyFlags flags, DxvkMemoryFlags hints); - + /** * \brief Queries memory stats - * + * * Returns the total amount of memory * allocated and used for a given heap. * \param [in] heap Heap index @@ -332,14 +332,20 @@ namespace dxvk { DxvkMemoryStats getMemoryStats(uint32_t heap) const { return m_memHeaps[heap].stats; } - + + /** + * \brief Whether mapped memory should be zero-initialized + * \returns \c true if zeroMappedMemory is enabled + */ + bool zeroMappedMemory() const; + private: const Rc m_vkd; const DxvkDevice* m_device; const VkPhysicalDeviceProperties m_devProps; const VkPhysicalDeviceMemoryProperties m_memProps; - + dxvk::mutex m_mutex; std::array m_memHeaps; std::array m_memTypes; @@ -349,7 +355,7 @@ namespace dxvk { const VkMemoryDedicatedAllocateInfo* dedAllocInfo, VkMemoryPropertyFlags flags, DxvkMemoryFlags hints); - + DxvkMemory tryAllocFromType( DxvkMemoryType* type, VkMemoryPropertyFlags flags, @@ -357,27 +363,27 @@ namespace dxvk { VkDeviceSize align, DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); - + DxvkDeviceMemory tryAllocDeviceMemory( DxvkMemoryType* type, VkMemoryPropertyFlags flags, VkDeviceSize size, DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); - + void free( const DxvkMemory& memory); - + void freeChunkMemory( DxvkMemoryType* type, DxvkMemoryChunk* chunk, VkDeviceSize offset, VkDeviceSize length); - + void freeDeviceMemory( DxvkMemoryType* type, DxvkDeviceMemory memory); - + VkDeviceSize pickChunkSize( uint32_t memTypeId, DxvkMemoryFlags hints) const; 
@@ -394,5 +400,5 @@ namespace dxvk { const DxvkMemoryHeap* heap); }; - + } diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index 656cb7cff10..3f674fe636e 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -9,6 +9,13 @@ namespace dxvk { useRawSsbo = config.getOption("dxvk.useRawSsbo", Tristate::Auto); shrinkNvidiaHvvHeap = config.getOption("dxvk.shrinkNvidiaHvvHeap", Tristate::Auto); hud = config.getOption("dxvk.hud", ""); + hideIntegratedGraphics = config.getOption ("dxvk.hideIntegratedGraphics", false); + deviceFilter = config.getOption("dxvk.deviceFilter", ""); + tilerMode = config.getOption("dxvk.tilerMode", Tristate::Auto); + zeroMappedMemory = config.getOption("dxvk.zeroMappedMemory", false); + lowerSinCos = config.getOption("dxvk.lowerSinCos", Tristate::Auto); + auto budget = config.getOption("dxvk.maxMemoryBudget", 0); + maxMemoryBudget = VkDeviceSize(std::max(budget, 0)) << 20u; enableDyasync = config.getOption ("dxvk.enableDyasync", true); numDyasyncThreads = config.getOption ("dxvk.numDyasyncThreads", 0); } diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index da80edeb196..d225ce5e6fa 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -2,6 +2,8 @@ #include "../util/config/config.h" +#include "../vulkan/vulkan_loader.h" + namespace dxvk { struct DxvkOptions { @@ -18,7 +20,34 @@ namespace dxvk { /// when using the state cache int32_t numCompilerThreads; + // Hides integrated GPUs if dedicated GPUs are + // present. May be necessary for some games that + // incorrectly assume monitor layouts. + bool hideIntegratedGraphics; + + /// Device name + std::string deviceFilter; + + // Tiler GPU tweaks. Currently biases host-visible + // allocations toward cached memory on tilers; the + // render-pass-op side is detected but not yet acted on. + Tristate tilerMode; + + // Zero-initialize host-visible mapped memory on allocation. 
+ // Works around games that assume freshly mapped buffers are clean. + bool zeroMappedMemory; + + /// Whether to use custom sin/cos approximation + Tristate lowerSinCos = Tristate::Auto; + + /// Memory budget in bytes + VkDeviceSize maxMemoryBudget; + + // Enable or disable Dyasync bool enableDyasync; + + // Number of compiler threads + // when using Dyasync int32_t numDyasyncThreads; /// Shader-related options diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp index 7cccdd135dc..0c02acd95e2 100644 --- a/src/spirv/spirv_module.cpp +++ b/src/spirv/spirv_module.cpp @@ -2647,6 +2647,88 @@ namespace dxvk { return resultId; } + uint32_t SpirvModule::opSinCos( + uint32_t x, + bool useBuiltIn) { + uint32_t floatType = defFloatType(32); + uint32_t resultType = defVectorType(floatType, 2u); + + if (useBuiltIn) { + std::array members = { + opSin(floatType, x), + opCos(floatType, x), + }; + return opCompositeConstruct(resultType, members.size(), members.data()); + } + + uint32_t uintType = defIntType(32, false); + uint32_t boolType = defBoolType(); + + uint32_t xNorm = opFMul(floatType, opFAbs(floatType, x), + constf32(4.0f / pi)); + uint32_t xTrunc = opTrunc(floatType, xNorm); + uint32_t xFract = opFSub(floatType, xNorm, xTrunc); + uint32_t xInt = opConvertFtoU(uintType, xTrunc); + + uint32_t mirror = opINotEqual(boolType, + opBitwiseAnd(uintType, xInt, constu32(1u)), constu32(0u)); + xFract = opSelect(floatType, mirror, + opFSub(floatType, constf32(1.0f), xFract), xFract); + + uint32_t xFract_2 = opFMul(floatType, xFract, xFract); + uint32_t xFract_4 = opFMul(floatType, xFract_2, xFract_2); + uint32_t xFract_6 = opFMul(floatType, xFract_4, xFract_2); + + uint32_t taylor = opFMul(floatType, xFract_6, + constf32(static_cast(-sincosTaylorFactor(7)))); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFFma(floatType, xFract_4, + constf32(static_cast(sincosTaylorFactor(5))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = 
opFFma(floatType, xFract_2, + constf32(static_cast(-sincosTaylorFactor(3))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFAdd(floatType, + constf32(static_cast(sincosTaylorFactor(1))), taylor); + decorate(taylor, spv::DecorationNoContraction); + + taylor = opFMul(floatType, taylor, xFract); + decorate(taylor, spv::DecorationNoContraction); + + uint32_t coFunc = opSqrt(floatType, + opFSub(floatType, constf32(1.0f), + opFMul(floatType, taylor, taylor))); + + uint32_t funcIsSin = opIEqual(boolType, + opBitwiseAnd(uintType, + opIAdd(uintType, xInt, constu32(1u)), constu32(2u)), + constu32(0u)); + + uint32_t sin = opSelect(floatType, funcIsSin, taylor, coFunc); + uint32_t cos = opSelect(floatType, funcIsSin, coFunc, taylor); + + uint32_t sinNeg = opINotEqual(boolType, + opBitwiseAnd(uintType, xInt, constu32(4u)), constu32(0u)); + sinNeg = opLogicalNotEqual(boolType, sinNeg, + opFOrdLessThan(boolType, x, constf32(0.0f))); + + uint32_t cosNeg = opINotEqual(boolType, + opBitwiseAnd(uintType, + opIAdd(uintType, xInt, constu32(2u)), constu32(4u)), + constu32(0u)); + + sin = opSelect(floatType, sinNeg, + opFNegate(floatType, sin), sin); + cos = opSelect(floatType, cosNeg, + opFNegate(floatType, cos), cos); + + std::array members = { sin, cos }; + return opCompositeConstruct(resultType, members.size(), members.data()); + } + uint32_t SpirvModule::opSqrt( uint32_t resultType, diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h index a1d027ea62c..f1381fbd1a5 100644 --- a/src/spirv/spirv_module.h +++ b/src/spirv/spirv_module.h @@ -927,6 +927,10 @@ namespace dxvk { uint32_t resultType, uint32_t vector); + uint32_t opSinCos( + uint32_t x, + bool useBuiltIn); + uint32_t opSqrt( uint32_t resultType, uint32_t operand); @@ -1269,6 +1273,13 @@ namespace dxvk { void instImportGlsl450(); + static constexpr double sincosTaylorFactor(uint32_t power) { + double r = 1.0; + for (uint32_t i = 1; i <= power; i++) + r *= pi * 0.25 / double(i); + return 
r; + } + uint32_t getImageOperandWordCount( const SpirvImageOperands& op) const; diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index d5b0517bc2f..de871da8053 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -278,9 +278,10 @@ namespace dxvk { { "d3d11.invariantPosition", "False" }, { "d3d11.floatControls", "False" }, }} }, - /* Nioh 2 */ + /* Nioh 2 - Fixes some fires looking glitchy */ { R"(\\nioh2\.exe$)", {{ { "dxgi.deferSurfaceCreation", "True" }, + { "dxvk.zeroMappedMemory", "True" }, }} }, /* Crazy Machines 3 - crashes on long device * * descriptions */ @@ -440,7 +441,8 @@ namespace dxvk { * around the game not properly initializing * * some of its constant buffers after discard */ { R"(\\Vindictus(_x64)?\.exe$)", {{ - { "d3d11.cachedDynamicResources", "cr" }, + { "d3d11.cachedDynamicResources", "cr" }, + { "dxvk.zeroMappedMemory", "True" }, }} }, /* Riders Republic - Statically linked AMDAGS */ { R"(\\RidersRepublic(_BE)?\.exe$)", {{ @@ -470,6 +472,11 @@ namespace dxvk { { R"(\\Kena-Win64-Shipping\.exe$)", {{ { "dxgi.hideIntelGpu", "True" }, }} }, + /* Warcraft 3 Reforged * + * Bugs out on some multi-gpu systems. */ + { R"(\\x86_64\\Warcraft III\.exe$)", {{ + { "dxvk.hideIntegratedGraphics", "True" }, + }} }, /* Earth Defense Force 5 */ { R"(\\EDF5\.exe$)", {{ { "dxgi.tearFree", "False" }, @@ -872,6 +879,15 @@ namespace dxvk { { "d3d9.memoryTrackTest", "True" }, { "d3d9.maxAvailableMemory", "2048" }, }} }, + /* Ridge Racer Unbounded - Vertex explosions */ + { R"(\\RRU(_demo)?\.exe$)", {{ + { "dxvk.zeroMappedMemory", "True" }, + }} }, + /* Warhammer 40,000: Dawn of War DE * + * Fixes occasional vertex explosions */ + { R"(\\W40k(_gog)?\.exe$)", {{ + { "dxvk.zeroMappedMemory", "True" }, + }} }, /* The Ship (2004) */ { R"(\\ship\.exe$)", {{ { "d3d9.memoryTrackTest", "True" },