Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 54 additions & 24 deletions src/dxbc/dxbc_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2228,32 +2228,62 @@ namespace dxvk {
const DxbcRegisterValue srcValue = emitRegisterLoad(
ins.src[0], DxbcRegMask(true, true, true, true));

// Either output may be DxbcOperandType::Null, in
// which case we don't have to generate any code.
if (ins.dst[0].type != DxbcOperandType::Null) {
const DxbcRegisterValue sinInput =
emitRegisterExtract(srcValue, ins.dst[0].mask);

DxbcRegisterValue sin;
sin.type = sinInput.type;
sin.id = m_module.opSin(
getVectorTypeId(sin.type),
sinInput.id);

emitRegisterStore(ins.dst[0], sin);
}

if (ins.dst[1].type != DxbcOperandType::Null) {
const DxbcRegisterValue cosInput =
emitRegisterExtract(srcValue, ins.dst[1].mask);
// Compute sin and cos together; either output may be null.
const bool useBuiltIn = !m_moduleInfo.options.sincosEmulation;

if (ins.dst[0].type != DxbcOperandType::Null ||
ins.dst[1].type != DxbcOperandType::Null) {

// Determine component count from whichever destination is non-null
DxbcRegMask sharedMask = ins.dst[0].type != DxbcOperandType::Null
? ins.dst[0].mask : ins.dst[1].mask;
const DxbcRegisterValue srcInput =
emitRegisterExtract(srcValue, sharedMask);

uint32_t componentCount = srcInput.type.ccount;
std::array<uint32_t, 4> sinIds = {};
std::array<uint32_t, 4> cosIds = {};

uint32_t floatType = m_module.defFloatType(32);

for (uint32_t i = 0; i < componentCount; i++) {
uint32_t scalarX = componentCount > 1
? m_module.opVectorExtractDynamic(
floatType, srcInput.id,
m_module.constu32(i))
: srcInput.id;

uint32_t sincos = m_module.opSinCos(scalarX, useBuiltIn);

// opSinCos returns vec2: index 0 = sin, index 1 = cos
uint32_t sinIndex = 0u, cosIndex = 1u;
sinIds[i] = m_module.opCompositeExtract(
floatType, sincos, 1u, &sinIndex);
cosIds[i] = m_module.opCompositeExtract(
floatType, sincos, 1u, &cosIndex);
}

DxbcRegisterValue cos;
cos.type = cosInput.type;
cos.id = m_module.opCos(
getVectorTypeId(cos.type),
cosInput.id);
if (ins.dst[0].type != DxbcOperandType::Null) {
DxbcRegisterValue sin;
sin.type = srcInput.type;
sin.id = componentCount > 1
? m_module.opCompositeConstruct(
getVectorTypeId(sin.type),
componentCount, sinIds.data())
: sinIds[0];
emitRegisterStore(ins.dst[0], sin);
}

emitRegisterStore(ins.dst[1], cos);
if (ins.dst[1].type != DxbcOperandType::Null) {
DxbcRegisterValue cos;
cos.type = srcInput.type;
cos.id = componentCount > 1
? m_module.opCompositeConstruct(
getVectorTypeId(cos.type),
componentCount, cosIds.data())
: cosIds[0];
emitRegisterStore(ins.dst[1], cos);
}
}
}

Expand Down
15 changes: 10 additions & 5 deletions src/dxbc/dxbc_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "dxbc_options.h"

namespace dxvk {

// Default constructor: performs no explicit initialization of its own.
DxbcOptions::DxbcOptions() = default;
Expand All @@ -19,6 +19,11 @@ namespace dxvk {
// Disable unbound texture optimization on Mali GPUs due to black screen issues
disableUnboundTextureOptimization = (devProps.vendorID == 0x13B5); // ARM Mali

// Use software sin/cos approximation by default when running on an Intel
// driver (Mesa or proprietary Windows); the lowerSinCos option overrides this
sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA)
|| adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS);
applyTristate(sincosEmulation, device->config().lowerSinCos);

useDepthClipWorkaround
= !devFeatures.extDepthClipEnable.depthClipEnable;
useStorageImageReadWithoutFormat
Expand All @@ -38,13 +43,13 @@ namespace dxvk {
&& (devInfo.coreSubgroup.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT);
useSdivForBufferIndex
= adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0);

switch (device->config().useRawSsbo) {
case Tristate::Auto: minSsboAlignment = devInfo.core.properties.limits.minStorageBufferOffsetAlignment; break;
case Tristate::True: minSsboAlignment = 4u; break;
case Tristate::False: minSsboAlignment = ~0u; break;
}

invariantPosition = options.invariantPosition;
enableRtOutputNanFixup = options.enableRtOutputNanFixup;
zeroInitWorkgroupMemory = options.zeroInitWorkgroupMemory;
Expand All @@ -55,7 +60,7 @@ namespace dxvk {
// Disable subgroup early discard on Nvidia because it may hurt performance
if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0))
useSubgroupOpsForEarlyDiscard = false;

// Figure out float control flags to match D3D11 rules
if (options.floatControls) {
if (devInfo.khrShaderFloatControls.shaderSignedZeroInfNanPreserveFloat32)
Expand All @@ -75,5 +80,5 @@ namespace dxvk {
|| adapter->matchesDriver(DxvkGpuVendor::Amd, VK_DRIVER_ID_MESA_RADV_KHR, 0, VK_MAKE_VERSION(20, 3, 0)))
enableRtOutputNanFixup = true;
}

}
9 changes: 7 additions & 2 deletions src/dxbc/dxbc_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ namespace dxvk {
/// Disable unbound texture optimization on Mali GPUs
/// to prevent black screen issues due to strict binding validation
bool disableUnboundTextureOptimization = false;


/// Use a Taylor approximation for sin/cos instead of the
/// native GLSL.std.450 Sin/Cos instructions. Required for
/// correct results on Intel iGPUs (dxvk #4866).
bool sincosEmulation = false;

/// Clear thread-group shared memory to zero
bool zeroInitWorkgroupMemory = false;

Expand All @@ -78,5 +83,5 @@ namespace dxvk {
/// Minimum storage buffer alignment
VkDeviceSize minSsboAlignment = 0;
};

}
15 changes: 12 additions & 3 deletions src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2196,14 +2196,23 @@ namespace dxvk {
DxsoRegMask srcMask(true, false, false, false);
uint32_t src0 = emitRegisterLoad(src[0], srcMask).id;

std::array<uint32_t, 4> sincosVectorIndices = { 0, 0, 0, 0 };
uint32_t sincos = m_module.opSinCos(src0,
!m_moduleInfo.options.sincosEmulation);

std::array<uint32_t, 4> sincosVectorIndices = { 0, 0, 0, 0 };
uint32_t index = 0;

uint32_t cosIdx = 1u, sinIdx = 0u;

// Original order: mask[0] = cos, mask[1] = sin
// opSinCos returns vec2(sin, cos)
if (mask[0])
sincosVectorIndices[index++] = m_module.opCos(scalarTypeId, src0);
sincosVectorIndices[index++] = m_module.opCompositeExtract(
scalarTypeId, sincos, 1u, &cosIdx);

if (mask[1])
sincosVectorIndices[index++] = m_module.opSin(scalarTypeId, src0);
sincosVectorIndices[index++] = m_module.opCompositeExtract(
scalarTypeId, sincos, 1u, &sinIdx);

for (; index < result.type.ccount; index++) {
if (sincosVectorIndices[index] == 0)
Expand Down
7 changes: 6 additions & 1 deletion src/dxso/dxso_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ namespace dxvk {
const DxvkDeviceFeatures& devFeatures = device->features();
const DxvkDeviceInfo& devInfo = adapter->devicePropertiesExt();

// Use software sin/cos approximation by default when running on an Intel
// driver (Mesa or proprietary Windows); the lowerSinCos option overrides this
sincosEmulation = adapter->matchesDriver(VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA)
|| adapter->matchesDriver(VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS);
applyTristate(sincosEmulation, device->config().lowerSinCos);

useDemoteToHelperInvocation
= (devFeatures.extShaderDemoteToHelperInvocation.shaderDemoteToHelperInvocation);

Expand All @@ -25,7 +30,7 @@ namespace dxvk {
// Disable early discard on Nvidia because it may hurt performance
if (adapter->matchesDriver(DxvkGpuVendor::Nvidia, VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR, 0, 0))
useSubgroupOpsForEarlyDiscard = false;

// Apply shader-related options
strictConstantCopies = options.strictConstantCopies;

Expand Down
4 changes: 4 additions & 0 deletions src/dxso/dxso_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ namespace dxvk {

/// Whether vertex shaders may emit ClipDistance builtins.
bool enableClipDistance = false;

/// Use a Taylor approximation for sin/cos instead of the
/// native GLSL.std.450 Sin/Cos instructions.
bool sincosEmulation = false;
};

}
6 changes: 6 additions & 0 deletions src/dxvk/dxvk_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,12 @@ namespace dxvk {
}


/**
 * Checks whether the adapter is driven by a specific driver.
 *
 * Compares the given driver ID against the driver ID stored in
 * the adapter's cached driver properties.
 *
 * \param [in] driver Driver ID to test against
 * \returns \c true if the adapter's driver ID equals \c driver
 */
bool DxvkAdapter::matchesDriver(
    VkDriverIdKHR driver) const {
  const auto adapterDriver = m_deviceInfo.khrDeviceDriverProperties.driverID;
  return adapterDriver == driver;
}


void DxvkAdapter::logAdapterInfo() const {
VkPhysicalDeviceProperties deviceInfo = this->deviceProperties();
VkPhysicalDeviceMemoryProperties memoryInfo = this->memoryProperties();
Expand Down
Loading
Loading