diff --git a/D3D11Engine/BaseGraphicsEngine.h b/D3D11Engine/BaseGraphicsEngine.h index 28828b6f..d31f3470 100644 --- a/D3D11Engine/BaseGraphicsEngine.h +++ b/D3D11Engine/BaseGraphicsEngine.h @@ -128,7 +128,7 @@ class BaseGraphicsEngine { virtual BaseLineRenderer* GetLineRenderer() PURE; /** Returns the graphics-device this is running on */ - virtual std::string GetGraphicsDeviceName() PURE; + virtual const std::string& GetGraphicsDeviceName() PURE; /** Draws a screen fade effects */ virtual XRESULT DrawScreenFade( void* camera ) { return XR_SUCCESS; }; @@ -227,4 +227,6 @@ class BaseGraphicsEngine { virtual XRESULT UpdateRenderStates() { return XR_SUCCESS; }; virtual std::unique_ptr RecordGraphicsEvent( LPCWSTR region ) { return std::make_unique(); } + + virtual void OnWorldLoaded() {}; }; diff --git a/D3D11Engine/ConstantBufferStructs.h b/D3D11Engine/ConstantBufferStructs.h index 17501bc9..4a528bbf 100644 --- a/D3D11Engine/ConstantBufferStructs.h +++ b/D3D11Engine/ConstantBufferStructs.h @@ -16,17 +16,100 @@ struct VobInstanceInfo { DWORD GP_Slot; }; -/** Remap-index for the static vobs */ -struct VobInstanceRemapInfo { - bool operator < ( const VobInstanceRemapInfo& b ) const { - return InstanceRemapIndex < b.InstanceRemapIndex; - } - - bool operator == ( const VobInstanceRemapInfo& o ) const { - return InstanceRemapIndex == o.InstanceRemapIndex; - } - - DWORD InstanceRemapIndex; +struct VobInstanceInfoAtlas { + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; // Previous frame's world matrix for motion vectors + DWORD color; + float windStrenth; + float canBeAffectedByPlayer; + // Texture Atlas information, directly stored in the instance data for easy access in shader without needing an extra StructuredBuffer + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + UINT globalSourceIndex; // global source index into feedback texture +}; + +// Descriptor returned for use with shader +// Points to a specific slice in the Texture2DArray atlas, along with 
UV coordinates for sampling that slice +// this is pointed to from VobInstanceInfo GP_Slot into a StructuredBuffer, which is then indexed in the shader to get the correct slice/UVs for each instance +struct TextureDescriptor { + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; +}; + +// CPU-side lookup: maps a zCTexture* to its atlas placement +struct TextureAtlasLookup { + DXGI_FORMAT atlasFormat; + TextureDescriptor descriptor; +}; + +// Per-vob data uploaded once at world load, read by GPU cull compute shader +struct VobGPUData { + XMFLOAT3 aabbCenter; + float pad0; + XMFLOAT3 aabbExtent; + float pad1; + XMFLOAT4X4 world; + XMFLOAT4X4 prevWorld; + DWORD color; + float aniModeStrength; + float canBeAffectedByPlayer; + UINT submeshStart; // index into SubmeshGPUData[] + UINT submeshCount; // how many submeshes this vob maps to + UINT pad2[3]; +}; + +// Per-submesh lookup, shared across all vobs with the same visual +struct SubmeshGPUData { + int slice; + float uStart, vStart, uEnd, vEnd; + UINT argIndex; // index into merged indirect args + UINT instanceBaseOffset; // fixed write offset in instance buffer + UINT globalSourceIndex; // global source index into feedback texture +}; + +// Per-submesh data for the world mesh atlas indirect draw path. +// Read by VS_ExWorldAtlas via StructuredBuffer. +struct WorldMeshSubmeshGPUData { + // Diffuse atlas + int diffuseSlice; + float dUStart, dVStart, dUEnd, dVEnd; + // Normal atlas + int normalSlice; + float nUStart, nVStart, nUEnd, nVEnd; + // FX atlas + int fxSlice; + float fUStart, fVStart, fUEnd, fVEnd; + // Flags: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST + UINT flags; +}; + +// Constant buffer for the GPU cull compute shader +struct CullConstants { + XMFLOAT4 frustumPlanes[6]; + XMFLOAT3 cameraPosition; + float drawDistance; + float globalWindStrength; + UINT windAdvanced; + UINT numVobs; + UINT feedbackFrameNumber; // >0 = write feedback in CS; 0 = disabled (e.g. 
shadow pass) + UINT enableHiZ; // 1 = Hi-Z occlusion culling enabled + UINT hiZMipCount; + float hiZWidth; // Hi-Z mip 0 dimensions (full depth buffer size) + float hiZHeight; + XMFLOAT4X4 viewProjection; // Current frame view-projection matrix for Hi-Z reprojection +}; + +struct HiZBuildConstants { + UINT outputWidth; + UINT outputHeight; + UINT inputMipLevel; + UINT isCopyPass; // 1 = copy from depth buffer (mip 0), 0 = downsample from previous mip }; #pragma pack (push, 1) @@ -61,11 +144,11 @@ struct PerObjectState { }; struct PFXVS_ConstantBuffer { - XMFLOAT4X4 PFXVS_InvProj; + float4 PFXVS_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 }; struct HeightfogConstantBuffer { - XMFLOAT4X4 InvProj; + float4 HF_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 XMFLOAT4X4 InvView; float3 CameraPosition; float HF_FogHeight; @@ -120,7 +203,7 @@ struct DS_PointLightConstantBuffer { float2 PL_ViewportSize; float2 PL_Pad2; - XMFLOAT4X4 PL_InvProj; // Optimize out! + float4 PL_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 XMFLOAT4X4 PL_InvView; float3 PL_LightScreenPos; @@ -130,7 +213,7 @@ struct DS_PointLightConstantBuffer { constexpr int MAX_CSM_CASCADES = 4; struct DS_ScreenQuadConstantBuffer { - XMFLOAT4X4 SQ_InvProj; // Optimize out! 
+ float4 SQ_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 XMFLOAT4X4 SQ_InvView; XMFLOAT4X4 SQ_View; @@ -142,16 +225,14 @@ struct DS_ScreenQuadConstantBuffer { float4 SQ_LightColor; // CSM: Cascade 0 (f�r Kompatibilit�t mit bestehenden Shadern) - XMFLOAT4X4 SQ_ShadowView[MAX_CSM_CASCADES]; - XMFLOAT4X4 SQ_ShadowProj[MAX_CSM_CASCADES]; - - XMFLOAT4X4 SQ_RainView; - XMFLOAT4X4 SQ_RainProj; + XMFLOAT4X4 SQ_ShadowViewProj[MAX_CSM_CASCADES]; float SQ_ShadowStrength; float SQ_ShadowAOStrength; float SQ_WorldAOStrength; float SQ_ShadowSoftness; + uint32_t SQ_FrameIndex; + float3 SQ_Pad; }; struct CloudConstantBuffer { diff --git a/D3D11Engine/D3D11AtlasTypes.h b/D3D11Engine/D3D11AtlasTypes.h new file mode 100644 index 00000000..05423800 --- /dev/null +++ b/D3D11Engine/D3D11AtlasTypes.h @@ -0,0 +1,31 @@ +#pragma once +#include "D3D11TextureAtlasManager.h" +#include "D3D11IndirectBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include + +// Shared atlas constants +constexpr size_t TEXTURE_ATLAS_MAX = DXGI_FORMAT_V408 + 1; +struct MeshVisualInfo; + +// Tracks one unique submesh in the global geometry buffer +struct StaticSubmeshEntry { + UINT indexCount; + UINT startIndexLocation; // offset into global IB + int baseVertexLocation; // offset into global VB + TextureDescriptor atlasDesc; + MeshVisualInfo* visual; // which visual owns this submesh +}; + +// Groups all submeshes that share one atlas (same DXGI_FORMAT) +struct AtlasDrawGroup { + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + std::vector submeshes; + std::vector indirectArgs; + std::unique_ptr indirectBuffer; + UINT mergedArgsOffset = 0; // byte offset into merged indirect args buffer + UINT mergedArgsCount = 0; // number of args in this group +}; diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp index 0139b414..ee7e9ccf 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp +++ 
b/D3D11Engine/D3D11CascadedShadowMapBuffer.cpp @@ -115,13 +115,13 @@ ID3D11ShaderResourceView* D3D11CascadedShadowMapBuffer::GetShaderResourceView() return m_srv.Get(); } -void D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->PSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } } -void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const { +void D3D11CascadedShadowMapBuffer::BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const { if ( m_srv ) { context->VSSetShaderResources( slot, 1, m_srv.GetAddressOf() ); } diff --git a/D3D11Engine/D3D11CascadedShadowMapBuffer.h b/D3D11Engine/D3D11CascadedShadowMapBuffer.h index 694c205b..260c53e6 100644 --- a/D3D11Engine/D3D11CascadedShadowMapBuffer.h +++ b/D3D11Engine/D3D11CascadedShadowMapBuffer.h @@ -50,14 +50,14 @@ class D3D11CascadedShadowMapBuffer { * @param context Device context * @param slot Shader resource slot */ - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) const; /** * Bind the texture array to a vertex shader slot. 
* @param context Device context * @param slot Shader resource slot */ - void BindToVertexShader( ID3D11DeviceContext1* context, UINT slot ) const; + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) const; /** Get the size of each cascade (width = height) */ UINT GetSize() const { return m_size; } diff --git a/D3D11Engine/D3D11CommandList.cpp b/D3D11Engine/D3D11CommandList.cpp new file mode 100644 index 00000000..9fd631c0 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.cpp @@ -0,0 +1,11 @@ +#include "pch.h" +#include "D3D11CommandList.h" +#include "D3D11VertexBuffer.h" + +void D3D11CommandList::IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset ) { + m_Context->IASetVertexBuffers( 0, 1, vb->GetVertexBuffer().GetAddressOf(), &stride, &offset ); +} + +void D3D11CommandList::IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset ) { + m_Context->IASetIndexBuffer( ib->GetVertexBuffer().Get(), format, offset ); +} diff --git a/D3D11Engine/D3D11CommandList.h b/D3D11Engine/D3D11CommandList.h new file mode 100644 index 00000000..41e4ee24 --- /dev/null +++ b/D3D11Engine/D3D11CommandList.h @@ -0,0 +1,140 @@ +#pragma once +#include "pch.h" +#include "D3D11PipelineStateObject.h" + +class D3D11VertexBuffer; + +/** + * Slim command-list wrapper around an ID3D11DeviceContext and the + * D3D11PipelineStateCache. + * + * Provides SetPipelineState() plus the commonly used Draw / IA / OM + * helpers so that call-sites read like a modern graphics API without + * touching the raw context or the engine's global render-state machine. + * + * The object is intentionally cheap to construct (two pointers) and + * does not own any resources. 
+ */ +struct D3D11CommandList { + + D3D11CommandList() = default; + D3D11CommandList( ID3D11DeviceContext* context, D3D11PipelineStateCache* cache ) + : m_Context( context ), m_Cache( cache ) {} + + // --- Pipeline state ------------------------------------------------------ + + void SetPipelineState( const D3D11PipelineStateObject& pso ) { + m_Cache->SetPipelineState( pso ); + } + + /** Force the cache to re-bind everything on next SetPipelineState. */ + void InvalidatePipelineState() { + m_Cache->Invalidate(); + } + + // --- Input assembly ------------------------------------------------------ + + void IASetVertexBuffer( D3D11VertexBuffer* vb, UINT stride, UINT offset = 0 ); + + void IASetVertexBuffers( UINT startSlot, + UINT numBuffers, + ID3D11Buffer* const* buffers, + const UINT* strides, + const UINT* offsets ) { + m_Context->IASetVertexBuffers( startSlot, numBuffers, buffers, strides, offsets ); + } + + void IASetIndexBuffer( ID3D11Buffer* buffer, DXGI_FORMAT format, UINT offset = 0 ) { + m_Context->IASetIndexBuffer( buffer, format, offset ); + } + + void IASetIndexBuffer( D3D11VertexBuffer* ib, DXGI_FORMAT format, UINT offset = 0 ); + + // --- Draw calls ---------------------------------------------------------- + + void Draw( UINT vertexCount, UINT startVertexLocation = 0 ) { + m_Context->Draw( vertexCount, startVertexLocation ); + m_DrawnTriangles += vertexCount / 3; + } + + void DrawIndexed( UINT indexCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0 ) { + m_Context->DrawIndexed( indexCount, startIndexLocation, baseVertexLocation ); + m_DrawnTriangles += indexCount / 3; + } + + void DrawInstanced( UINT vertexCountPerInstance, + UINT instanceCount, + UINT startVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawInstanced( vertexCountPerInstance, instanceCount, + startVertexLocation, startInstanceLocation ); + m_DrawnTriangles += ( vertexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstanced( UINT 
indexCountPerInstance, + UINT instanceCount, + UINT startIndexLocation = 0, + INT baseVertexLocation = 0, + UINT startInstanceLocation = 0 ) { + m_Context->DrawIndexedInstanced( indexCountPerInstance, instanceCount, + startIndexLocation, baseVertexLocation, + startInstanceLocation ); + m_DrawnTriangles += ( indexCountPerInstance / 3 ) * instanceCount; + } + + void DrawIndexedInstancedIndirect( ID3D11Buffer* argsBuffer, + UINT alignedByteOffsetForArgs ) { + m_Context->DrawIndexedInstancedIndirect( argsBuffer, alignedByteOffsetForArgs ); + // Triangle count unknown for indirect draws + } + + // --- Render target / viewport helpers ------------------------------------ + + void OMSetRenderTargets( UINT numViews, + ID3D11RenderTargetView* const* rtvs, + ID3D11DepthStencilView* dsv ) { + m_Context->OMSetRenderTargets( numViews, rtvs, dsv ); + } + + void RSSetViewports( UINT numViewports, const D3D11_VIEWPORT* viewports ) { + m_Context->RSSetViewports( numViewports, viewports ); + } + + void RSGetViewports( UINT* numViewports, D3D11_VIEWPORT* viewports ) { + m_Context->RSGetViewports( numViewports, viewports ); + } + + void ClearDepthStencilView( ID3D11DepthStencilView* dsv, + UINT clearFlags, + float depth, + UINT8 stencil ) { + m_Context->ClearDepthStencilView( dsv, clearFlags, depth, stencil ); + } + + void ClearRenderTargetView( ID3D11RenderTargetView* rtv, const float color[4] ) { + m_Context->ClearRenderTargetView( rtv, color ); + } + + // --- Stats --------------------------------------------------------------- + + /** Return triangles drawn since the last FlushDrawnTriangles() call and reset the counter. 
*/ + UINT FlushDrawnTriangles() { + UINT t = m_DrawnTriangles; + m_DrawnTriangles = 0; + return t; + } + + UINT GetDrawnTriangles() const { return m_DrawnTriangles; } + + // --- Raw access (escape hatch) ------------------------------------------- + + ID3D11DeviceContext* GetContext() const { return m_Context; } + D3D11PipelineStateCache* GetPSOCache() const { return m_Cache; } + +private: + ID3D11DeviceContext* m_Context = nullptr; + D3D11PipelineStateCache* m_Cache = nullptr; + UINT m_DrawnTriangles = 0; +}; diff --git a/D3D11Engine/D3D11Effect.cpp b/D3D11Engine/D3D11Effect.cpp index 7e401a33..66083e30 100644 --- a/D3D11Engine/D3D11Effect.cpp +++ b/D3D11Engine/D3D11Effect.cpp @@ -219,15 +219,12 @@ XRESULT D3D11Effect::DrawRain() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); @@ -378,15 +375,12 @@ XRESULT D3D11Effect::DrawRain_CS() { // Set alphablending state.BlendState.SetAlphaBlending(); - state.BlendState.SetDirty(); // Disable depth-write state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); // Disable culling state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); // Rendering instances only e->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); diff --git a/D3D11Engine/D3D11Engine.vcxproj b/D3D11Engine/D3D11Engine.vcxproj index 64f958e2..beaacfab 100644 --- a/D3D11Engine/D3D11Engine.vcxproj +++ b/D3D11Engine/D3D11Engine.vcxproj @@ -798,6 +798,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -814,6 +815,7 @@ copy "$(OutDir)$(TargetName).pdb" 
"$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -835,13 +837,18 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + + + + @@ -983,6 +990,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1063,6 +1071,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1084,12 +1093,15 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + + + ../pch.h @@ -1190,6 +1202,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + @@ -1265,12 +1278,21 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" Document + + + Document + + + Document + + + @@ -1280,6 +1302,7 @@ copy "$(OutDir)$(TargetName).pdb" "$(G1_SYSTEM_PATH)\ddraw.pdb" + diff --git a/D3D11Engine/D3D11Engine.vcxproj.filters b/D3D11Engine/D3D11Engine.vcxproj.filters index 0c403c73..4b9f2297 100644 --- a/D3D11Engine/D3D11Engine.vcxproj.filters +++ b/D3D11Engine/D3D11Engine.vcxproj.filters @@ -384,6 +384,12 @@ + + Engine\D3D11 + + + Engine\D3D11 + Engine\D3D11\PFX\Effects @@ -804,6 +810,21 @@ Engine\D3D11\PFX\Effects + + Engine + + + + + + Engine\D3D11 + + + Engine\D3D11 + + + Engine\D3D11 + @@ -834,6 +855,15 @@ Engine\D3D11 + + Engine\D3D11 + + + Engine\D3D11 + + + Engine\D3D11 + Engine\D3D11 @@ -1078,6 +1108,15 @@ Engine\D3D11\PFX\Effects + + Engine + + + Engine\D3D11 + + + Engine\D3D11 + @@ -1111,9 +1150,6 @@ Librarys\Assimp - - Engine\D3D11 - Engine\Shaders @@ -1135,5 +1171,14 @@ Engine\Shaders + + Engine\Shaders + + + Engine\Shaders + + + Engine\Shaders + \ No newline at end of file diff --git a/D3D11Engine/D3D11GraphicsEngine.cpp b/D3D11Engine/D3D11GraphicsEngine.cpp index b5fccb4f..9d9b936a 100644 --- a/D3D11Engine/D3D11GraphicsEngine.cpp +++ b/D3D11Engine/D3D11GraphicsEngine.cpp @@ -1,6 +1,9 @@ #include "D3D11GraphicsEngine.h" #include "D3D11ShadowMap.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" + #include "AlignedAllocator.h" #include "D3D11Effect.h" #include "D3D11GShader.h" @@ -51,6 +54,7 @@ #include 
"zCOption.h" #include "RenderGraph.h" #include "RGBuilder.h" +#include "D3D11TextureAtlasManager.h" #ifdef BUILD_SPACER #define IS_SPACER_BUILD true @@ -94,6 +98,7 @@ static std::unique_ptr igdextDevice; static std::unique_ptr agsDevice; extern bool userHaveAMDGPU; +bool SupportTextureAtlases = false; namespace { @@ -160,6 +165,9 @@ D3D11GraphicsEngine::D3D11GraphicsEngine() { m_lowlatency = false; m_isWindowActive = false; + m_VobAtlasPass = std::make_unique( this ); + m_MeshAtlasPass = std::make_unique( this ); + // Initialize previous view-proj matrix to identity for motion vectors XMStoreFloat4x4( &m_PrevViewProjMatrix, XMMatrixIdentity() ); @@ -278,6 +286,7 @@ void D3D11GraphicsEngine::CreateAndBindDefaultSampler() { float scaleRatio = static_cast(GetScaledResolution().x) / static_cast(GetBackbufferResolution().x); // Calculate raw bias, but clamp it to a maximum of 0.0f to protect Supersampling float mipBias = std::min(0.0f, std::log2(scaleRatio)); + m_SamplerMipBias = mipBias; D3D11_SAMPLER_DESC samplerDesc{}; samplerDesc.Filter = D3D11_FILTER_ANISOTROPIC; @@ -568,7 +577,9 @@ XRESULT D3D11GraphicsEngine::Init() { DrawMultiIndexedInstancedIndirect = Stub_DrawMultiIndexedInstancedIndirect; } - Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseMDI = DrawMultiIndexedInstancedIndirect != Stub_DrawMultiIndexedInstancedIndirect; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseMDI = + !FeatureLevel10Compatibility + && DrawMultiIndexedInstancedIndirect != Stub_DrawMultiIndexedInstancedIndirect; if ( !BeginUAVOverlap || !EndUAVOverlap ) { BeginUAVOverlap = Stub_BeginUAVOverlap; @@ -582,6 +593,23 @@ XRESULT D3D11GraphicsEngine::Init() { Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseLayeredRendering = FeatureRTArrayIndexFromAnyShader; } + if (maxFeatureLevel >= D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0) { + // check amount of GPU Memory available + constexpr uint64_t GiB = 
1024ull * 1024ull * 1024ull; + if ( adpDesc.DedicatedVideoMemory >= 3 * GiB ) { // on 32 bit processes dx11 can't see more than 3GiB + // currently we just assume everything fits into memory. + // in the future we should make use of Tiled Resources, which would allow us + // to support more memory intensive features, even on less than 4GB cards, by streaming in the necessary tiles. + SupportTextureAtlases = true; + Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows = SupportTextureAtlases; + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh = SupportTextureAtlases; + + // VOB atlas is currently bugged, due to some vobs not getting their correct textures, + // likely due to being "animated" and at world load no animation has happened yet. + Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs = false; + } + } + LogInfo() << "Creating ShaderManager"; ShaderManager = std::make_unique(); ShaderManager->Init(); @@ -771,6 +799,9 @@ XRESULT D3D11GraphicsEngine::Init() { D3D11VertexBuffer::EBindFlags::B_INDEXBUFFER, D3D11VertexBuffer::EUsageFlags::U_IMMUTABLE ); + // Initialize pipeline state cache + m_PipelineStateCache.Init( Device.Get(), Context.Get() ); + // Create shadow map manager ShadowMaps = std::make_unique(); int initialShadowSize = Engine::GAPI->GetRendererState().RendererSettings.ShadowMapSize; @@ -958,6 +989,9 @@ XRESULT D3D11GraphicsEngine::RecreateBuffers() { GetDevice().Get(), roundedTextureResolution.x, roundedTextureResolution.y, DXGI_FORMAT_R32_TYPELESS, nullptr, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT ); + // Create / recreate Hi-Z pyramid resources to match new depth buffer size + CreateHiZResources(); + // Create PFX-Renderer if ( !PfxRenderer ) PfxRenderer = std::make_unique(); @@ -973,7 +1007,8 @@ XRESULT D3D11GraphicsEngine::RecreateBuffers() { OnResetBackBuffer(); // actual native-resolution backbuffer for UI and copy operations !! 
- Backbuffer = std::make_unique( GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_ENGINE_SWAPCHAIN, nullptr, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, 1, 1, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS ); + Backbuffer = std::make_unique( GetDevice().Get(), Resolution.x, Resolution.y, DXGI_FORMAT_ENGINE_SWAPCHAIN, nullptr, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, 1, 1, + D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | (Device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_BIND_UNORDERED_ACCESS : 0) ); SetDebugName( Backbuffer->GetTexture().Get(), "Backbuffer->TEX" ); SetDebugName( Backbuffer->GetShaderResView().Get(), "Backbuffer->SRV" ); @@ -1338,7 +1373,6 @@ XRESULT D3D11GraphicsEngine::OnBeginFrame() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -1457,6 +1491,7 @@ XRESULT D3D11GraphicsEngine::FetchDisplayModeListDXGI() { currentRefreshRate = devMode.dmDisplayFrequency; } + CachedDisplayModes.reserve( numModes ); for ( UINT i = 0; i < numModes; i++ ) { DXGI_MODE_DESC1& displayMode = displayModes[i]; if ( static_cast(Resolution.x) == displayMode.Width && static_cast(Resolution.y) == displayMode.Height ) { @@ -1507,6 +1542,7 @@ XRESULT D3D11GraphicsEngine::FetchDisplayModeListWindows() { XRESULT D3D11GraphicsEngine::GetDisplayModeList( std::vector* modeList, bool includeSuperSampling ) { + modeList->reserve( CachedDisplayModes.size() ); for ( DisplayModeInfo& mode : CachedDisplayModes ) { modeList->push_back( mode ); } @@ -1890,10 +1926,8 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Default states SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - 
Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetActivePixelShader( "PS_PFX_CinemaScope" ); ActivePS->Apply(); @@ -1938,28 +1972,23 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { case zRND_ALPHA_FUNC_BLEND_TEST: case zRND_ALPHA_FUNC_SUB: { Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_ADD: { Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL: { Engine::GAPI->GetRendererState().BlendState.SetModulateBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } case zRND_ALPHA_FUNC_MUL2: { Engine::GAPI->GetRendererState().BlendState.SetModulate2Blending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); break; } } Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); if ( haveTexture ) SetActivePixelShader( "PS_PFX_Alpha_Blend" ); @@ -1987,7 +2016,6 @@ XRESULT D3D11GraphicsEngine::DrawScreenFade( void* c ) { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); UpdateRenderStates(); } return XR_SUCCESS; @@ -2438,8 +2466,8 @@ XRESULT D3D11GraphicsEngine::UnbindTexture( int slot ) { /** Recreates the renderstates */ XRESULT 
D3D11GraphicsEngine::UpdateRenderStates() { - if ( Engine::GAPI->GetRendererState().BlendState.StateDirty && - Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { + Engine::GAPI->GetRendererState().BlendState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().BlendState.Hash != FFBlendStateHash ) { D3D11BlendStateInfo* state = static_cast (GothicStateCache::s_BlendStateMap[Engine::GAPI->GetRendererState().BlendState]); @@ -2454,13 +2482,12 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFBlendState = state->State.Get(); FFBlendStateHash = Engine::GAPI->GetRendererState().BlendState.Hash; - Engine::GAPI->GetRendererState().BlendState.StateDirty = false; GetContext()->OMSetBlendState( FFBlendState.Get(), float4( 0, 0, 0, 0 ).toPtr(), 0xFFFFFFFF ); } - if ( Engine::GAPI->GetRendererState().RasterizerState.StateDirty && - Engine::GAPI->GetRendererState().RasterizerState.Hash != + Engine::GAPI->GetRendererState().RasterizerState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().RasterizerState.Hash != FFRasterizerStateHash ) { D3D11RasterizerStateInfo* state = static_cast (GothicStateCache::s_RasterizerStateMap[Engine::GAPI->GetRendererState().RasterizerState]); @@ -2476,12 +2503,11 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() { FFRasterizerState = state->State.Get(); FFRasterizerStateHash = Engine::GAPI->GetRendererState().RasterizerState.Hash; - Engine::GAPI->GetRendererState().RasterizerState.StateDirty = false; GetContext()->RSSetState( FFRasterizerState.Get() ); } - if ( Engine::GAPI->GetRendererState().DepthState.StateDirty && - Engine::GAPI->GetRendererState().DepthState.Hash != + Engine::GAPI->GetRendererState().DepthState.ComputeHash(); + if ( Engine::GAPI->GetRendererState().DepthState.Hash != FFDepthStencilStateHash ) { D3D11DepthBufferState* state = static_cast (GothicStateCache::s_DepthBufferMap[Engine::GAPI->GetRendererState().DepthState]); @@ -2497,7 +2523,6 @@ XRESULT D3D11GraphicsEngine::UpdateRenderStates() 
{ FFDepthStencilState = state->State.Get(); FFDepthStencilStateHash = Engine::GAPI->GetRendererState().DepthState.Hash; - Engine::GAPI->GetRendererState().DepthState.StateDirty = false; GetContext()->OMSetDepthStencilState( FFDepthStencilState.Get(), 0 ); } @@ -2596,16 +2621,15 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->CSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); // Update view distances - InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, 0, 0, 0 ).toPtr() ); + InfiniteRangeConstantBuffer->UpdateBuffer( float4( FLT_MAX, m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorSmallVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorSmallVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorVobsConstantBuffer->UpdateBuffer( float4( rendererState.RendererSettings.OutdoorVobDrawRadius, - 0, 0, 0 ).toPtr() ); + m_SamplerMipBias, 0, 0 ).toPtr() ); rendererState.RasterizerState.FrontCounterClockwise = false; - rendererState.RasterizerState.SetDirty(); RGResourceHandle colorResource; graph.AddPass( L"Initialize Buffers", [&]( RGBuilder& builder, RenderPass& pass ) { @@ -2628,7 +2652,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->ClearRenderTargetView( graph.GetPhysicalTexture( colorResource )->GetRenderTargetView().Get(), reinterpret_cast(&fogColor) ); }; }); - + if ( rendererState.RendererSettings.DrawSky ) { graph.AddPass( L"Draw Sky", [&]( RGBuilder& builder, RenderPass& pass ) { //// Setup / Declare @@ -2636,13 +2660,13 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { //albedoTarget = builder.CreateTexture( albedoDesc ); builder.Write( colorResource ); - pass.m_executeCallback = [this, colorResource](const RenderGraph& graph)->void { + pass.m_executeCallback = [this, colorResource]( const RenderGraph& graph )->void { // Draw back of the sky if outdoor GetContext()->OMSetRenderTargets( 1, graph.GetPhysicalTexture( colorResource 
)->GetRenderTargetView().GetAddressOf(), nullptr ); - + DrawSky(); }; - }); + } ); } RGResourceHandle normalsResource; @@ -2651,7 +2675,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { graph.AddPass( L"G-Buffer Pass", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare auto size = GetResolution(); - normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8G8B8A8_SNORM, L"GBufferNormals" }); + normalsResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_SNORM, L"GBufferNormals" }); specularResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R16G16_FLOAT, L"GBufferSpecular" }); reactiveMaskResource = builder.CreateTexture({ static_cast(size.x), static_cast(size.y), DXGI_FORMAT_R8_UNORM, L"ReactiveMask" }); @@ -2723,10 +2747,11 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { builder.Read( specularResource ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, colorResource, normalsResource, specularResource](const RenderGraph& graph)-> void { + pass.m_executeCallback = [this, colorResource, normalsResource, specularResource, backBufferHandle](const RenderGraph& graph)-> void { auto colorTexture = graph.GetPhysicalTexture(colorResource); auto normalsTexture = graph.GetPhysicalTexture(normalsResource); auto specularTexture = graph.GetPhysicalTexture(specularResource); + auto backbuffer = graph.GetPhysicalTexture( backBufferHandle ); if ( Engine::GAPI->GetRendererState().RendererSettings.EnableShadows ) { // Cascades only get rendered if this is enabled. 
@@ -2737,13 +2762,15 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { *colorTexture, *normalsTexture, *specularTexture, - *GetDepthBufferCopy()); + *GetDepthBufferCopy(), + backbuffer->GetRenderTargetView().Get(), + GetDepthBuffer()->GetDepthStencilView().Get() ); if ( !Engine::GAPI->GetRendererState().RendererSettings.FixViewFrustum ) { m_FrameLights.clear(); } }; - }); - + }); + graph.AddPass( L"Draw Frame AlphaMeshes", [&]( RGBuilder& builder, RenderPass& pass ) { // Setup / Declare builder.Write( backBufferHandle ); @@ -2902,19 +2929,19 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetBspTreeMode() == zBSP_MODE_OUTDOOR) { graph.AddPass( L"Draw Godrays", [&]( RGBuilder& builder, RenderPass& pass ) { - builder.Read( normalsResource ); builder.Read( backBufferHandle ); builder.Write( backBufferHandle ); - pass.m_executeCallback = [this, backBufferHandle, normalsResource](const RenderGraph& graph) { + pass.m_executeCallback = [this, backBufferHandle](const RenderGraph& graph) { // Unbind temporary backbuffer copy Microsoft::WRL::ComPtr srv; GetContext()->PSSetShaderResources( 5, 1, srv.GetAddressOf() ); auto backbufferResource = graph.GetPhysicalTexture(backBufferHandle); - auto normalsTexture = graph.GetPhysicalTexture(normalsResource); - PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), normalsTexture->GetShaderResView().Get()); + PfxRenderer->RenderGodRays(backbufferResource->GetShaderResView().Get(), GetDepthBufferCopy()->GetShaderResView().Get()); + // Godrays bind a different sampler + GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); }; }); } @@ -3060,6 +3087,7 @@ XRESULT D3D11GraphicsEngine::OnStartWorldRendering() { GetContext()->ClearDepthStencilView( m_NativeSizeDepthStencil->GetDepthStencilView().Get(), D3D11_CLEAR_DEPTH, 0, 0 ); SetDefaultStates(); + UpdateRenderStates(); }; } ); } @@ -3210,7 +3238,6 @@ XRESULT 
D3D11GraphicsEngine::OnStartWorldRendering() { // Disable culling for ui rendering(Sprite from LeGo needs it since it use CCW instead of CW order) SetDefaultStates(); rendererState.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - rendererState.RasterizerState.SetDirty(); UpdateRenderStates(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); @@ -3339,7 +3366,6 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -3405,10 +3431,8 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( if ( alphaFunc == zMAT_ALPHA_FUNC_ADD ) Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = alphaFunc; @@ -3430,9 +3454,7 @@ XRESULT D3D11GraphicsEngine::DrawMeshInfoListAlphablended( } Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = true; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); @@ -3453,13 +3475,6 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { if ( !Engine::GAPI->GetRendererState().RendererSettings.DrawWorldMesh ) return XR_SUCCESS; - struct MDI_DrawArgs - { - unsigned int DrawCount; - unsigned int AlignedByteOffsetForArgs; - MaterialInfo* MeshMaterialInfo; - }; - // Setup default renderstates SetDefaultStates(); @@ -3467,6 +3482,18 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { 
Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + // Draw atlas path first (handles opaque + alpha-test submeshes that were atlased) + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } + + struct MDI_DrawArgs + { + unsigned int DrawCount; + unsigned int AlignedByteOffsetForArgs; + MaterialInfo* MeshMaterialInfo; + }; + SetActivePixelShader( "PS_Diffuse" ); SetActiveVertexShader( "VS_Ex" ); @@ -3505,11 +3532,16 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh_Indirect( bool noTextures ) { static std::vector drawIndirectArgs; drawIndirectArgs.clear(); static std::vector> meshList; meshList.clear(); static std::vector> meshListAlpha; meshListAlpha.clear(); + if ( meshList.capacity() == 0 ) { meshList.reserve( 4096 ); meshListAlpha.reserve( 512 ); drawIndirectArgs.reserve( 4096 ); } auto CompareMesh = []( std::tuple& a, std::tuple& b ) -> bool { return std::get<0>( a ) < std::get<0>( b ); }; for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) + continue; + zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -3715,6 +3747,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { Engine::GAPI->SetViewTransformXM( view ); Engine::GAPI->ResetWorldTransform(); + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + m_MeshAtlasPass->Draw(); + } + SetActivePixelShader( "PS_Diffuse" ); SetActiveVertexShader( "VS_Ex" ); @@ -3745,6 +3781,7 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { DrawVertexBufferIndexedUINT( meshInfo->MeshVertexBuffer, meshInfo->MeshIndexBuffer, 0, 0 ); static std::vector> meshList; + if ( meshList.capacity() == 0 ) meshList.reserve( 4096 ); auto CompareMesh = 
[]( std::pair& a, std::pair& b ) -> bool { return a.first.Texture < b.first.Texture; }; GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); @@ -3753,6 +3790,10 @@ XRESULT D3D11GraphicsEngine::DrawWorldMesh( bool noTextures ) { for ( auto const& renderItem : renderList ) { for ( auto const& worldMesh : renderItem->WorldMeshes ) { + // Skip submeshes already drawn by the atlas path + if ( m_MeshAtlasPass->IsSubmeshAtlased( worldMesh.second ) ) + continue; + if ( worldMesh.first.Material ) { zCTexture* aniTex = worldMesh.first.Material->GetTexture(); if ( !aniTex ) continue; @@ -3903,7 +3944,6 @@ void D3D11GraphicsEngine::DrawWaterSurfaces() { // Setup render states for z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); // Bind vertex water shader @@ -3935,10 +3975,8 @@ void D3D11GraphicsEngine::DrawWaterSurfaces() { // Disable depth writes after z-prepass Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Bind pixel water shader @@ -4001,11 +4039,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround( cullFront ? 
GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); bool linearDepth = (Engine::GAPI->GetRendererState().GraphicsState.FF_GSwitches & @@ -4309,11 +4345,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAround_Layered( cullFront ? GothicRasterizerStateInfo::CM_CULL_FRONT : GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); bool linearDepth = (Engine::GAPI->GetRendererState().GraphicsState.FF_GSwitches & @@ -4610,10 +4644,6 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vectorGetRendererState().GraphicsState.FF_AlphaRef; bool linearDepth = (Engine::GAPI->GetRendererState().GraphicsState.FF_GSwitches & GSWITCH_LINEAR_DEPTH) != 0; - - auto drawMultiIndexedInstancedIndirect = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.UseMDI - ? 
DrawMultiIndexedInstancedIndirect - : Stub_DrawMultiIndexedInstancedIndirect; if ( Engine::GAPI->GetRendererState().RendererSettings.FastShadows ) { @@ -4635,6 +4665,7 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vector> alphaMeshes; opaqueDrawArgs.clear(); alphaMeshes.clear(); + if ( opaqueDrawArgs.capacity() == 0 ) { opaqueDrawArgs.reserve( 4096 ); alphaMeshes.reserve( 512 ); } for ( const WorldMeshSectionInfo* section : visibleSections ) { for ( const auto& meshPair : section->WorldMeshes ) { @@ -4687,7 +4718,7 @@ void D3D11GraphicsEngine::ShadowPass_DrawWorldMesh_Indirect(const std::vector(opaqueDrawArgs.size()), WorldMeshIndirectBuffer->GetIndirectBuffer().Get(), 0, @@ -4818,11 +4849,9 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p GothicRasterizerStateInfo::CM_CULL_NONE; Engine::GAPI->GetRendererState().RasterizerState.DepthClipEnable = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::ECompareFunc::CF_COMPARISON_LESS_EQUAL; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); @@ -4922,181 +4951,340 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } if ( Engine::GAPI->GetRendererState().RendererSettings.DrawVOBs ) { - static std::vector potentialCasters; - std::vector& vobs = potentialCasters; - if (params.CascadeIndex != -1) { - auto renderQueue = ShadowMaps->GetRenderQueue( params.CascadeIndex ); - renderQueue->ProcessQueue(); - - vobs = renderQueue->GetVobs(); - } else { - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - RndCullContext ctx; - ctx.queue = &q; - ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); - 
ctx.stage = RenderStage::STAGE_DRAW_WORLD; - ctx.frustum = currentFrustum; - ctx.drawDistances.OutdoorVobs = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; - ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; - ctx.drawDistances.IndoorVobs = Engine::GAPI->GetRendererState().RendererSettings.IndoorVobDrawRadius; - ctx.drawDistances.VisualFX = Engine::GAPI->GetRendererState().RendererSettings.VisualFXDrawRadius; - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - // clear any residue of main render pass - for ( auto const& staticMeshVisual : Engine::GAPI->GetStaticMeshVisuals() ) { - staticMeshVisual.second->StartNewFrame(); - } - for ( auto& it : vobs) { - // process any vobs only visible in this cascade - VobInstanceInfo vii = {}; - vii.world = it->WorldMatrix; - vii.prevWorld = it->HasValidPrevMatrix ? it->PrevWorldMatrix : it->WorldMatrix; - vii.color = it->GroundColor; - vii.windStrenth = 0.0f; - vii.canBeAffectedByPlayer = 0; - - zTAnimationMode aniMode = it->Vob->GetVisualAniMode(); - if ( aniMode != zVISUAL_ANIMODE_NONE ) { - vii.canBeAffectedByPlayer = (!it->Vob->GetDynColl() ? 1.0f : 0.0f); - GothicAPI::ProcessVobAnimation( it->Vob, aniMode, vii ); - } + bool drawStaticVobs = true; + if ( Engine::GAPI->GetRendererState().RendererSettings.UseIndirectVobShadows && m_VobAtlasPass->IsReady() ) { + // GPU indirect path: reuse the VOB atlas pass with the cascade/shadow frustum. + // BC1 groups render depth-only (no PS); BC2 groups use the alpha-test PS. 
+ m_VobAtlasPass->Draw( currentFrustum, /*bindPS=*/false ); + drawStaticVobs = false; + } + + static std::vector dynamicVobCasters; + static std::vector _1; + static std::vector _2; + dynamicVobCasters.reserve( 1024 ); + dynamicVobCasters.clear(); + + LegacyRenderQueueProxy q( dynamicVobCasters, _1, _2 ); + RndCullContext ctx; + ctx.queue = &q; + ctx.cameraPosition = Engine::GAPI->GetCameraPosition(); + ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; + ctx.frustum = currentFrustum; + ctx.drawDistances.OutdoorVobs = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + ctx.drawDistances.OutdoorVobsSmall = Engine::GAPI->GetRendererState().RendererSettings.OutdoorSmallVobDrawRadius; + ctx.drawDistances.IndoorVobs = 0; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS) ); + + struct BatchableStaticVobs { + MeshVisualInfo* VisualInfo; + std::vector Instances; + uint32_t StartInstanceNum; + }; - reinterpret_cast(it->VisualInfo)->Instances.push_back( vii ); - } + if (drawStaticVobs) { + // clear any residue of main render pass + const auto& vobs = m_StaticVobs; + std::vector outRenderQueue{}; + VobCulling::CullAndGatherStaticVOBs( m_StaticVobsAABBs, vobs, currentFrustum.GetPlanes()._Elems, outRenderQueue ); + + std::sort( outRenderQueue.begin(), outRenderQueue.end(), + []( const StaticVobRenderItem& a, const StaticVobRenderItem& b ) { + return a.mvi->Visual < b.mvi->Visual; + } ); + + // Group vobs by visual and prepare instance data + std::vector batchables; + batchables.reserve( outRenderQueue.size() ); + + zCVisual* lastVisual = nullptr; + for ( auto& itm : outRenderQueue ) { + auto v = vobs[itm.instanceIndex]; + + if ( v->VisualInfo->Visual != lastVisual ) { + // New visual, reset instance data + lastVisual = v->VisualInfo->Visual; + batchables.push_back( { reinterpret_cast(v->VisualInfo), std::vector() } ); + batchables.back().Instances.reserve( 10 ); + } + 
BatchableStaticVobs& batch = batchables.back(); + + MeshVisualInfo* visualInfo = batch.VisualInfo; + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + batch.Instances.push_back( vii ); + } - auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); - auto _1 = RecordGraphicsEvent( L"DrawVOBs" ); + auto _ = START_TIMING( timer_labels_vobs[timerLabelIndex] ); + auto _1 = RecordGraphicsEvent( L"DrawVOBs" ); - size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); + size_t ByteWidth = DynamicInstancingBuffer->GetSizeInBytes(); - if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { - if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) - LogInfo() << "Instancing buffer too small (" << ByteWidth - << "), need " << sizeof( VobInstanceInfo ) * vobs.size() - << " bytes. Recreating buffer."; + if ( ByteWidth < sizeof( VobInstanceInfo ) * vobs.size() ) { + if ( Engine::GAPI->GetRendererState().RendererSettings.EnableDebugLog ) + LogInfo() << "Instancing buffer too small (" << ByteWidth + << "), need " << sizeof( VobInstanceInfo ) * vobs.size() + << " bytes. 
Recreating buffer."; - // Buffer too small, recreate it - DynamicInstancingBuffer->Init( - nullptr, sizeof( VobInstanceInfo ) * vobs.size(), - D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, - D3D11VertexBuffer::CA_WRITE ); + // Buffer too small, recreate it + DynamicInstancingBuffer->Init( + nullptr, sizeof( VobInstanceInfo ) * vobs.size(), + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); - SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); - SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); - } + SetDebugName( DynamicInstancingBuffer->GetShaderResourceView().Get(), "DynamicInstancingBuffer->ShaderResourceView" ); + SetDebugName( DynamicInstancingBuffer->GetVertexBuffer().Get(), "DynamicInstancingBuffer->VertexBuffer" ); + } - std::vector activeVisuals; - activeVisuals.reserve(256); // Reserve enough memory to avoid allocations - for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { - if ( !pair.second->Instances.empty() ) { - activeVisuals.push_back(pair.second); + byte* data; + UINT size; + UINT loc = 0; + if ( !SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&data), &size ) ) ) { + LogError() << "Failed to map dynamic instancing buffer for writing!"; + return; } - } + for ( auto& staticMeshVisual : batchables ) { + staticMeshVisual.StartInstanceNum = loc; + memcpy( data + loc * sizeof( VobInstanceInfo ), staticMeshVisual.Instances.data(), + sizeof( VobInstanceInfo ) * staticMeshVisual.Instances.size() ); + loc += staticMeshVisual.Instances.size(); + } + DynamicInstancingBuffer->Unmap(); - byte* data; - UINT size; - UINT loc = 0; - DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, - reinterpret_cast(&data), &size ); - for ( auto const& staticMeshVisual : activeVisuals ) { - 
staticMeshVisual->StartInstanceNum = loc; - memcpy( data + loc * sizeof( VobInstanceInfo ), staticMeshVisual->Instances.data(), - sizeof( VobInstanceInfo ) * staticMeshVisual->Instances.size() ); - loc += staticMeshVisual->Instances.size(); - } - DynamicInstancingBuffer->Unmap(); + // Apply instancing shader + SetActiveVertexShader( "VS_ExInstancedObj" ); + // SetActivePixelShader("PS_DiffuseAlphaTest"); + ActiveVS->Apply(); - // Apply instancing shader - SetActiveVertexShader( "VS_ExInstancedObj" ); - // SetActivePixelShader("PS_DiffuseAlphaTest"); - ActiveVS->Apply(); + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - Context->PSSetShader( nullptr, nullptr, 0 ); - } + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + } - if ( ActiveVS ) { - ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); - } + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); - XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? 
Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); - g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + // Draw all vobs the player currently sees + for ( auto const& b : batchables ) { + if ( b.Instances.empty() ) continue; + auto staticMeshVisual = b.VisualInfo; - // Draw all vobs the player currently sees - for ( auto const& staticMeshVisual : activeVisuals ) { - if ( staticMeshVisual->Instances.empty() ) continue; - - g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; - g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; + g_windBuffer.minHeight = staticMeshVisual->BBox.Min.y; + g_windBuffer.maxHeight = staticMeshVisual->BBox.Max.y; - if ( ActiveVS ) { - ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); - } + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + } - bool doReset = true; - zCTexture* previousTx = nullptr; - for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { - std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; - if ( mlist.empty() ) continue; + zCTexture* previousTx = nullptr; + for ( auto const& itt : staticMeshVisual->MeshesByTexture ) { + std::vector& mlist = staticMeshVisual->MeshesByTexture[itt.first]; + if ( mlist.empty() ) continue; - zCTexture* tx = itt.first.Texture; - bool bindTexture = previousTx != tx - && tx - && (tx->HasAlphaChannel() || colorWritesEnabled); - - // Check for alphablend - bool blendAdd = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; - bool blendBlend = - itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; - // if one part of the mesh uses blending, all do, which means that - // the mesh likely is transparent and can't cast shadows - if ( !doReset || blendAdd || blendBlend ) { - doReset = false; - continue; - } + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); + + // 
Check for alphablend + bool blendAdd = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = + itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + // if one part of the mesh uses blending, all do, which means that + // the mesh likely is transparent and can't cast shadows + if ( blendAdd || blendBlend ) { + continue; + } - for ( unsigned int i = 0; i < mlist.size(); i++ ) { - // Bind texture - if ( bindTexture ) { - if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { - tx->Bind( 0 ); - ActivePS->Apply(); - previousTx = tx; - } else - continue; - } else { - if ( !linearDepth ) // Only unbind when not rendering linear depth - { - // Unbind PS - Context->PSSetShader( nullptr, nullptr, 0 ); + for ( unsigned int i = 0; i < mlist.size(); i++ ) { + // Bind texture + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + ActivePS->Apply(); + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth ) // Only unbind when not rendering linear depth + { + // Unbind PS + Context->PSSetShader( nullptr, nullptr, 0 ); + } } + + MeshInfo* mi = mlist[i]; + + // Draw batch + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), b.Instances.size(), + sizeof( ExVertexStruct ), b.StartInstanceNum ); + + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + b.Instances.size(); } + } + } + } // end else (CPU indirect path) - MeshInfo* mi = mlist[i]; + // Draw dynamic vobs (spawned at runtime, not part of m_StaticVobs or atlas) + if ( !dynamicVobCasters.empty() ) { + // Group by visual for instanced drawing - // Draw batch - DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, - mi->Indices.size(), DynamicInstancingBuffer.get(), - sizeof( VobInstanceInfo ), staticMeshVisual->Instances.size(), - sizeof( ExVertexStruct ), staticMeshVisual->StartInstanceNum ); + std::vector 
dynBatches; + std::unordered_map batchIndex; + batchIndex.reserve( dynamicVobCasters.size() ); // usually single, but can be multiple - Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += - staticMeshVisual->Instances.size(); + for ( auto* v : dynamicVobCasters ) { + if ( !v->VisualInfo ) continue; + MeshVisualInfo* vi = reinterpret_cast( v->VisualInfo ); + + auto [it, inserted] = batchIndex.emplace( vi, dynBatches.size() ); + if ( inserted ) { + dynBatches.push_back( { vi, {}, 0 } ); } + + VobInstanceInfo vii = {}; + vii.world = v->WorldMatrix; + vii.prevWorld = v->HasValidPrevMatrix ? v->PrevWorldMatrix : v->WorldMatrix; + vii.color = v->GroundColor; + vii.windStrenth = 0.0f; + vii.canBeAffectedByPlayer = 0; + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + vii.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + GothicAPI::ProcessVobAnimation( v->Vob, aniMode, vii ); + } + dynBatches[it->second].Instances.push_back( vii ); } - // Reset visual - if ( doReset ) staticMeshVisual->StartNewFrame(); + if ( !dynBatches.empty() ) { + // Ensure instancing buffer is large enough + size_t needed = dynamicVobCasters.size() * sizeof( VobInstanceInfo ); + if ( DynamicInstancingBuffer->GetSizeInBytes() < needed ) { + DynamicInstancingBuffer->Init( + nullptr, needed, + D3D11VertexBuffer::B_VERTEXBUFFER, D3D11VertexBuffer::U_DYNAMIC, + D3D11VertexBuffer::CA_WRITE ); + } + + byte* dynData; + UINT dynSize; + UINT dynLoc = 0; + if ( SUCCEEDED( DynamicInstancingBuffer->Map( D3D11VertexBuffer::M_WRITE_DISCARD, + reinterpret_cast(&dynData), &dynSize ) ) ) { + for ( auto& batch : dynBatches ) { + batch.StartInstanceNum = dynLoc; + memcpy( dynData + dynLoc * sizeof( VobInstanceInfo ), batch.Instances.data(), + sizeof( VobInstanceInfo ) * batch.Instances.size() ); + dynLoc += batch.Instances.size(); + } + DynamicInstancingBuffer->Unmap(); + } + + // Set up instanced vertex shader (GPU indirect path may have 
changed shader state) + SetActiveVertexShader( "VS_ExInstancedObj" ); + ActiveVS->Apply(); + + if ( linearDepth ) { + SetActivePixelShader( "PS_LinDepth" ); + } else { + SetActivePixelShader( "PS_DiffuseAlphaTest" ); + Context->PSSetShader( nullptr, nullptr, 0 ); + } + + // Rebind PS constant buffers (GPU indirect path may have overwritten them) + ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + GSky* dynSky = Engine::GAPI->GetSky(); + ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &dynSky->GetAtmosphereCB() ); + ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); + + SetupVS_ExConstantBuffer(); + + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + } + + XMFLOAT3 vPlayerPosition = Engine::GAPI->GetPlayerVob() ? Engine::GAPI->GetPlayerVob()->GetPositionWorld() : XMFLOAT3( 0, 0, 0 ); + g_windBuffer.playerPos = float3( vPlayerPosition.x, vPlayerPosition.y, vPlayerPosition.z ); + + for ( auto const& batch : dynBatches ) { + if ( batch.Instances.empty() ) continue; + + g_windBuffer.minHeight = batch.VisualInfo->BBox.Min.y; + g_windBuffer.maxHeight = batch.VisualInfo->BBox.Max.y; + + if ( ActiveVS ) { + ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &g_windBuffer ); + } + + zCTexture* previousTx = nullptr; + for ( auto const& itt : batch.VisualInfo->MeshesByTexture ) { + const std::vector& mlist = itt.second; + if ( mlist.empty() ) continue; + + zCTexture* tx = itt.first.Texture; + bool bindTexture = previousTx != tx + && tx + && (tx->HasAlphaChannel() || colorWritesEnabled); + + bool blendAdd = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_ADD; + bool blendBlend = itt.first.Material->GetAlphaFunc() == zMAT_ALPHA_FUNC_BLEND; + if ( blendAdd || blendBlend ) { + continue; // shadow pass, transparent materials shouldn't cast shadows + } + + for ( unsigned 
int i = 0; i < mlist.size(); i++ ) { + if ( bindTexture ) { + if ( alphaRef > 0.0f && tx->CacheIn( 0.6f ) == zRES_CACHED_IN ) { + tx->Bind( 0 ); + ActivePS->Apply(); + previousTx = tx; + } else + continue; + } else { + if ( !linearDepth ) { + Context->PSSetShader( nullptr, nullptr, 0 ); + } + } + + MeshInfo* mi = mlist[i]; + + DrawInstanced( mi->MeshVertexBuffer, mi->MeshIndexBuffer, + mi->Indices.size(), DynamicInstancingBuffer.get(), + sizeof( VobInstanceInfo ), batch.Instances.size(), + sizeof( ExVertexStruct ), batch.StartInstanceNum ); + + Engine::GAPI->GetRendererState().RendererInfo.FrameDrawnVobs += + batch.Instances.size(); + } + } + } + } } } @@ -5147,7 +5335,6 @@ void XM_CALLCONV D3D11GraphicsEngine::DrawWorldAroundForWorldShadow( FXMVECTOR p } Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } /** Update morph mesh visual */ @@ -5236,7 +5423,20 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { { auto _ = START_TIMING( "VOBs" ); - SetDefaultStates(); + + bool needsDrawVobs = true; + if ( m_VobAtlasPass->IsReady() ) { + Frustum cameraFrustum = Frustum::AlwaysContainingFrustum(); + if ( auto cam = zCCamera::GetCamera() ) { + cam->Activate(); + cameraFrustum.BuildPerspective( + XMMatrixTranspose( XMLoadFloat4x4( &cam->trafoView ) ), + XMLoadFloat4x4( &cam->trafoProjection ) ); + } + m_VobAtlasPass->Draw( cameraFrustum ); + needsDrawVobs = false; + } + SetActivePixelShader( "PS_Diffuse" ); SetActiveVertexShader( "VS_ExInstancedObj" ); @@ -5278,7 +5478,12 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { if ( !renderSettings.FixViewFrustum || (renderSettings.FixViewFrustum && vobs.empty()) ) { - Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs ); + + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE; + if (!needsDrawVobs) { + collectFlags = (EBspTreeCollectFlags)(collectFlags & ~EBspTreeCollectFlags::COLLECT_VOBS); + } + 
Engine::GAPI->CollectVisibleVobs( vobs, m_FrameLights, mobs, CullAll, collectFlags ); } } @@ -5286,15 +5491,15 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { UpdateMorphMeshVisual(); } - if ( renderSettings.DrawVOBs ) { + if ( renderSettings.DrawVOBs && vobs.size() > 0 ) { auto _1 = Engine::GraphicsEngine->RecordGraphicsEvent( L"DrawVOBsInstanced->DrawVOBs" ); - + std::vector activeVisuals; activeVisuals.reserve( 256 ); // Reserve enough memory to avoid allocations for ( auto const& pair : Engine::GAPI->GetStaticMeshVisuals() ) { if ( !pair.second->Instances.empty() ) { activeVisuals.push_back( pair.second ); - } + } } // Create instancebuffer for this frame @@ -5357,7 +5562,7 @@ XRESULT D3D11GraphicsEngine::DrawVOBsInstanced() { } else { // Only update if it changed if ( std::abs( cachedVobRadius - expectedVobRadius ) > 0.1f ) { - OutdoorVobsConstantBuffer->UpdateBuffer( float4( expectedVobRadius, 0, 0, 0 ).toPtr() ); + OutdoorVobsConstantBuffer->UpdateBuffer( float4( expectedVobRadius, m_SamplerMipBias, 0, 0 ).toPtr() ); OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); cachedVobRadius = expectedVobRadius; } @@ -5608,10 +5813,8 @@ XRESULT D3D11GraphicsEngine::DrawFrameAlphaMeshes() else if ( blendBlend ) Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -5671,7 +5874,6 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { // Setup renderstates Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -5736,10 +5938,7 @@ XRESULT D3D11GraphicsEngine::DrawPolyStrips( bool noTextures ) { else if ( blendBlend ) 
Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); } @@ -5770,10 +5969,6 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - if ( force ) { FFRasterizerStateHash = 0; FFBlendStateHash = 0; @@ -5782,6 +5977,12 @@ void D3D11GraphicsEngine::SetDefaultStates( bool force ) { } } +void D3D11GraphicsEngine::InvalidateStateCache() { + FFRasterizerStateHash = 0; + FFBlendStateHash = 0; + FFDepthStencilStateHash = 0; +} + /** Draws the sky using the GSky-Object */ XRESULT D3D11GraphicsEngine::DrawSky() { GSky* sky = Engine::GAPI->GetSky(); @@ -5789,7 +5990,6 @@ XRESULT D3D11GraphicsEngine::DrawSky() { if ( !Engine::GAPI->GetRendererState().RendererSettings.AtmosphericScattering ) { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); #if defined(BUILD_GOTHIC_1_08k) && !defined(BUILD_1_12F) @@ -5855,11 +6055,8 @@ XRESULT D3D11GraphicsEngine::DrawSky() { Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -5881,7 +6078,6 @@ XRESULT 
D3D11GraphicsEngine::DrawSky() { { SetDefaultStates(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); // Draw barrier after sky @@ -6004,7 +6200,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Set backface culling Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); SetActivePixelShader( "PS_Preview_Textured" ); @@ -6040,7 +6235,6 @@ void D3D11GraphicsEngine::DrawVobSingle( VobInfo* vob, zCCamera& camera ) { // Disable culling again Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); GetContext()->PSSetSamplers( 0, 1, ClampSamplerState.GetAddressOf() ); } @@ -6283,7 +6477,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); // Update view transform @@ -6292,7 +6485,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, if ( !lighting ) { SetActivePixelShader( "PS_Transparency" ); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); } else { SetActivePixelShader( "PS_World" ); } @@ -6355,7 +6547,6 @@ void D3D11GraphicsEngine::DrawDecalList( const std::vector& decals, } if ( lastAlphaFunc != alphaFunc ) { - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); lastAlphaFunc = alphaFunc; } @@ -6424,7 +6615,6 @@ void 
D3D11GraphicsEngine::DrawQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); ActivePS->GetConstantBuffer()[0]->UpdateBuffer( &Engine::GAPI->GetRendererState().GraphicsState ); ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); @@ -6477,7 +6667,6 @@ void D3D11GraphicsEngine::DrawQuadMarks() { alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -6501,9 +6690,7 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { Engine::GAPI->SetViewTransformXM( view ); // Update view transform Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); SetupVS_ExMeshDrawCall(); SetupVS_ExConstantBuffer(); @@ -6533,7 +6720,6 @@ void D3D11GraphicsEngine::DrawMQuadMarks() { alphaFunc = mat->GetAlphaFunc(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); UpdateRenderStates(); } @@ -6606,7 +6792,6 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); Engine::GAPI->SetViewTransformXM( view ); @@ -6643,19 +6828,15 @@ void D3D11GraphicsEngine::DrawFrameParticleMeshes( std::unordered_mapGetRendererState(); state.BlendState.SetAdditiveBlending(); - state.BlendState.SetDirty(); state.DepthState.DepthWriteEnabled = false; - state.DepthState.SetDirty(); state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - state.RasterizerState.SetDirty(); std::vector*>> pvecAdd; std::vector*>> pvecRest; @@ -6820,7 +6998,6 @@ void 
D3D11GraphicsEngine::DrawFrameParticles( if ( partInfo.BlendMode != lastBlendMode ) { // Setup blend state state.BlendState = blendState; - state.BlendState.SetDirty(); lastBlendMode = partInfo.BlendMode; UpdateRenderStates(); @@ -6834,7 +7011,6 @@ void D3D11GraphicsEngine::DrawFrameParticles( Context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); state.BlendState.SetDefault(); - state.BlendState.SetDirty(); bufferParticleColor->BindToPixelShader( Context.Get(), 1 ); bufferParticleDistortion->BindToPixelShader( Context.Get(), 2 ); @@ -6881,17 +7057,14 @@ void D3D11GraphicsEngine::UpdateOcclusion() { // Set up states Engine::GAPI->GetRendererState().RasterizerState.SetDefault(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = false; // Rasterization is faster without writes - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; // Don't write the bsp-nodes to the depth buffer, also quicker - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7113,7 +7286,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7170,7 +7342,6 @@ void D3D11GraphicsEngine::DrawString( const std::string& str, float x, float y, DrawVertexBuffer( TempVertexBuffer.get(), vertices.size(), sizeof( ExVertexStruct ) ); oldDepthState.ApplyTo( Engine::GAPI->GetRendererState().DepthState 
); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); UpdateRenderStates(); @@ -7198,6 +7369,266 @@ void D3D11GraphicsEngine::StorePrevViewProjMatrix() { } } +void D3D11GraphicsEngine::CreateHiZResources() { + auto* device = GetDevice().Get(); + HRESULT hr; + + // Get depth buffer dimensions + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + if ( width == 0 || height == 0 ) + return; + + // Calculate mip count for full mip chain + UINT mipCount = 1; + { + UINT w = width, h = height; + while ( w > 1 || h > 1 ) { + w = (std::max)( w / 2, 1u ); + h = (std::max)( h / 2, 1u ); + mipCount++; + } + } + m_HiZMipCount = mipCount; + + // Create Hi-Z texture: full mip chain, SRV-bindable (used as CS input via SRV) + D3D11_TEXTURE2D_DESC hiZDesc = {}; + hiZDesc.Width = width; + hiZDesc.Height = height; + hiZDesc.MipLevels = mipCount; + hiZDesc.ArraySize = 1; + hiZDesc.Format = DXGI_FORMAT_R32_FLOAT; + hiZDesc.SampleDesc.Count = 1; + hiZDesc.Usage = D3D11_USAGE_DEFAULT; + hiZDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &hiZDesc, nullptr, m_HiZTexture.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create Hi-Z texture"; + return; + } + + // SRV for the full Hi-Z texture (all mips, used for occlusion testing in CS_CullVobs) + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = mipCount; + srvDesc.Texture2D.MostDetailedMip = 0; + + hr = device->CreateShaderResourceView( m_HiZTexture.Get(), &srvDesc, m_HiZSRV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create Hi-Z SRV"; + m_HiZTexture.Reset(); + return; + } + + // Create scratch texture: single mip, UAV-bindable (CS writes here, then we copy to Hi-Z) + D3D11_TEXTURE2D_DESC scratchDesc = {}; + scratchDesc.Width = width; + scratchDesc.Height = 
height; + scratchDesc.MipLevels = 1; + scratchDesc.ArraySize = 1; + scratchDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchDesc.SampleDesc.Count = 1; + scratchDesc.Usage = D3D11_USAGE_DEFAULT; + scratchDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; + + hr = device->CreateTexture2D( &scratchDesc, nullptr, m_HiZScratch.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch texture"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + return; + } + + // Scratch UAV + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R32_FLOAT; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + + hr = device->CreateUnorderedAccessView( m_HiZScratch.Get(), &uavDesc, m_HiZScratchUAV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch UAV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); + return; + } + + // Scratch SRV (not strictly needed, but useful for debugging) + D3D11_SHADER_RESOURCE_VIEW_DESC scratchSRVDesc = {}; + scratchSRVDesc.Format = DXGI_FORMAT_R32_FLOAT; + scratchSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + scratchSRVDesc.Texture2D.MipLevels = 1; + scratchSRVDesc.Texture2D.MostDetailedMip = 0; + + hr = device->CreateShaderResourceView( m_HiZScratch.Get(), &scratchSRVDesc, m_HiZScratchSRV.ReleaseAndGetAddressOf() ); + if ( FAILED( hr ) ) { + LogError() << "[Hi-Z] Failed to create scratch SRV"; + m_HiZTexture.Reset(); + m_HiZSRV.Reset(); + m_HiZScratch.Reset(); + m_HiZScratchUAV.Reset(); + return; + } + + LogInfo() << "[Hi-Z] Created Hi-Z pyramid resources: " << width << "x" << height + << ", " << mipCount << " mip levels"; +} + +void D3D11GraphicsEngine::BuildHiZPyramid() { + if ( !m_HiZTexture || !m_HiZScratch || m_HiZMipCount == 0 ) + return; + + auto hiZCS = ShaderManager->GetCShader( "CS_BuildHiZ" ); + if ( !hiZCS ) + return; + + auto& context = 
GetContext(); + + UINT width = DepthStencilBuffer->GetSizeX(); + UINT height = DepthStencilBuffer->GetSizeY(); + + hiZCS->Apply(); + + for ( UINT mip = 0; mip < m_HiZMipCount; mip++ ) { + UINT mipWidth = (std::max)( width >> mip, 1u ); + UINT mipHeight = (std::max)( height >> mip, 1u ); + + // Update constant buffer + HiZBuildConstants cb = {}; + cb.outputWidth = mipWidth; + cb.outputHeight = mipHeight; + cb.inputMipLevel = ( mip > 0 ) ? ( mip - 1 ) : 0; + cb.isCopyPass = ( mip == 0 ) ? 1 : 0; + hiZCS->GetConstantBuffer()[0]->UpdateBuffer( &cb ); + hiZCS->GetConstantBuffer()[0]->BindToComputeShader( 0 ); + + // Bind input SRV: + // Mip 0: read from depth buffer copy (avoids DSV/SRV hazard) + // Mip N: read from Hi-Z texture SRV (previous mip levels already filled) + ID3D11ShaderResourceView* inputSRV = nullptr; + if ( mip == 0 ) { + inputSRV = DepthStencilBufferCopy->GetShaderResView().Get(); + } else { + inputSRV = m_HiZSRV.Get(); + } + context->CSSetShaderResources( 0, 1, &inputSRV ); + + // Bind output UAV: always the scratch texture + ID3D11UnorderedAccessView* uav = m_HiZScratchUAV.Get(); + context->CSSetUnorderedAccessViews( 0, 1, &uav, nullptr ); + + // Dispatch + UINT groupsX = ( mipWidth + 7 ) / 8; + UINT groupsY = ( mipHeight + 7 ) / 8; + context->Dispatch( groupsX, groupsY, 1 ); + + // Unbind SRV and UAV to allow the copy + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + context->CSSetShaderResources( 0, 1, &nullSRV ); + context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, nullptr ); + + // Copy scratch (mip 0) -> Hi-Z texture (mip N) + D3D11_BOX srcBox = {}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.front = 0; + srcBox.back = 1; + + context->CopySubresourceRegion( + m_HiZTexture.Get(), + D3D11CalcSubresource( mip, 0, m_HiZMipCount ), + 0, 0, 0, + m_HiZScratch.Get(), + 0, + &srcBox ); + } + + // Clean up CS state + context->CSSetShader( nullptr, 
nullptr, 0 ); +} +void D3D11GraphicsEngine::CacheWorldStaticVobs() { + + static std::vector _1; + static std::vector _2; + m_StaticVobs.clear(); + m_StaticVobs.reserve( 1024 ); + + LegacyRenderQueueProxy q( m_StaticVobs, _1, _2 ); + RndCullContext ctx; + ctx.queue = &q; + ctx.cameraPosition = XMFLOAT3( 0, 0, 0 ); + ctx.stage = RenderStage::STAGE_DRAW_WORLD; + ctx.frustum = Frustum::AlwaysContainingFrustum(); + ctx.drawDistances.OutdoorVobs = 1'000'000; + ctx.drawDistances.OutdoorVobsSmall = ctx.drawDistances.OutdoorVobs; + ctx.drawDistances.IndoorVobs = 0; + ctx.drawDistances.VisualFX = 0; + Engine::GAPI->CollectVisibleVobs( ctx, (EBspTreeCollectFlags)(EBspTreeCollectFlags::COLLECT_VOBS | EBspTreeCollectFlags::COLLECT_DISABLE_CHECK_DIST) ); + + const size_t totalItems = m_StaticVobs.size(); + // Correct math to calculate exact number of batches (rounds up to nearest multiple of 8, AVX ;) ) + const size_t numBatches = (totalItems + 7) / 8; + m_StaticVobsAABBs.clear(); + m_StaticVobsAABBs.reserve( numBatches ); + + for ( size_t i = 0; i < numBatches; ++i ) { + AABB_SoA_Batch8 b = {}; // Zero-initialize the batch + + // Fill the 8 slots in this batch + for ( int j = 0; j < 8; ++j ) { + size_t vobIdx = (i * 8) + j; + + if ( vobIdx < totalItems ) { + // Valid item: Extract and store + DirectX::BoundingBox bb = Frustum::Frustum::BBoxFromzTBBox3D( m_StaticVobs[vobIdx]->Vob->GetBBox() ); + + b.cx[j] = bb.Center.x; + b.cy[j] = bb.Center.y; + b.cz[j] = bb.Center.z; + + b.ex[j] = bb.Extents.x; + b.ey[j] = bb.Extents.y; + b.ez[j] = bb.Extents.z; + } else { + // Out of bounds (tail padding): + // Insert a dummy AABB far outside the map so it is guaranteed to be culled. + // This prevents invalid indices from entering your RenderQueue! 
+ b.cx[j] = 1000000.0f; + b.cy[j] = 1000000.0f; + b.cz[j] = 1000000.0f; + + b.ex[j] = 0.0f; + b.ey[j] = 0.0f; + b.ez[j] = 0.0f; + } + } + + m_StaticVobsAABBs.push_back( b ); + } +} + +void D3D11GraphicsEngine::OnWorldLoaded() +{ + CacheWorldStaticVobs(); + + // --- Build VOB texture atlases: collect unique textures, create Texture2DArray atlases --- + m_VobAtlasPass->Build(); + + // --- Build world mesh atlas: collect textures, build atlases, merge geometry --- + m_MeshAtlasPass->Build(); + + if ( Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh + || Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { + Engine::GAPI->ReloadTextures(); + } +} + void D3D11GraphicsEngine::StoreVobPreviousTransforms() { if ( !zCCamera::GetCamera() ) { return; // only do this if we actually are in-game diff --git a/D3D11Engine/D3D11GraphicsEngine.h b/D3D11Engine/D3D11GraphicsEngine.h index 493437bd..9c20df8f 100644 --- a/D3D11Engine/D3D11GraphicsEngine.h +++ b/D3D11Engine/D3D11GraphicsEngine.h @@ -4,6 +4,11 @@ #include "GothicAPI.h" #include "D3D11ShadowMap.h" #include "D3D11ShaderManager.h" +#include "D3D11PipelineStateObject.h" +#include "D3D11IndirectBuffer.h" +#include "VobCulling.h" +#include "D3D11VobAtlasPass.h" +#include "D3D11MeshAtlasPass.h" struct RenderToDepthStencilBuffer; @@ -55,6 +60,8 @@ struct AlphaMeshData { }; class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { + friend class D3D11VobAtlasPass; + friend class D3D11MeshAtlasPass; public: D3D11GraphicsEngine(); ~D3D11GraphicsEngine() override; @@ -185,6 +192,11 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** Sets up the default rendering state */ void SetDefaultStates( bool force = false ); + /** Invalidates the cached FF state hashes, forcing the next UpdateRenderStates() + * to re-apply all states to D3D11. Call after any code that sets D3D11 states + * directly (e.g. ImGui, external libraries). 
*/ + void InvalidateStateCache(); + /** Returns the current resolution (Maybe supersampled)*/ INT2 GetResolution() override { return m_scaledResolution; }; @@ -341,14 +353,28 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { D3D11PfxRenderer* GetPfxRenderer() const { return PfxRenderer.get(); } D3D11Texture* GetDistortionTexture() const { return DistortionTexture.get(); } + /** Returns the pipeline state cache for optimal D3D11 state management */ + D3D11PipelineStateCache& GetPipelineStateCache() { return m_PipelineStateCache; } + RenderToTextureBuffer* GetVelocityBuffer() const { return VelocityBuffer.get(); } const XMFLOAT4X4& GetPrevViewProjMatrix() const { return m_PrevViewProjMatrix; } - void StorePrevViewProjMatrix(); + auto GetClampSamplerState() -> auto { return ClampSamplerState.Get(); } + auto GetCubeSamplerState() -> auto { return CubeSamplerState.Get(); } + auto GetLinearSamplerState() -> auto { return LinearSamplerState.Get(); } + + void OnWorldLoaded() override; protected: void StoreVobPreviousTransforms(); + void StorePrevViewProjMatrix(); + + void CacheWorldStaticVobs(); + + /** Pipeline state cache for minimizing redundant D3D11 state transitions */ + D3D11PipelineStateCache m_PipelineStateCache; + std::unique_ptr m_FrameLimiter; int m_LastFrameLimit; @@ -445,6 +471,7 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { /** If true, we will save a screenshot after the next frame */ bool SaveScreenshotNextFrame; + float m_SamplerMipBias = 0.0f; bool m_flipWithTearing; bool m_swapchainflip; bool m_lowlatency; @@ -460,4 +487,24 @@ class D3D11GraphicsEngine : public D3D11GraphicsEngineBase { INT2 NewResolution; void CreateAndBindDefaultSampler(); + + std::vector m_StaticVobs{}; + std::vector m_StaticVobsAABBs{}; + + /** Atlas rendering passes */ + std::unique_ptr m_VobAtlasPass; + std::unique_ptr m_MeshAtlasPass; + + /** Hi-Z occlusion culling resources */ + Microsoft::WRL::ComPtr m_HiZTexture; // Full mip-chain, SRV-only + 
Microsoft::WRL::ComPtr m_HiZSRV; + Microsoft::WRL::ComPtr m_HiZScratch; // Single-mip scratch for CS UAV writes + Microsoft::WRL::ComPtr m_HiZScratchUAV; + Microsoft::WRL::ComPtr m_HiZScratchSRV; + UINT m_HiZMipCount = 0; + + /** Create Hi-Z pyramid resources (called after depth buffer creation) */ + void CreateHiZResources(); + /** Build the Hi-Z mip chain from the current depth buffer */ + void BuildHiZPyramid(); }; diff --git a/D3D11Engine/D3D11GraphicsEngineBase.cpp b/D3D11Engine/D3D11GraphicsEngineBase.cpp index 2780a2f6..f7e3ba27 100644 --- a/D3D11Engine/D3D11GraphicsEngineBase.cpp +++ b/D3D11Engine/D3D11GraphicsEngineBase.cpp @@ -167,11 +167,6 @@ void D3D11GraphicsEngineBase::SetDefaultStates() { Engine::GAPI->GetRendererState().DepthState.SetDefault(); Engine::GAPI->GetRendererState().SamplerState.SetDefault(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().SamplerState.SetDirty(); - GetContext()->PSSetSamplers( 0, 1, DefaultSamplerState.GetAddressOf() ); UpdateRenderStates(); @@ -400,6 +395,6 @@ XRESULT D3D11GraphicsEngineBase::BindViewportInformation( const std::string& sha } /** Returns the graphics-device this is running on */ -std::string D3D11GraphicsEngineBase::GetGraphicsDeviceName() { +const std::string& D3D11GraphicsEngineBase::GetGraphicsDeviceName() { return DeviceDescription; } diff --git a/D3D11Engine/D3D11GraphicsEngineBase.h b/D3D11Engine/D3D11GraphicsEngineBase.h index 052ab597..94d0da80 100644 --- a/D3D11Engine/D3D11GraphicsEngineBase.h +++ b/D3D11Engine/D3D11GraphicsEngineBase.h @@ -72,7 +72,7 @@ class D3D11GraphicsEngineBase : public BaseGraphicsEngine { BaseLineRenderer* GetLineRenderer() override; /** Returns the graphics-device this is running on */ - std::string GetGraphicsDeviceName() override; + const std::string& GetGraphicsDeviceName() override; /** Saves a 
screenshot */ virtual void SaveScreenshot() {} diff --git a/D3D11Engine/D3D11IndirectBuffer.cpp b/D3D11Engine/D3D11IndirectBuffer.cpp index c1a20c69..a2ef7eb5 100644 --- a/D3D11Engine/D3D11IndirectBuffer.cpp +++ b/D3D11Engine/D3D11IndirectBuffer.cpp @@ -83,10 +83,13 @@ XRESULT D3D11IndirectBuffer::UpdateBuffer( void* data, UINT size ) { if ( SizeInBytes < size ) { size = SizeInBytes; } + if ( !data ) { + return XR_SUCCESS; + } // Assume null-copy? if ( XR_SUCCESS == Map( EMapFlags::M_WRITE_DISCARD, &mappedData, &bsize ) ) { if ( size ) { - bsize = size; + bsize = std::min( bsize, size ); } // Copy data memcpy( mappedData, data, bsize ); diff --git a/D3D11Engine/D3D11LineRenderer.cpp b/D3D11Engine/D3D11LineRenderer.cpp index d25e95be..de49fa67 100644 --- a/D3D11Engine/D3D11LineRenderer.cpp +++ b/D3D11Engine/D3D11LineRenderer.cpp @@ -65,7 +65,6 @@ XRESULT D3D11LineRenderer::Flush() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->SetupVS_ExConstantBuffer(); @@ -112,7 +111,6 @@ XRESULT D3D11LineRenderer::FlushScreenSpace() { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->SetupVS_ExMeshDrawCall(); engine->GetContext()->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_LINELIST ); diff --git a/D3D11Engine/D3D11MeshAtlasPass.cpp b/D3D11Engine/D3D11MeshAtlasPass.cpp new file mode 100644 index 00000000..d9d0dabd --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.cpp @@ -0,0 +1,495 @@ +#include "D3D11MeshAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" 
+#include "zCMaterial.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; +} + +typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )( + ID3D11DeviceContext* context, unsigned int drawCount, + ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs, + unsigned int alignedByteStrideForArgs ); +extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect; + +// ------------------------------------------------------- + +D3D11MeshAtlasPass::D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ) + : m_Engine( engine ) { +} + +// ============================================================ +// Build – entry point called from OnWorldLoaded +// ============================================================ +void D3D11MeshAtlasPass::Build() { + // Reset everything + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + m_WorldMeshDiffuseAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshNormalAtlasses[(DXGI_FORMAT)i].Destroy(); + m_WorldMeshFxAtlasses[(DXGI_FORMAT)i].Destroy(); + } + m_WorldMeshDiffuseAtlasLookup.clear(); + m_WorldMeshNormalAtlasLookup.clear(); + m_WorldMeshFxAtlasLookup.clear(); + m_WorldMeshAtlasDrawGroups.clear(); + m_WorldMeshAtlasedSubmeshes.clear(); + m_WorldMeshGlobalVertexBuffer.reset(); + m_WorldMeshGlobalIndexBuffer.reset(); + m_WorldMeshGlobalInstanceIdBuffer.reset(); + m_WorldMeshSubmeshBuffer.reset(); + + if ( !SupportTextureAtlases || + !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ) { + return; + } + + BuildTextureAtlasses(); + + if ( m_WorldMeshDiffuseAtlasLookup.empty() ) + return; + + BuildGeometryBuffers(); +} + +// ============================================================ +// BuildTextureAtlasses +// 
============================================================ +void D3D11MeshAtlasPass::BuildTextureAtlasses() { + struct DiffuseTextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + struct AuxTextureInfo { + D3D11Texture* engineTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + + std::unordered_set seenDiffuse; + std::unordered_set seenNormal, seenFx; + std::vector uniqueDiffuse; + std::vector uniqueNormals, uniqueFx; + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + // Skip animated textures + zCTexture* baseTex = meshKey.Material->GetTextureSingle(); + if ( !baseTex ) continue; + unsigned char texFlags = *reinterpret_cast( + reinterpret_cast(baseTex) + GothicMemoryLocations::zCTexture::Offset_Flags ); + if ( texFlags & GothicMemoryLocations::zCTexture::Mask_FlagIsAnimated ) + continue; + + // Only opaque + alpha-test + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc > zMAT_ALPHA_FUNC_NONE && alphaFunc != zMAT_ALPHA_FUNC_TEST ) + continue; + + // Skip non-standard materials (water, portals, etc.) 
+ if ( meshKey.Info && meshKey.Info->MaterialType != MaterialInfo::MT_None ) + continue; + + zCTexture* tex = baseTex; + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) continue; + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) continue; + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) continue; + + // Diffuse + if ( seenDiffuse.insert( tex ).second ) { + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueDiffuse.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + + // Normal map + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex && seenNormal.insert( normalTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + normalTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueNormals.push_back( { normalTex, desc.Format, normalTex->GetTextureObject() } ); + } + + // FX map + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex && seenFx.insert( fxTex ).second ) { + D3D11_TEXTURE2D_DESC desc; + fxTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format >= 1 && desc.Format < TEXTURE_ATLAS_MAX ) + uniqueFx.push_back( { fxTex, desc.Format, fxTex->GetTextureObject() } ); + } + } + } + } + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + // Build per-format Texture2DArray atlases for diffuse textures + { + std::sort( uniqueDiffuse.begin(), uniqueDiffuse.end(), + []( const DiffuseTextureInfo& a, const DiffuseTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < uniqueDiffuse.size() ) { + DXGI_FORMAT fmt = uniqueDiffuse[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueDiffuse.size() && uniqueDiffuse[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + 
texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueDiffuse[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + m_WorldMeshDiffuseAtlasLookup[uniqueDiffuse[rangeStart + i].gothicTexture] = { fmt, atlas.descriptors[i] }; + + m_WorldMeshDiffuseAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + } + + // Helper: build aux (normal/fx) atlases + auto buildAuxAtlases = [&]( std::vector& textures, + std::unordered_map& lookup, + std::array& atlasses ) { + std::sort( textures.begin(), textures.end(), + []( const AuxTextureInfo& a, const AuxTextureInfo& b ) { return a.Format < b.Format; } ); + + size_t rangeStart = 0; + while ( rangeStart < textures.size() ) { + DXGI_FORMAT fmt = textures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < textures.size() && textures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( textures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( device, context, txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) + lookup[textures[rangeStart + i].engineTexture] = { fmt, atlas.descriptors[i] }; + + atlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + }; + + buildAuxAtlases( uniqueNormals, m_WorldMeshNormalAtlasLookup, m_WorldMeshNormalAtlasses ); + buildAuxAtlases( uniqueFx, m_WorldMeshFxAtlasLookup, m_WorldMeshFxAtlasses ); + + LogInfo() << "World Mesh Atlas: " << uniqueDiffuse.size() << " diffuse, " + << uniqueNormals.size() << " normal, " << uniqueFx.size() << " fx textures"; +} + +// 
============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11MeshAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::vector submeshGPU; + + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count + { + size_t totalVertices = 0, totalIndices = 0, totalSubmeshes = 0; + auto& ws = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : ws ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + zCTexture* tex = meshKey.Material->GetTextureSingle(); + if ( m_WorldMeshDiffuseAtlasLookup.find( tex ) != m_WorldMeshDiffuseAtlasLookup.end() ) { + totalVertices += worldMeshInfo->Vertices.size(); + totalIndices += worldMeshInfo->Indices.size(); + totalSubmeshes++; + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + submeshGPU.reserve( totalSubmeshes ); + } + + auto& worldSections = Engine::GAPI->GetWorldSections(); + for ( auto& [x, row] : worldSections ) { + for ( auto& [y, section] : row ) { + for ( auto const& [meshKey, worldMeshInfo] : section.WorldMeshes ) { + if ( !meshKey.Material ) continue; + + zCTexture* tex = meshKey.Material->GetTextureSingle(); + auto diffIt = m_WorldMeshDiffuseAtlasLookup.find( tex ); + if ( diffIt == m_WorldMeshDiffuseAtlasLookup.end() ) + continue; + + MeshInfo* mi = worldMeshInfo; + if ( !processedMeshes.insert( mi ).second ) + continue; + + m_WorldMeshAtlasedSubmeshes.insert( mi ); + + const TextureAtlasLookup& diffLookup = diffIt->second; + auto& group = groupsByFormat[diffLookup.atlasFormat]; + group.format = diffLookup.atlasFormat; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + 
allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + WorldMeshSubmeshGPUData gpuData = {}; + gpuData.diffuseSlice = diffLookup.descriptor.slice; + gpuData.dUStart = diffLookup.descriptor.uStart; + gpuData.dVStart = diffLookup.descriptor.vStart; + gpuData.dUEnd = diffLookup.descriptor.uEnd; + gpuData.dVEnd = diffLookup.descriptor.vEnd; + + UINT flags = 0; + auto surface = tex->GetSurface(); + if ( surface ) { + D3D11Texture* normalTex = surface->GetNormalmap(); + if ( normalTex ) { + auto normIt = m_WorldMeshNormalAtlasLookup.find( normalTex ); + if ( normIt != m_WorldMeshNormalAtlasLookup.end() ) { + gpuData.normalSlice = normIt->second.descriptor.slice; + gpuData.nUStart = normIt->second.descriptor.uStart; + gpuData.nVStart = normIt->second.descriptor.vStart; + gpuData.nUEnd = normIt->second.descriptor.uEnd; + gpuData.nVEnd = normIt->second.descriptor.vEnd; + flags |= 1; // HAS_NORMAL + } + } + + D3D11Texture* fxTex = surface->GetFxMap(); + if ( fxTex ) { + auto fxIt = m_WorldMeshFxAtlasLookup.find( fxTex ); + if ( fxIt != m_WorldMeshFxAtlasLookup.end() ) { + gpuData.fxSlice = fxIt->second.descriptor.slice; + gpuData.fUStart = fxIt->second.descriptor.uStart; + gpuData.fVStart = fxIt->second.descriptor.vStart; + gpuData.fUEnd = fxIt->second.descriptor.uEnd; + gpuData.fVEnd = fxIt->second.descriptor.vEnd; + flags |= 2; // HAS_FX + } + } + } + + int alphaFunc = meshKey.Material->GetAlphaFunc(); + if ( alphaFunc == zMAT_ALPHA_FUNC_TEST || tex->HasAlphaChannel() ) + flags |= 4; // ALPHA_TEST + + gpuData.flags = flags; + + UINT submeshIndex = static_cast(submeshGPU.size()); + submeshGPU.push_back( gpuData ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = static_cast(mi->Indices.size()); + args.InstanceCount = 1; + args.StartIndexLocation = startIndex; + args.BaseVertexLocation = static_cast(baseVertex); + args.StartInstanceLocation = submeshIndex; + group.indirectArgs.push_back( args ); + } + } + } 
+ + if ( allVertices.empty() ) { + LogWarn() << "D3D11MeshAtlasPass::BuildGeometryBuffers: No world mesh vertices for atlas"; + return; + } + + m_WorldMeshGlobalVertexBuffer = std::make_unique(); + m_WorldMeshGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_WorldMeshGlobalIndexBuffer = std::make_unique(); + m_WorldMeshGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxIds = static_cast(submeshGPU.size()); + if ( maxIds < 256 ) maxIds = 256; + std::vector instanceIds( maxIds ); + for ( uint32_t i = 0; i < maxIds; i++ ) + instanceIds[i] = i; + + m_WorldMeshGlobalInstanceIdBuffer = std::make_unique(); + m_WorldMeshGlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_WorldMeshSubmeshBuffer = std::make_unique>(); + m_WorldMeshSubmeshBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_WorldMeshSubmeshBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + m_WorldMeshAtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_IMMUTABLE, + D3D11IndirectBuffer::CA_NONE ); + + 
m_WorldMeshAtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "World Mesh Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_WorldMeshAtlasDrawGroups.size() << " format groups, " + << submeshGPU.size() << " submeshes"; +} + +// ============================================================ +// Draw – per-frame indirect draw of atlased world mesh +// ============================================================ +XRESULT D3D11MeshAtlasPass::Draw() { + if ( m_WorldMeshAtlasDrawGroups.empty() || + !m_WorldMeshGlobalVertexBuffer || !m_WorldMeshGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = m_Engine->RecordGraphicsEvent( L"DrawWorldMesh_Atlas" ); + auto& context = m_Engine->GetContext(); + + m_Engine->SetDefaultStates(); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + Engine::GAPI->SetViewTransformXM( view ); + Engine::GAPI->ResetWorldTransform(); + + context->DSSetShader( nullptr, nullptr, 0 ); + context->HSSetShader( nullptr, nullptr, 0 ); + + // --- Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_WorldMeshGlobalVertexBuffer->GetVertexBuffer().Get(), + m_WorldMeshGlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_WorldMeshGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // Submesh structured buffer -> VS t1 + ID3D11ShaderResourceView* submeshSRV = m_WorldMeshSubmeshBuffer->GetSRV(); + context->VSSetShaderResources( 1, 1, &submeshSRV ); + + // Vertex shader + m_Engine->SetActiveVertexShader( "VS_ExWorldAtlas" ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + m_Engine->ActiveVS->Apply(); + + // Pixel shader + m_Engine->SetActivePixelShader( 
"PS_WorldAtlas" ); + + m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + GSky* sky = Engine::GAPI->GetSky(); + m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); + m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + MaterialInfo defMaterial{}; + m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); + m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + + m_Engine->InfiniteRangeConstantBuffer->BindToPixelShader( 3 ); + + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActivePS->Apply(); + + // --- Draw per format group --- + for ( auto& group : m_WorldMeshAtlasDrawGroups ) { + ID3D11ShaderResourceView* diffuseSRV = m_WorldMeshDiffuseAtlasses[group.format].atlasSRV; + if ( !diffuseSRV ) continue; + + // Bind first available normal/fx atlases (format grouping is per-diffuse) + ID3D11ShaderResourceView* normalSRV = nullptr; + ID3D11ShaderResourceView* fxSRV = nullptr; + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) { + if ( !normalSRV && m_WorldMeshNormalAtlasses[i].atlasSRV ) + normalSRV = m_WorldMeshNormalAtlasses[i].atlasSRV; + if ( !fxSRV && m_WorldMeshFxAtlasses[i].atlasSRV ) + fxSRV = m_WorldMeshFxAtlasses[i].atlasSRV; + } + + ID3D11ShaderResourceView* psSRVs[3] = { diffuseSRV, normalSRV, fxSRV }; + context->PSSetShaderResources( 0, 3, psSRVs ); + + DrawMultiIndexedInstancedIndirect( + context.Get(), + static_cast(group.indirectArgs.size()), + group.indirectBuffer->GetIndirectBuffer().Get(), + 0, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind submesh buffer from VS + ID3D11ShaderResourceView* nullSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11MeshAtlasPass.h 
b/D3D11Engine/D3D11MeshAtlasPass.h new file mode 100644 index 00000000..e8c4a2f0 --- /dev/null +++ b/D3D11Engine/D3D11MeshAtlasPass.h @@ -0,0 +1,83 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include "D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "ConstantBufferStructs.h" + +#include +#include +#include +#include +#include +#include + +class D3D11GraphicsEngine; +class D3D11Texture; +class zCTexture; +struct MeshInfo; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for world mesh geometry. + * + * Responsibilities: + * - Building per-format Texture2DArray atlases for world-mesh diffuse, normal, and FX textures + * - Building the merged global world-mesh VB/IB with per-submesh indirect-args buffers + * - Executing the multi-indirect draw of atlased world mesh geometry each frame + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame instead of the old + * DrawWorldMesh_Atlas(). + */ +class D3D11MeshAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11MeshAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases and geometry buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** Draw atlased world mesh geometry via multi-indirect. */ + XRESULT Draw(); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_WorldMeshAtlasDrawGroups.empty(); } + + /** Returns true if the given MeshInfo was atlased (used to skip it in the legacy path). */ + bool IsSubmeshAtlased( MeshInfo* mi ) const { + return m_WorldMeshAtlasedSubmeshes.count( mi ) != 0; + } + + /** Diffuse atlas lookup (read-only access for shadow passes). 
*/ + const std::unordered_map& GetDiffuseAtlasLookup() const { + return m_WorldMeshDiffuseAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures (one array per texture type) ---- + std::unordered_map m_WorldMeshDiffuseAtlasLookup; + std::unordered_map m_WorldMeshNormalAtlasLookup; + std::unordered_map m_WorldMeshFxAtlasLookup; + + std::array m_WorldMeshDiffuseAtlasses{}; + std::array m_WorldMeshNormalAtlasses{}; + std::array m_WorldMeshFxAtlasses{}; + + // ---- Global geometry ---- + std::unique_ptr m_WorldMeshGlobalVertexBuffer; + std::unique_ptr m_WorldMeshGlobalIndexBuffer; + std::unique_ptr m_WorldMeshGlobalInstanceIdBuffer; + + // ---- GPU submesh descriptors ---- + std::unique_ptr> m_WorldMeshSubmeshBuffer; + + // ---- Draw groups ---- + std::vector m_WorldMeshAtlasDrawGroups; + std::unordered_set m_WorldMeshAtlasedSubmeshes; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); +}; diff --git a/D3D11Engine/D3D11NVHBAO.cpp b/D3D11Engine/D3D11NVHBAO.cpp index a10f168b..f9871ccb 100644 --- a/D3D11Engine/D3D11NVHBAO.cpp +++ b/D3D11Engine/D3D11NVHBAO.cpp @@ -58,7 +58,7 @@ XRESULT D3D11NVHBAO::Render( Input.DepthData.ProjectionMatrix.Layout = GFSDK_SSAO_COLUMN_MAJOR_ORDER; Input.DepthData.MetersToViewSpaceUnits = settings.MetersToViewSpaceUnits; - Input.NormalData.Enable = true; + Input.NormalData.Enable = false; Input.NormalData.pFullResNormalTextureSRV = pFullResNormalTexSRV.Get(); auto identity = XMMatrixIdentity(); Input.NormalData.WorldToViewMatrix.Data = GFSDK_SSAO_Float4x4( reinterpret_cast(&identity) ); // We already have them in view-space diff --git a/D3D11Engine/D3D11PFX_DistanceBlur.cpp b/D3D11Engine/D3D11PFX_DistanceBlur.cpp index beaf6cbb..a1214f61 100644 --- a/D3D11Engine/D3D11PFX_DistanceBlur.cpp +++ b/D3D11Engine/D3D11PFX_DistanceBlur.cpp @@ -28,7 +28,6 @@ XRESULT D3D11PFX_DistanceBlur::Render( ID3D11ShaderResourceView* diffuse ) { auto ps = engine->GetShaderManager().GetPShader( "PS_PFX_DistanceBlur" ); 
Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy scene auto tempBuffer = FxRenderer->GetTempBuffer(); diff --git a/D3D11Engine/D3D11PFX_GodRays.cpp b/D3D11Engine/D3D11PFX_GodRays.cpp index 3db86b2e..88e9fc38 100644 --- a/D3D11Engine/D3D11PFX_GodRays.cpp +++ b/D3D11Engine/D3D11PFX_GodRays.cpp @@ -19,7 +19,7 @@ D3D11PFX_GodRays::~D3D11PFX_GodRays() {} /** Draws this effect to the given buffer */ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* backbuffer, - ID3D11ShaderResourceView* normals ) { + ID3D11ShaderResourceView* depth ) { if ( Engine::GAPI->GetSky()->GetAtmoshpereSettings().LightDirection.y <= 0 ) return XR_SUCCESS; // Don't render the godrays in the night-time @@ -82,7 +82,7 @@ XRESULT D3D11PFX_GodRays::Render( ID3D11ShaderResourceView* srvs[2] { backbuffer, - normals, + depth, }; engine->GetContext()->PSSetShaderResources( 0, 2, srvs ); @@ -96,11 +96,13 @@ XRESULT D3D11PFX_GodRays::Render( zoomPS->GetConstantBuffer()[0]->UpdateBuffer( &gcb ); zoomPS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + auto clampSampler = engine->GetClampSamplerState(); + engine->GetContext()->PSSetSamplers( 0, 1, &clampSampler ); + FxRenderer->CopyTextureToRTV( tempBuffer->GetShaderResView(), tempBuffer2->GetRenderTargetView(), INT2( 0, 0 ), true ); // Upscale and blend Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); FxRenderer->CopyTextureToRTV( tempBuffer2->GetShaderResView(), oldRTV, engine->GetResolution() ); diff --git a/D3D11Engine/D3D11PFX_GodRays.h b/D3D11Engine/D3D11PFX_GodRays.h index f5f97fe1..20b51ed1 100644 --- a/D3D11Engine/D3D11PFX_GodRays.h +++ b/D3D11Engine/D3D11PFX_GodRays.h @@ -9,6 +9,6 @@ class D3D11PFX_GodRays : /** Draws this effect to the given buffer */ XRESULT Render( RenderToTextureBuffer* fxbuffer ) override { return XR_FAILED; } - XRESULT Render( ID3D11ShaderResourceView* backbuffer, 
ID3D11ShaderResourceView* normals ); + XRESULT Render( ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth ); }; diff --git a/D3D11Engine/D3D11PFX_HDR.cpp b/D3D11Engine/D3D11PFX_HDR.cpp index 7794bf1c..a266e3a7 100644 --- a/D3D11Engine/D3D11PFX_HDR.cpp +++ b/D3D11Engine/D3D11PFX_HDR.cpp @@ -41,7 +41,6 @@ XRESULT D3D11PFX_HDR::Render( ID3D11RenderTargetView* output, ID3D11ShaderResour D3D11GraphicsEngine* engine = reinterpret_cast(Engine::GraphicsEngine); engine->SetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); // Save old rendertargets diff --git a/D3D11Engine/D3D11PFX_HeightFog.cpp b/D3D11Engine/D3D11PFX_HeightFog.cpp index 7262fd6e..80d6c7d0 100644 --- a/D3D11Engine/D3D11PFX_HeightFog.cpp +++ b/D3D11Engine/D3D11PFX_HeightFog.cpp @@ -28,7 +28,10 @@ XRESULT D3D11PFX_HeightFog::Render( RenderToTextureBuffer* fxbuffer ) { vs->Apply(); HeightfogConstantBuffer cb; - XMStoreFloat4x4( &cb.InvProj, XMMatrixInverse( nullptr, XMLoadFloat4x4(&Engine::GAPI->GetProjectionMatrix()) ) ); + { + auto& proj = Engine::GAPI->GetProjectionMatrix(); + cb.HF_ProjParams = float4( 1.0f / proj._11, 1.0f / proj._22, proj._43, proj._33 ); + } XMStoreFloat4x4( &cb.InvView, XMMatrixInverse( nullptr, Engine::GAPI->GetViewMatrixXM() ) ); @@ -119,11 +122,9 @@ XRESULT D3D11PFX_HeightFog::Render( RenderToTextureBuffer* fxbuffer ) { engine->SetDefaultStates(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GAPI->GetRendererState().BlendState.SetDefault(); //Engine::GAPI->GetRendererState().BlendState.SetAdditiveBlending(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Copy FxRenderer->DrawFullScreenQuad(); diff --git a/D3D11Engine/D3D11PFX_SMAA.cpp 
b/D3D11Engine/D3D11PFX_SMAA.cpp index 0b2217b7..14ed5525 100644 --- a/D3D11Engine/D3D11PFX_SMAA.cpp +++ b/D3D11Engine/D3D11PFX_SMAA.cpp @@ -30,7 +30,18 @@ void D3D11PFX_SMAA::RenderPostFX( const Microsoft::WRL::ComPtr(Engine::GraphicsEngine); ID3D11DeviceContext* pContext = engine->GetContext().Get(); - engine->SetDefaultStates(); + // Configure states that SMAA needs through the Gothic state system + auto& state = Engine::GAPI->GetRendererState(); + state.RasterizerState.SetDefault(); + state.RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; + state.RasterizerState.DepthClipEnable = true; + + state.DepthState.DepthBufferEnabled = false; + state.DepthState.DepthWriteEnabled = false; + state.DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; + + state.BlendState.SetDefault(); + engine->UpdateRenderStates(); Microsoft::WRL::ComPtr OldRTV; diff --git a/D3D11Engine/D3D11PfxRenderer.cpp b/D3D11Engine/D3D11PfxRenderer.cpp index 380b381e..cf2df0d8 100644 --- a/D3D11Engine/D3D11PfxRenderer.cpp +++ b/D3D11Engine/D3D11PfxRenderer.cpp @@ -34,13 +34,13 @@ D3D11PfxRenderer::D3D11PfxRenderer() { FX_SMAA = std::make_unique( this ); FX_TAA = std::make_unique( this ); NvHBAO = std::make_unique(); + PFX_FSR1 = std::make_unique( this ); + PFX_FSR2 = std::make_unique( this ); + PFX_FSR3 = std::make_unique( this ); } PFX_CAS = std::make_unique( this ); PFX_SimpleSharpen = std::make_unique( this ); - PFX_FSR1 = std::make_unique( this ); - PFX_FSR2 = std::make_unique( this ); - PFX_FSR3 = std::make_unique( this ); } D3D11PfxRenderer::~D3D11PfxRenderer() { @@ -65,8 +65,8 @@ XRESULT D3D11PfxRenderer::RenderHeightfog() { } /** Renders the godrays-Effect */ -XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals) { - return FX_GodRays->Render( backbuffer , normals ); +XRESULT D3D11PfxRenderer::RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* depth) { + 
return FX_GodRays->Render( backbuffer , depth ); } /** Renders the HDR-Effect */ @@ -259,23 +259,28 @@ TextureHandle D3D11PfxRenderer::GetTempBufferDS4() void D3D11PfxRenderer::FreeResources() { auto& settings = Engine::GAPI->GetRendererState().RendererSettings; - if ( settings.AntiAliasingMode != GothicRendererSettings::AA_SMAA ) { + if ( this->FX_SMAA + && settings.AntiAliasingMode != GothicRendererSettings::AA_SMAA ) { this->FX_SMAA->ReleaseResources(); } - if ( settings.AntiAliasingMode != GothicRendererSettings::AA_TAA ) { + if ( this->FX_TAA + && settings.AntiAliasingMode != GothicRendererSettings::AA_TAA ) { this->FX_TAA->ReleaseResources(); } - if ( settings.AntiAliasingMode != GothicRendererSettings::AA_FSR + if ( this->PFX_FSR2 + && settings.AntiAliasingMode != GothicRendererSettings::AA_FSR && !(settings.Upscaler == GothicRendererSettings::UPSCALER_FSR_2) && settings.ResolutionScalePercent < 100) { this->PFX_FSR2->ReleaseResources(); } - if ( !(settings.Upscaler == GothicRendererSettings::UPSCALER_FSR_1) && settings.ResolutionScalePercent < 100 ) { + if ( this->PFX_FSR1 + && !(settings.Upscaler == GothicRendererSettings::UPSCALER_FSR_1) && settings.ResolutionScalePercent < 100 ) { this->PFX_FSR1->ReleaseResources(); } - if ( !settings.HbaoSettings.Enabled ) { + if ( this->NvHBAO + && !settings.HbaoSettings.Enabled ) { this->NvHBAO->ReleaseResources(); } } diff --git a/D3D11Engine/D3D11PfxRenderer.h b/D3D11Engine/D3D11PfxRenderer.h index 30aa28c6..f466705e 100644 --- a/D3D11Engine/D3D11PfxRenderer.h +++ b/D3D11Engine/D3D11PfxRenderer.h @@ -45,7 +45,7 @@ class D3D11PfxRenderer { XRESULT RenderSimpleSharpen( const Microsoft::WRL::ComPtr& input, INT2 inputSize, const Microsoft::WRL::ComPtr& output, INT2 outputSize, RenderToTextureBuffer& intermediateBuffer ); /** Renders the godrays-Effect */ - XRESULT RenderGodRays(ID3D11ShaderResourceView* backbuffer, ID3D11ShaderResourceView* normals); + XRESULT RenderGodRays(ID3D11ShaderResourceView* backbuffer, 
ID3D11ShaderResourceView* depth); /** Copies the given texture to the given RTV */ XRESULT CopyTextureToRTV( const Microsoft::WRL::ComPtr& texture, const Microsoft::WRL::ComPtr& rtv, INT2 targetResolution = INT2( 0, 0 ), bool useCustomPS = false, INT2 offset = INT2( 0, 0 ) ); diff --git a/D3D11Engine/D3D11PipelineStateObject.cpp b/D3D11Engine/D3D11PipelineStateObject.cpp new file mode 100644 index 00000000..cd157a26 --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.cpp @@ -0,0 +1,291 @@ +#include "pch.h" +#include "D3D11PipelineStateObject.h" +#include "GothicGraphicsState.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11GShader.h" +#include "D3D11HDShader.h" +#include "Toolbox.h" + +// --------------------------------------------------------------------------- +// D3D11PipelineStateObject::Desc +// --------------------------------------------------------------------------- + +D3D11PipelineStateObject::Desc::Desc() { + BlendState.SetDefault(); + RasterizerState.SetDefault(); + DepthStencilState.SetDefault(); + std::fill( std::begin( RTVFormats ), std::end( RTVFormats ), DXGI_FORMAT_UNKNOWN ); +} + +// --------------------------------------------------------------------------- +// D3D11PipelineStateObject +// --------------------------------------------------------------------------- + +D3D11PipelineStateObject::D3D11PipelineStateObject( const Desc& desc ) + : m_VS( desc.VS ) + , m_PS( desc.PS ) + , m_GS( desc.GS ) + , m_HDS( desc.HDS ) + , m_BlendState( desc.BlendState ) + , m_SampleMask( desc.SampleMask ) + , m_RasterizerState( desc.RasterizerState ) + , m_DepthStencilState( desc.DepthStencilState ) + , m_TopologyType( desc.TopologyType ) + , m_NumRenderTargets( desc.NumRenderTargets ) + , m_DSVFormat( desc.DSVFormat ) + , m_SampleDesc( desc.SampleDesc ) +{ + memcpy( m_RTVFormats, desc.RTVFormats, sizeof( m_RTVFormats ) ); + + // Ensure the Gothic state hashes are up to date + m_BlendState.ComputeHash(); + 
m_RasterizerState.ComputeHash(); + m_DepthStencilState.ComputeHash(); + + ComputeHash(); +} + +static void HashPointer( std::size_t& seed, const void* ptr ) { + auto v = reinterpret_cast( ptr ); + Toolbox::hash_combine( seed, static_cast( v ) ); + if constexpr ( sizeof( uintptr_t ) > sizeof( DWORD ) ) { + Toolbox::hash_combine( seed, static_cast( v >> 32 ) ); + } +} + +void D3D11PipelineStateObject::ComputeHash() { + m_Hash = 0; + + // Shader identity: use raw pointer value as a unique id + HashPointer( m_Hash, m_VS.get() ); + HashPointer( m_Hash, m_PS.get() ); + HashPointer( m_Hash, m_GS.get() ); + HashPointer( m_Hash, m_HDS.get() ); + + // Fixed-function state hashes + m_BlendState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( m_BlendState.Hash ) ); + m_RasterizerState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( m_RasterizerState.Hash ) ); + m_DepthStencilState.ComputeHash(); + Toolbox::hash_combine( m_Hash, static_cast( m_DepthStencilState.Hash ) ); + + // Sample mask + Toolbox::hash_combine( m_Hash, static_cast(m_SampleMask) ); + + // Topology + Toolbox::hash_combine( m_Hash, static_cast( m_TopologyType ) ); + + // Render target formats + Toolbox::hash_combine( m_Hash, static_cast( m_NumRenderTargets ) ); + for ( UINT i = 0; i < 8; ++i ) { + Toolbox::hash_combine( m_Hash, static_cast( m_RTVFormats[i] ) ); + } + Toolbox::hash_combine( m_Hash, static_cast( m_DSVFormat ) ); + + // Sample desc + Toolbox::hash_combine( m_Hash, static_cast( m_SampleDesc.Count ) ); + Toolbox::hash_combine( m_Hash, static_cast( m_SampleDesc.Quality ) ); +} + +// --------------------------------------------------------------------------- +// D3D11PipelineStateCache +// --------------------------------------------------------------------------- + +void D3D11PipelineStateCache::Init( ID3D11Device1* device, ID3D11DeviceContext1* context ) { + m_Device = device; + m_Context = context; +} + +void D3D11PipelineStateCache::SetPipelineState( const 
D3D11PipelineStateObject& pso ) { + // Fast-out: if the same PSO is already fully bound, nothing to do + if ( pso.GetHash() == m_BoundState.PSOHash ) + return; + + // --- Vertex Shader ------------------------------------------------------- + const size_t vsHash = reinterpret_cast( pso.GetVS().get() ); + if ( vsHash != m_BoundState.VSHash ) { + if ( pso.GetVS() ) { + pso.GetVS()->Apply(); + } else { + m_Context->VSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.VSHash = vsHash; + } + + // --- Pixel Shader -------------------------------------------------------- + const size_t psHash = reinterpret_cast( pso.GetPS().get() ); + if ( psHash != m_BoundState.PSHash ) { + if ( pso.GetPS() ) { + pso.GetPS()->Apply(); + } else { + m_Context->PSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.PSHash = psHash; + } + + // --- Geometry Shader ----------------------------------------------------- + const size_t gsHash = reinterpret_cast( pso.GetGS().get() ); + if ( gsHash != m_BoundState.GSHash ) { + if ( pso.GetGS() ) { + pso.GetGS()->Apply(); + } else { + m_Context->GSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.GSHash = gsHash; + } + + // --- Hull / Domain Shader ------------------------------------------------ + const size_t hdsHash = reinterpret_cast( pso.GetHDS().get() ); + if ( hdsHash != m_BoundState.HDSHash ) { + if ( pso.GetHDS() ) { + pso.GetHDS()->Apply(); + } else { + m_Context->HSSetShader( nullptr, nullptr, 0 ); + m_Context->DSSetShader( nullptr, nullptr, 0 ); + } + m_BoundState.HDSHash = hdsHash; + } + + // --- Blend State --------------------------------------------------------- + const size_t blendHash = pso.GetBlendState().Hash; + if ( blendHash != m_BoundState.BlendHash ) { + auto blendState = GetOrCreateBlendState( pso.GetBlendState() ); + const float blendFactor[4] = { 0, 0, 0, 0 }; + m_Context->OMSetBlendState( blendState.Get(), blendFactor, pso.GetSampleMask() ); + m_BoundState.BlendHash = blendHash; + m_BoundState.SampleMask = 
pso.GetSampleMask(); + } else if ( pso.GetSampleMask() != m_BoundState.SampleMask ) { + // Same blend state but different sample mask — need to rebind + auto it = m_BlendStates.find( blendHash ); + if ( it != m_BlendStates.end() ) { + const float blendFactor[4] = { 0, 0, 0, 0 }; + m_Context->OMSetBlendState( it->second.Get(), blendFactor, pso.GetSampleMask() ); + } + m_BoundState.SampleMask = pso.GetSampleMask(); + } + + // --- Rasterizer State ---------------------------------------------------- + const size_t rastHash = pso.GetRasterizerState().Hash; + if ( rastHash != m_BoundState.RasterizerHash ) { + auto rastState = GetOrCreateRasterizerState( pso.GetRasterizerState() ); + m_Context->RSSetState( rastState.Get() ); + m_BoundState.RasterizerHash = rastHash; + } + + // --- Depth-Stencil State ------------------------------------------------- + const size_t dsHash = pso.GetDepthStencilState().Hash; + if ( dsHash != m_BoundState.DepthStencilHash ) { + auto dsState = GetOrCreateDepthStencilState( pso.GetDepthStencilState() ); + m_Context->OMSetDepthStencilState( dsState.Get(), 0 ); + m_BoundState.DepthStencilHash = dsHash; + } + + // --- Primitive Topology -------------------------------------------------- + const D3D11_PRIMITIVE_TOPOLOGY topology = pso.GetD3D11Topology(); + if ( topology != m_BoundState.Topology ) { + m_Context->IASetPrimitiveTopology( topology ); + m_BoundState.Topology = topology; + } + + // Mark whole PSO as bound + m_BoundState.PSOHash = pso.GetHash(); +} + +void D3D11PipelineStateCache::Invalidate() { + m_BoundState = BoundState{}; +} + +void D3D11PipelineStateCache::Clear() { + Invalidate(); + m_BlendStates.clear(); + m_RasterizerStates.clear(); + m_DepthStencilStates.clear(); +} + +// --------------------------------------------------------------------------- +// State object creation helpers +// --------------------------------------------------------------------------- + +Microsoft::WRL::ComPtr 
+D3D11PipelineStateCache::GetOrCreateBlendState( const GothicBlendStateInfo& desc ) { + auto it = m_BlendStates.find( desc.Hash ); + if ( it != m_BlendStates.end() ) + return it->second; + + D3D11_BLEND_DESC bd = {}; + bd.AlphaToCoverageEnable = desc.AlphaToCoverage; + bd.IndependentBlendEnable = FALSE; + + bd.RenderTarget[0].BlendEnable = desc.BlendEnabled; + bd.RenderTarget[0].SrcBlend = static_cast( desc.SrcBlend ); + bd.RenderTarget[0].DestBlend = static_cast( desc.DestBlend ); + bd.RenderTarget[0].BlendOp = static_cast( desc.BlendOp ); + bd.RenderTarget[0].SrcBlendAlpha = static_cast( desc.SrcBlendAlpha ); + bd.RenderTarget[0].DestBlendAlpha = static_cast( desc.DestBlendAlpha ); + bd.RenderTarget[0].BlendOpAlpha = static_cast( desc.BlendOpAlpha ); + bd.RenderTarget[0].RenderTargetWriteMask = desc.ColorWritesEnabled + ? ( D3D11_COLOR_WRITE_ENABLE_RED | D3D11_COLOR_WRITE_ENABLE_GREEN | + D3D11_COLOR_WRITE_ENABLE_BLUE | D3D11_COLOR_WRITE_ENABLE_ALPHA ) + : 0; + + Microsoft::WRL::ComPtr state; + m_Device->CreateBlendState( &bd, state.GetAddressOf() ); + m_BlendStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ) { + auto it = m_RasterizerStates.find( desc.Hash ); + if ( it != m_RasterizerStates.end() ) + return it->second; + + D3D11_RASTERIZER_DESC rd = {}; + rd.CullMode = static_cast( desc.CullMode ); + rd.FillMode = desc.Wireframe ? 
D3D11_FILL_WIREFRAME : D3D11_FILL_SOLID; + rd.FrontCounterClockwise = desc.FrontCounterClockwise; + rd.DepthBias = desc.ZBias; + rd.DepthBiasClamp = 0; + rd.SlopeScaledDepthBias = 0; + rd.DepthClipEnable = desc.DepthClipEnable; + rd.ScissorEnable = false; + rd.MultisampleEnable = false; + rd.AntialiasedLineEnable = true; + + Microsoft::WRL::ComPtr state; + m_Device->CreateRasterizerState( &rd, state.GetAddressOf() ); + m_RasterizerStates[desc.Hash] = state; + return state; +} + +Microsoft::WRL::ComPtr +D3D11PipelineStateCache::GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ) { + auto it = m_DepthStencilStates.find( desc.Hash ); + if ( it != m_DepthStencilStates.end() ) + return it->second; + + D3D11_DEPTH_STENCIL_DESC dd = {}; + dd.DepthEnable = desc.DepthBufferEnabled; + dd.DepthWriteMask = desc.DepthWriteEnabled ? D3D11_DEPTH_WRITE_MASK_ALL + : D3D11_DEPTH_WRITE_MASK_ZERO; + dd.DepthFunc = static_cast( desc.DepthBufferCompareFunc ); + + dd.StencilEnable = false; + dd.StencilReadMask = 0xFF; + dd.StencilWriteMask = 0xFF; + dd.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_INCR; + dd.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dd.BackFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilDepthFailOp = D3D11_STENCIL_OP_DECR; + dd.BackFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + dd.BackFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + + Microsoft::WRL::ComPtr state; + m_Device->CreateDepthStencilState( &dd, state.GetAddressOf() ); + m_DepthStencilStates[desc.Hash] = state; + return state; +} diff --git a/D3D11Engine/D3D11PipelineStateObject.h b/D3D11Engine/D3D11PipelineStateObject.h new file mode 100644 index 00000000..51408ac0 --- /dev/null +++ b/D3D11Engine/D3D11PipelineStateObject.h @@ -0,0 +1,193 @@ +#pragma once +#include "pch.h" +#include +#include +#include "GothicGraphicsState.h" + +class D3D11VShader; +class 
D3D11PShader; +class D3D11GShader; +class D3D11HDShader; + +struct GothicBlendStateInfo; +struct GothicRasterizerStateInfo; +struct GothicDepthBufferStateInfo; + +// Mirrors D3D12_PRIMITIVE_TOPOLOGY_TYPE +enum class PrimitiveTopologyType : uint8_t { + Undefined = 0, + Point = 1, + Line = 2, + Triangle = 3, + Patch = 4 +}; + +/** Converts PrimitiveTopologyType to the most common D3D11 topology for that type */ +inline D3D11_PRIMITIVE_TOPOLOGY ToD3D11Topology( PrimitiveTopologyType type ) { + switch ( type ) { + case PrimitiveTopologyType::Point: return D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; + case PrimitiveTopologyType::Line: return D3D11_PRIMITIVE_TOPOLOGY_LINELIST; + case PrimitiveTopologyType::Triangle: return D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + case PrimitiveTopologyType::Patch: return D3D11_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST; + default: return D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + } +} + +/** + * Immutable pipeline state object, modeled after D3D12_GRAPHICS_PIPELINE_STATE_DESC. + * + * Captures the full set of static pipeline configuration that in DX12 would be + * baked into a single ID3D12PipelineState: + * - Shader stages (VS, PS, GS, Hull/Domain) + * - Blend, Rasterizer, DepthStencil states + * - Primitive topology type + * - Sample mask / sample desc + * - Render-target and depth-stencil formats + * + * Once constructed the object is immutable. A hash is computed at creation + * time so that the PipelineStateCache can quickly detect redundant state sets. + */ +class D3D11PipelineStateObject { +public: + /** Descriptor used to build a PSO – fill this in then pass to the constructor. 
*/ + struct Desc { + // --- Shader stages (nullable) ---------------------------------------- + std::shared_ptr VS; + std::shared_ptr PS; + std::shared_ptr GS; + std::shared_ptr HDS; // Hull + Domain (combined, matching existing codebase) + + // --- Fixed-function state -------------------------------------------- + GothicBlendStateInfo BlendState; + UINT SampleMask = 0xFFFFFFFF; + GothicRasterizerStateInfo RasterizerState; + GothicDepthBufferStateInfo DepthStencilState; + + // --- Input assembly -------------------------------------------------- + PrimitiveTopologyType TopologyType = PrimitiveTopologyType::Triangle; + + // --- Render target description (for future DX12) --------------------- + UINT NumRenderTargets = 1; + DXGI_FORMAT RTVFormats[8] = {}; + DXGI_FORMAT DSVFormat = DXGI_FORMAT_D32_FLOAT; + DXGI_SAMPLE_DESC SampleDesc = { 1, 0 }; + + Desc(); + }; + + explicit D3D11PipelineStateObject( const Desc& desc ); + + // --- Accessors (const, PSO is immutable) --------------------------------- + + size_t GetHash() const { return m_Hash; } + bool operator==( const D3D11PipelineStateObject& o ) const { return m_Hash == o.m_Hash; } + bool operator!=( const D3D11PipelineStateObject& o ) const { return m_Hash != o.m_Hash; } + + const std::shared_ptr& GetVS() const { return m_VS; } + const std::shared_ptr& GetPS() const { return m_PS; } + const std::shared_ptr& GetGS() const { return m_GS; } + const std::shared_ptr& GetHDS() const { return m_HDS; } + + const GothicBlendStateInfo& GetBlendState() const { return m_BlendState; } + const GothicRasterizerStateInfo& GetRasterizerState() const { return m_RasterizerState; } + const GothicDepthBufferStateInfo& GetDepthStencilState() const { return m_DepthStencilState; } + + UINT GetSampleMask() const { return m_SampleMask; } + PrimitiveTopologyType GetTopologyType() const { return m_TopologyType; } + D3D11_PRIMITIVE_TOPOLOGY GetD3D11Topology() const { return ToD3D11Topology( m_TopologyType ); } + + UINT GetNumRenderTargets() 
const { return m_NumRenderTargets; } + DXGI_FORMAT GetRTVFormat( UINT i ) const { return (i < 8) ? m_RTVFormats[i] : DXGI_FORMAT_UNKNOWN; } + DXGI_FORMAT GetDSVFormat() const { return m_DSVFormat; } + const DXGI_SAMPLE_DESC& GetSampleDesc() const { return m_SampleDesc; } + +private: + void ComputeHash(); + + // Shaders + std::shared_ptr m_VS; + std::shared_ptr m_PS; + std::shared_ptr m_GS; + std::shared_ptr m_HDS; + + // Fixed-function state (stored by value – small POD structs) + GothicBlendStateInfo m_BlendState; + UINT m_SampleMask; + GothicRasterizerStateInfo m_RasterizerState; + GothicDepthBufferStateInfo m_DepthStencilState; + + // Input assembly + PrimitiveTopologyType m_TopologyType; + + // Render target description + UINT m_NumRenderTargets; + DXGI_FORMAT m_RTVFormats[8]; + DXGI_FORMAT m_DSVFormat; + DXGI_SAMPLE_DESC m_SampleDesc; + + // Combined hash of the entire PSO + size_t m_Hash = 0; +}; + +/** + * Pipeline-state cache that tracks which D3D11 states are currently bound and + * performs the minimal set of API calls when switching to a new PSO. + * + * Usage: + * cache.SetPipelineState(myPSO); // binds everything that changed + * + * Internally caches the D3D11 blend / rasterizer / depth-stencil state COM + * objects so they are created at most once per unique configuration. + */ +class D3D11PipelineStateCache { +public: + D3D11PipelineStateCache() = default; + + /** Initialise with the D3D11 device and immediate context. */ + void Init( ID3D11Device1* device, ID3D11DeviceContext1* context ); + + /** + * Apply a pipeline state object. Only the state that differs from the + * currently bound state will be set on the device context. + */ + void SetPipelineState( const D3D11PipelineStateObject& pso ); + + /** + * Mark all tracked state as unknown, forcing the next SetPipelineState + * to re-bind everything. Call this when external code (e.g. the Gothic + * engine) may have changed D3D11 state behind the cache's back. 
+ */ + void Invalidate(); + + /** Release all cached D3D11 state objects. */ + void Clear(); + +private: + // --- Cached D3D11 state objects (keyed by Gothic state hash) ------------- + Microsoft::WRL::ComPtr GetOrCreateBlendState( const GothicBlendStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateRasterizerState( const GothicRasterizerStateInfo& desc ); + Microsoft::WRL::ComPtr GetOrCreateDepthStencilState( const GothicDepthBufferStateInfo& desc ); + + ID3D11Device1* m_Device = nullptr; + ID3D11DeviceContext1* m_Context = nullptr; + + // --- Currently bound state (tracked to skip redundant API calls) --------- + struct BoundState { + size_t PSOHash = 0; + size_t VSHash = 0; + size_t PSHash = 0; + size_t GSHash = 0; + size_t HDSHash = 0; + size_t BlendHash = 0; + size_t RasterizerHash = 0; + size_t DepthStencilHash = 0; + UINT SampleMask = 0xFFFFFFFF; + D3D11_PRIMITIVE_TOPOLOGY Topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + }; + BoundState m_BoundState{}; + + // --- State object caches (one D3D11 object per unique hash) -------------- + std::unordered_map> m_BlendStates; + std::unordered_map> m_RasterizerStates; + std::unordered_map> m_DepthStencilStates; +}; diff --git a/D3D11Engine/D3D11ShaderManager.cpp b/D3D11Engine/D3D11ShaderManager.cpp index 04bdb712..e0186a62 100644 --- a/D3D11Engine/D3D11ShaderManager.cpp +++ b/D3D11Engine/D3D11ShaderManager.cpp @@ -258,6 +258,13 @@ XRESULT D3D11ShaderManager::Init() { Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_Wind ) ); + Shaders.push_back( ShaderInfo( "VS_ExInstancedObjIndirectAtlas", "VS_ExInstancedObjIndirectAtlas.hlsl", "v", 12 ) ); + Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); + Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_Wind ) ); + + // World mesh atlas vertex shader (uses same layout 12: ExVertexStruct + uint instance remap) + Shaders.push_back( 
ShaderInfo( "VS_ExWorldAtlas", "VS_ExWorldAtlas.hlsl", "v", 12 ) ); + Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); Shaders.push_back( ShaderInfo( "VS_ExInstanced", "VS_ExInstanced.hlsl", "v", 4 ) ); Shaders.back().cBufferSizes.push_back( sizeof( VS_ExConstantBuffer_PerFrame ) ); @@ -403,6 +410,34 @@ XRESULT D3D11ShaderManager::Init() { Shaders.back().cBufferSizes.push_back( sizeof( MaterialInfo::Buffer ) ); Shaders.back().cBufferSizes.push_back( sizeof( PerObjectState ) ); + Shaders.push_back( ShaderInfo( "PS_DiffuseAtlas", "PS_DiffuseAtlas.hlsl", "p", makros ) ); + Shaders.back().cBufferSizes.push_back( sizeof( GothicGraphicsState ) ); + Shaders.back().cBufferSizes.push_back( sizeof( AtmosphereConstantBuffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( MaterialInfo::Buffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( float4 ) ); // DIST_Distance + + makros.clear(); + m.Name = "NORMALMAPPING"; + m.Definition = "0"; + makros.push_back( m ); + m.Name = "ALPHATEST"; + m.Definition = "1"; + makros.push_back( m ); + + Shaders.push_back( ShaderInfo( "PS_DiffuseAtlasAlphaTest", "PS_DiffuseAtlas.hlsl", "p", makros ) ); + Shaders.back().cBufferSizes.push_back( sizeof( GothicGraphicsState ) ); + Shaders.back().cBufferSizes.push_back( sizeof( AtmosphereConstantBuffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( MaterialInfo::Buffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( float4 ) ); // DIST_Distance + + // World mesh atlas PS — flags-driven normal/FX/alpha-test in a single shader + makros.clear(); + Shaders.push_back( ShaderInfo( "PS_WorldAtlas", "PS_WorldAtlas.hlsl", "p", makros ) ); + Shaders.back().cBufferSizes.push_back( sizeof( GothicGraphicsState ) ); + Shaders.back().cBufferSizes.push_back( sizeof( AtmosphereConstantBuffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( MaterialInfo::Buffer ) ); + Shaders.back().cBufferSizes.push_back( sizeof( float4 ) ); // DIST_Distance + 
Shaders.push_back( ShaderInfo( "PS_PortalDiffuse", "PS_PortalDiffuse.hlsl", "p" ) ); //forest portals, doors, etc. Shaders.push_back( ShaderInfo( "PS_WaterfallFoam", "PS_WaterfallFoam.hlsl", "p" ) ); //foam on at the base of waterfalls @@ -588,19 +623,26 @@ XRESULT D3D11ShaderManager::Init() { casInfo.cBufferSizes.push_back( sizeof( CASConstantBuffer ) ); Shaders.push_back( casInfo ); - // FSR1 EASU (Edge Adaptive Spatial Upsampling) Shader - ShaderInfo fsr1EasuInfo( "PS_PFX_FSR1_EASU", "PS_PFX_FSR1_EASU.hlsl", "p", makros ); - fsr1EasuInfo.cBufferSizes.push_back( sizeof( FSR1EASUConstantBuffer ) ); - Shaders.push_back( fsr1EasuInfo ); - - // FSR1 RCAS (Robust Contrast Adaptive Sharpening) Shader - ShaderInfo fsr1RcasInfo( "PS_PFX_FSR1_RCAS", "PS_PFX_FSR1_RCAS.hlsl", "p", makros ); - fsr1RcasInfo.cBufferSizes.push_back( sizeof( FSR1RCASConstantBuffer ) ); - Shaders.push_back( fsr1RcasInfo ); if ( !FeatureLevel10Compatibility ) { + // FSR1 EASU (Edge Adaptive Spatial Upsampling) Shader + ShaderInfo fsr1EasuInfo( "PS_PFX_FSR1_EASU", "PS_PFX_FSR1_EASU.hlsl", "p", makros ); + fsr1EasuInfo.cBufferSizes.push_back( sizeof( FSR1EASUConstantBuffer ) ); + Shaders.push_back( fsr1EasuInfo ); + + // FSR1 RCAS (Robust Contrast Adaptive Sharpening) Shader + ShaderInfo fsr1RcasInfo( "PS_PFX_FSR1_RCAS", "PS_PFX_FSR1_RCAS.hlsl", "p", makros ); + fsr1RcasInfo.cBufferSizes.push_back( sizeof( FSR1RCASConstantBuffer ) ); + Shaders.push_back( fsr1RcasInfo ); + Shaders.push_back( ShaderInfo( "CS_AdvanceRain", "CS_AdvanceRain.hlsl", "c" ) ); Shaders.back().cBufferSizes.push_back( sizeof( AdvanceRainConstantBuffer ) ); + + Shaders.push_back( ShaderInfo( "CS_CullVobs", "CS_CullVobs.hlsl", "c" ) ); + Shaders.back().cBufferSizes.push_back( sizeof( CullConstants ) ); + + Shaders.push_back( ShaderInfo( "CS_BuildHiZ", "CS_BuildHiZ.hlsl", "c" ) ); + Shaders.back().cBufferSizes.push_back( sizeof( HiZBuildConstants ) ); } return XR_SUCCESS; diff --git a/D3D11Engine/D3D11ShadowMap.cpp 
b/D3D11Engine/D3D11ShadowMap.cpp index 27b02a5f..4db777b8 100644 --- a/D3D11Engine/D3D11ShadowMap.cpp +++ b/D3D11Engine/D3D11ShadowMap.cpp @@ -176,10 +176,6 @@ void D3D11ShadowMap::Init( Microsoft::WRL::ComPtr& device, Micros m_cascadedShadowMap = std::make_unique(); m_cascadedShadowMap->Init( m_device, s, MAX_CSM_CASCADES ); - for ( int i = 0; i < MAX_CSM_CASCADES; ++i ) { - m_RenderQueues[i] = std::make_unique( device.Get(), context.Get() ); - } - Resize( s ); } @@ -195,14 +191,14 @@ void D3D11ShadowMap::Resize( int size ) { } } -void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) { // Bind the cascaded shadow map (Texture2DArray) if ( m_cascadedShadowMap ) { m_cascadedShadowMap->BindToPixelShader( context, slot ); } } -void D3D11ShadowMap::BindSampler( ID3D11DeviceContext1* context, UINT slot ) { +void D3D11ShadowMap::BindSampler( ID3D11DeviceContext* context, UINT slot ) { if ( m_shadowmapSampler ) context->PSSetSamplers( slot, 1, m_shadowmapSampler.GetAddressOf() ); } @@ -417,52 +413,6 @@ XRESULT D3D11ShadowMap::PrepareRender() } } - // Collect all VOBs inside our shadow draw distance (last frustum) - - static std::vector potentialCasters; - static std::vector _1; - static std::vector _2; - potentialCasters.reserve(1024); - potentialCasters.clear(); - - { - RndCullContext ctx; - LegacyRenderQueueProxy q(potentialCasters, _1, _2); - - ctx.queue = &q; - ctx.frustum = m_CascadeCRs[numCascades-1].frustum; - ctx.cameraPosition = m_CascadeCRs[numCascades-1].PositionReplacement; - ctx.stage = RenderStage::STAGE_DRAW_SHADOWS; - ctx.drawDistances.OutdoorVobs = settings.ShadowDrawDistance; - ctx.drawDistances.OutdoorVobsSmall = settings.ShadowDrawDistance; - - Engine::GAPI->CollectVisibleVobs( ctx ); - } - - auto invView = XMMatrixTranspose(XMLoadFloat4x4(&zCCamera::GetCamera()->GetTransformDX( zCCamera::ETransformType::TT_VIEW_INV ))); - auto camPos = 
invView.r[3]; - XMVECTOR camForward = XMVector3Normalize( invView.r[2]); - - for ( int i = 0; i < numCascades; ++i ) { - m_RenderQueues[i]->Reset(); - } - - for (auto vob : potentialCasters ) { - - auto boundingSphere = Frustum::BSphereFromzTBBox3D(vob->Vob->GetBBox()); - if ( numCascades > 0 && m_CascadeCRs[0].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[0]->GetVobs().push_back( vob ); - - if ( numCascades > 1 && m_ShouldUpdateCascade[1] && m_CascadeCRs[1].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[1]->GetVobs().push_back( vob ); - - if ( numCascades > 2 && m_ShouldUpdateCascade[2] && m_CascadeCRs[2].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[2]->GetVobs().push_back( vob ); - - if ( numCascades > 3 && m_ShouldUpdateCascade[3] && m_CascadeCRs[3].frustum.Intersects( boundingSphere ) ) - m_RenderQueues[3]->GetVobs().push_back( vob ); - } - return XR_SUCCESS; } @@ -648,7 +598,6 @@ XRESULT D3D11ShadowMap::DrawWorldShadow( ) RenderShadowmaps( renderParams ); Engine::GAPI->SetCameraReplacementPtr( nullptr ); - m_RenderQueues[cascadeIdx]->Reset(); } } @@ -664,7 +613,7 @@ XRESULT D3D11ShadowMap::DrawRainShadowmap() { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawRainShadowmap" ); - graphicsEngine->Effects->DrawRainShadowmap(); + return graphicsEngine->Effects->DrawRainShadowmap(); } return XR_SUCCESS; } @@ -674,7 +623,9 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawPointlightLights" ); @@ -694,13 +645,10 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( if ( settings.LimitLightIntesity ) { 
Engine::GAPI->GetRendererState().BlendState.BlendOp = GothicBlendStateInfo::BO_BLEND_OP_MAX; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthWriteEnabled = false; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->SetupVS_ExMeshDrawCall(); graphicsEngine->SetupVS_ExConstantBuffer(); @@ -709,11 +657,14 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( graphicsEngine->CopyDepthStencil(); // Set the main rendertarget - m_context->OMSetRenderTargets( 1, graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), graphicsEngine->GetDepthBuffer()->GetDepthStencilView().Get() ); + m_context->OMSetRenderTargets( 1, &outputRTV, dsv ); DS_PointLightConstantBuffer plcb = {}; - XMStoreFloat4x4( &plcb.PL_InvProj, XMMatrixInverse( nullptr, XMLoadFloat4x4( &Engine::GAPI->GetProjectionMatrix() ) ) ); + { + auto& proj = Engine::GAPI->GetProjectionMatrix(); + plcb.PL_ProjParams = float4( 1.0f / proj._11, 1.0f / proj._22, proj._43, proj._33 ); + } XMStoreFloat4x4( &plcb.PL_InvView, XMMatrixInverse( nullptr, XMLoadFloat4x4( &Engine::GAPI->GetRendererState().TransformState.TransformView ) ) ); plcb.PL_ViewportSize = Engine::GraphicsEngine->GetResolution(); @@ -793,16 +744,12 @@ XRESULT D3D11ShadowMap::DrawPointlightLights( if ( Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = false; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_FRONT; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } else { if ( !Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled ) { 
Engine::GAPI->GetRendererState().DepthState.DepthBufferEnabled = true; Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_BACK; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); graphicsEngine->UpdateRenderStates(); } } @@ -836,7 +783,9 @@ XRESULT D3D11ShadowMap::DrawLighting( RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer& specular, - RenderToTextureBuffer& depthCopy) { + RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto& settings = Engine::GAPI->GetRendererState().RendererSettings; @@ -853,12 +802,11 @@ XRESULT D3D11ShadowMap::DrawLighting( Engine::GAPI->SetFarPlane(static_cast(settings.SectionDrawRadius) * WORLD_SECTION_SIZE ); - DrawPointlightLights(lights, color, normals, specular, depthCopy); + DrawPointlightLights(lights, color, normals, specular, depthCopy, outputRTV, dsv); - DrawWorldLights(); + DrawWorldLights( outputRTV ); - m_context->OMSetRenderTargets( 1, graphicsEngine->GetHDRBackBuffer().GetRenderTargetView().GetAddressOf(), - graphicsEngine->GetDepthBuffer()->GetDepthStencilView().Get() ); + m_context->OMSetRenderTargets( 1, &outputRTV, dsv ); return XR_SUCCESS; } @@ -911,7 +859,6 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { m_context->OMSetRenderTargets( 1, params.DebugRTV.GetAddressOf(), dsvOverwrite.Get() ); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; } - Engine::GAPI->GetRendererState().BlendState.SetDirty(); // Dont render shadows from the sun when it isn't on the sky if ( isNotWorldShadowMap || @@ -956,26 +903,23 @@ void D3D11ShadowMap::RenderShadowmaps( const RenderShadowmapsParams& params ) { WORLD_SECTION_SIZE ); } -XRESULT D3D11ShadowMap::DrawWorldLights() +XRESULT 
D3D11ShadowMap::DrawWorldLights(ID3D11RenderTargetView* outputRTV) { auto graphicsEngine = reinterpret_cast(Engine::GraphicsEngine); auto _ = graphicsEngine->RecordGraphicsEvent( L"DrawWorldLights" ); auto& settings = Engine::GAPI->GetRendererState().RendererSettings; - + Engine::GAPI->GetRendererState().BlendState.BlendOp = GothicBlendStateInfo::BO_BLEND_OP_ADD; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); Engine::GAPI->GetRendererState().DepthState.DepthBufferCompareFunc = GothicDepthBufferStateInfo::CF_COMPARISON_ALWAYS; - Engine::GAPI->GetRendererState().DepthState.SetDirty(); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Modify light when raining float rain = Engine::GAPI->GetRainFXWeight(); float wetness = Engine::GAPI->GetSceneWetness(); - - XMMATRIX view = XMMatrixTranspose(Engine::GAPI->GetViewMatrixXM()); + + XMMATRIX view = XMMatrixTranspose( Engine::GAPI->GetViewMatrixXM() ); bool isSnow = oCGame::GetGame() && oCGame::GetGame()->_zCSession_world @@ -983,7 +927,7 @@ XRESULT D3D11ShadowMap::DrawWorldLights() && oCGame::GetGame()->_zCSession_world->GetSkyControllerOutdoor()->GetWeatherType() == zTWEATHER_SNOW; // Switch global light shader when raining - if ( wetness > 0.0f && !isSnow) { + if ( wetness > 0.0f && !isSnow ) { // Same shader, just has a DEFINE set to enable rain-related effects graphicsEngine->SetActivePixelShader( "PS_DS_AtmosphericScattering_Rain" ); } else { @@ -998,11 +942,18 @@ XRESULT D3D11ShadowMap::DrawWorldLights() graphicsEngine->GetActivePS()->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); graphicsEngine->GetActivePS()->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + auto& proj = Engine::GAPI->GetProjectionMatrix(); DS_ScreenQuadConstantBuffer scb = {}; - XMStoreFloat4x4( &scb.SQ_InvProj, XMMatrixInverse( nullptr, XMLoadFloat4x4( &Engine::GAPI->GetProjectionMatrix() ) ) ); + 
scb.SQ_ProjParams = float4( 1.0f / proj._11, 1.0f / proj._22, proj._43, proj._33 ); XMStoreFloat4x4( &scb.SQ_InvView, XMMatrixInverse( nullptr, XMLoadFloat4x4( &Engine::GAPI->GetRendererState().TransformState.TransformView ) ) ); scb.SQ_View = Engine::GAPI->GetRendererState().TransformState.TransformView; + static uint32_t frameCounter = 0; + if ( proj._13 != 0 && proj._23 != 0) { + // only when we have jitter in the frame + scb.SQ_FrameIndex = frameCounter++; + } + XMStoreFloat3( scb.SQ_LightDirectionVS.toXMFLOAT3(), XMVector3TransformNormal( XMLoadFloat3( sky->GetAtmosphereCB().AC_LightPos.toXMFLOAT3() ), view ) ); @@ -1020,15 +971,20 @@ XRESULT D3D11ShadowMap::DrawWorldLights() // CSM: Alle Cascade-Matrizen setzen for ( size_t cascadeIdx = 0; cascadeIdx < MAX_CSM_CASCADES; ++cascadeIdx ) { - scb.SQ_ShadowView[cascadeIdx] = m_CascadeCRs[cascadeIdx].ViewReplacement; - scb.SQ_ShadowProj[cascadeIdx] = m_CascadeCRs[cascadeIdx].ProjectionReplacement; + XMStoreFloat4x4( &scb.SQ_ShadowViewProj[cascadeIdx], + XMLoadFloat4x4( &m_CascadeCRs[cascadeIdx].ProjectionReplacement ) * + XMLoadFloat4x4( &m_CascadeCRs[cascadeIdx].ViewReplacement ) + ); } scb.SQ_ShadowmapSize = static_cast( this->GetSizeX() ); // Get rain matrix - scb.SQ_RainView = graphicsEngine->Effects->GetRainShadowmapCameraRepl().ViewReplacement; - scb.SQ_RainProj = graphicsEngine->Effects->GetRainShadowmapCameraRepl().ProjectionReplacement; + + XMStoreFloat4x4( &scb.SQ_RainViewProj, + XMLoadFloat4x4( &graphicsEngine->Effects->GetRainShadowmapCameraRepl().ViewReplacement )* + XMLoadFloat4x4( &graphicsEngine->Effects->GetRainShadowmapCameraRepl().ProjectionReplacement ) + ); scb.SQ_ShadowStrength = settings.ShadowStrength; scb.SQ_ShadowAOStrength = settings.ShadowAOStrength; @@ -1060,7 +1016,7 @@ XRESULT D3D11ShadowMap::DrawWorldLights() graphicsEngine->GetActivePS()->GetConstantBuffer()[0]->BindToPixelShader( 0 ); PFXVS_ConstantBuffer vscb; - vscb.PFXVS_InvProj = scb.SQ_InvProj; + vscb.PFXVS_ProjParams = 
scb.SQ_ProjParams; graphicsEngine->GetActiveVS()->GetConstantBuffer()[0]->UpdateBuffer( &vscb ); graphicsEngine->GetActiveVS()->GetConstantBuffer()[0]->BindToVertexShader( 0 ); @@ -1142,12 +1098,10 @@ void XM_CALLCONV D3D11ShadowMap::RenderShadowCube( Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; // Should be false, but needs to be true for SV_Depth to work - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } else { m_context->OMSetRenderTargets( 1, debugRTV.GetAddressOf(), face.Get() ); Engine::GAPI->GetRendererState().BlendState.ColorWritesEnabled = true; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } // Always render shadowcube when dynamic shadows are enabled diff --git a/D3D11Engine/D3D11ShadowMap.h b/D3D11Engine/D3D11ShadowMap.h index 8f4dd79b..4bb16cd1 100644 --- a/D3D11Engine/D3D11ShadowMap.h +++ b/D3D11Engine/D3D11ShadowMap.h @@ -94,10 +94,10 @@ class D3D11ShadowMap { } // Bind world shadowmap SRV to a pixel shader slot (binds entire cascade array) - void BindToPixelShader( ID3D11DeviceContext1* context, UINT slot ); + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ); // Bind the shadowmap sampler to the given slot - void BindSampler( ID3D11DeviceContext1* context, UINT slot ); + void BindSampler( ID3D11DeviceContext* context, UINT slot ); XRESULT PrepareRender(); @@ -111,14 +111,18 @@ class D3D11ShadowMap { XRESULT DrawWorldShadow(); XRESULT DrawRainShadowmap(); XRESULT DrawPointlightLights(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); /** Renders the shadowmaps for the sun using parameter struct */ void RenderShadowmaps( const RenderShadowmapsParams& params ); - XRESULT DrawWorldLights(); + XRESULT DrawWorldLights( ID3D11RenderTargetView* outputRTV ); XRESULT 
DrawLighting(std::vector& lights, RenderToTextureBuffer& color, RenderToTextureBuffer& normals, RenderToTextureBuffer - & specular, RenderToTextureBuffer& depthCopy); + & specular, RenderToTextureBuffer& depthCopy, + ID3D11RenderTargetView* outputRTV, + ID3D11DepthStencilView* dsv ); void XM_CALLCONV RenderShadowCube( DirectX::FXMVECTOR position, float range, @@ -142,7 +146,6 @@ class D3D11ShadowMap { /* 4 */ { 0.98f, 1.9f }, // Players should really want to use 4 cascades for best quality }; - D3D11RenderQueue* GetRenderQueue( int cascadeIndex ) { return m_RenderQueues[cascadeIndex].get(); } private: Microsoft::WRL::ComPtr m_device; Microsoft::WRL::ComPtr m_context; @@ -154,7 +157,6 @@ class D3D11ShadowMap { Microsoft::WRL::ComPtr m_shadowmapSampler; std::array m_CascadeCRs; - std::array, MAX_CSM_CASCADES> m_RenderQueues; std::vector m_CascadeSplits; std::array m_ShouldUpdateCascade; XMFLOAT3 m_WorldShadowPos; diff --git a/D3D11Engine/D3D11StructuredBuffer.h b/D3D11Engine/D3D11StructuredBuffer.h new file mode 100644 index 00000000..a47dcfd5 --- /dev/null +++ b/D3D11Engine/D3D11StructuredBuffer.h @@ -0,0 +1,135 @@ +#pragma once + +#include "pch.h" +#include + +// Templated structured buffer for GPU compute/shader access +template +class D3D11StructuredBuffer { +public: + D3D11StructuredBuffer() : ElementCount( 0 ), MaxElementCount( 0 ) {} + + ~D3D11StructuredBuffer() = default; + + // Initialize the buffer with a maximum capacity + HRESULT Init( ID3D11Device* device, UINT maxElements, bool cpuWrite = true, bool gpuWrite = false ) { + MaxElementCount = maxElements; + ElementCount = 0; + + D3D11_BUFFER_DESC desc = {}; + desc.ByteWidth = sizeof( T ) * maxElements; + desc.StructureByteStride = sizeof( T ); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + + if ( cpuWrite ) { + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + } else if ( gpuWrite ) { + desc.Usage = 
D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE + | (device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0 ? D3D11_BIND_UNORDERED_ACCESS : 0); + } else { + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + } + + HRESULT hr = device->CreateBuffer( &desc, nullptr, Buffer.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + + // Create SRV + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = maxElements; + + hr = device->CreateShaderResourceView( Buffer.Get(), &srvDesc, SRV.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + + // Create UAV if GPU writable + if ( gpuWrite ) { + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = maxElements; + + hr = device->CreateUnorderedAccessView( Buffer.Get(), &uavDesc, UAV.GetAddressOf() ); + if ( FAILED( hr ) ) return hr; + } + + return S_OK; + } + + // Update buffer contents (for dynamic buffers) + HRESULT UpdateBuffer( ID3D11DeviceContext* context, const T* data, UINT count ) { + if ( count > MaxElementCount ) { + LogError() << "StructuredBuffer overflow: " << count << " > " << MaxElementCount; + count = MaxElementCount; + } + + ElementCount = count; + + D3D11_MAPPED_SUBRESOURCE mapped; + HRESULT hr = context->Map( Buffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped ); + if ( FAILED( hr ) ) return hr; + + memcpy( mapped.pData, data, sizeof( T ) * count ); + context->Unmap( Buffer.Get(), 0 ); + + return S_OK; + } + + // Update buffer contents (for default buffers) + void UpdateBufferDefault( ID3D11DeviceContext* context, const T* data, UINT count ) { + if ( count > MaxElementCount ) { + LogError() << "StructuredBuffer 
overflow: " << count << " > " << MaxElementCount; + count = MaxElementCount; + } + ElementCount = count; + context->UpdateSubresource( Buffer.Get(), 0, nullptr, data, 0, 0 ); + } + + // Bind to vertex shader + void BindToVertexShader( ID3D11DeviceContext* context, UINT slot ) { + context->VSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Bind to pixel shader + void BindToPixelShader( ID3D11DeviceContext* context, UINT slot ) { + context->PSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Unbind from vertex shader + void UnbindFromVertexShader( ID3D11DeviceContext* context, UINT slot ) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->VSSetShaderResources( slot, 1, &nullSRV ); + } + + // Bind to compute shader (SRV) + void BindToComputeShader( ID3D11DeviceContext* context, UINT slot ) { + context->CSSetShaderResources( slot, 1, SRV.GetAddressOf() ); + } + + // Unbind from compute shader + void UnbindFromComputeShader( ID3D11DeviceContext* context, UINT slot ) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources( slot, 1, &nullSRV ); + } + + UINT GetElementCount() const { return ElementCount; } + UINT GetMaxElementCount() const { return MaxElementCount; } + ID3D11Buffer* GetBuffer() const { return Buffer.Get(); } + ID3D11ShaderResourceView* GetSRV() const { return SRV.Get(); } + ID3D11UnorderedAccessView* GetUAV() const { return UAV.Get(); } + +private: + Microsoft::WRL::ComPtr Buffer; + Microsoft::WRL::ComPtr SRV; + Microsoft::WRL::ComPtr UAV; + UINT ElementCount; + UINT MaxElementCount; +}; diff --git a/D3D11Engine/D3D11Texture.cpp b/D3D11Engine/D3D11Texture.cpp index eddd39af..a5699692 100644 --- a/D3D11Engine/D3D11Texture.cpp +++ b/D3D11Engine/D3D11Texture.cpp @@ -14,7 +14,13 @@ static size_t ConvertedDataSize = 0; unsigned char* ConvertTextureData( UINT TextureWidth, UINT TextureHeight, DXGI_FORMAT TextureFormat, unsigned char* data ) { UINT realDataSize = TextureWidth * TextureHeight * 4; - 
ConvertedData = reinterpret_cast( malloc( realDataSize ) ); + if ( ConvertedDataSize < realDataSize ) { + if (ConvertedData) { + free( ConvertedData ); + } + ConvertedData = reinterpret_cast( malloc( realDataSize ) ); + ConvertedDataSize = realDataSize; + } if ( TextureFormat == DXGI_FORMAT_B5G6R5_UNORM ) { Convert565to8888( ConvertedData, data, realDataSize ); } else if ( TextureFormat == DXGI_FORMAT_B5G5R5A1_UNORM ) { @@ -98,52 +104,26 @@ XRESULT D3D11Texture::Init( const std::string& file ) { /** Updates the Texture-Object */ XRESULT D3D11Texture::UpdateData( void* data, int mip ) { D3D11GraphicsEngineBase* engine = reinterpret_cast(Engine::GraphicsEngine); - if ( ConvertedData ) { - free( ConvertedData ); - ConvertedData = nullptr; - } UINT TextureWidth = (TextureSize.x >> mip); UINT TextureHeight = (TextureSize.y >> mip); - Microsoft::WRL::ComPtr stagingTexture; - D3D11_TEXTURE2D_DESC stagingTextureDesc; - Texture.Get()->GetDesc( &stagingTextureDesc ); - stagingTextureDesc.Width = TextureWidth; - stagingTextureDesc.Height = TextureHeight; - stagingTextureDesc.MipLevels = 1; - stagingTextureDesc.BindFlags = 0; - stagingTextureDesc.MiscFlags = 0; - stagingTextureDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - stagingTextureDesc.Usage = D3D11_USAGE_STAGING; + void* srcData = data; + UINT rowPitch = GetRowPitchBytes( mip ); - D3D11_SUBRESOURCE_DATA stagingTextureData; if ( Is16BitTexture() && !NativeSupport16BitTextures ) { - stagingTextureData.pSysMem = ConvertTextureData( TextureWidth, TextureHeight, TextureFormat, reinterpret_cast( data ) ); - stagingTextureData.SysMemPitch = GetRowPitchBytes( mip ) * 2; - } else { - stagingTextureData.pSysMem = data; - stagingTextureData.SysMemPitch = GetRowPitchBytes( mip ); + srcData = ConvertTextureData( TextureWidth, TextureHeight, TextureFormat, reinterpret_cast( data ) ); + rowPitch = GetRowPitchBytes( mip ) * 2; } - stagingTextureData.SysMemSlicePitch = 0; - HRESULT result = engine->GetDevice()->CreateTexture2D( 
&stagingTextureDesc, &stagingTextureData, stagingTexture.ReleaseAndGetAddressOf() ); - if ( FAILED( result ) ) - return XR_FAILED; - - SetDebugName( stagingTexture.Get(), "D3D11Texture->UpdateData->stagingTexture" ); - - engine->GetContext()->CopySubresourceRegion( Texture.Get(), mip, 0, 0, 0, stagingTexture.Get(), 0, nullptr ); + // UpdateSubresource directly into the DEFAULT texture — no staging texture needed + engine->GetContext()->UpdateSubresource( Texture.Get(), mip, nullptr, srcData, rowPitch, 0 ); return XR_SUCCESS; } /** Updates the Texture-Object using the deferred context (For loading in an other thread) */ XRESULT D3D11Texture::UpdateDataDeferred( void* data, int mip ) { D3D11GraphicsEngineBase* engine = reinterpret_cast(Engine::GraphicsEngine); - if ( ConvertedData ) { - free( ConvertedData ); - ConvertedData = nullptr; - } UINT TextureWidth = (TextureSize.x >> mip); UINT TextureHeight = (TextureSize.y >> mip); diff --git a/D3D11Engine/D3D11TextureAtlasManager.h b/D3D11Engine/D3D11TextureAtlasManager.h new file mode 100644 index 00000000..660294a4 --- /dev/null +++ b/D3D11Engine/D3D11TextureAtlasManager.h @@ -0,0 +1,277 @@ +#pragma once +#include "pch.h" + +#include +#include +#include +#include +#include +#include "ConstantBufferStructs.h" + +// Internal struct for bin packing +struct PackItem { + int originalIndex; + UINT width; + UINT height; + UINT x, y, slice; + ID3D11Texture2D* texture; + D3D11_TEXTURE2D_DESC desc; +}; + +class TextureManager { +private: + // Helper to align sizes for power-of-two mip boundaries + static UINT Align( UINT value, UINT alignment ) { + return (value + alignment - 1) & ~(alignment - 1); + } + + // Returns the block size for BC compressed formats (4), or 1 for uncompressed + static UINT GetBlockSize( DXGI_FORMAT fmt ) { + switch ( fmt ) { + case DXGI_FORMAT_BC1_UNORM: case DXGI_FORMAT_BC1_UNORM_SRGB: + case DXGI_FORMAT_BC2_UNORM: case DXGI_FORMAT_BC2_UNORM_SRGB: + case DXGI_FORMAT_BC3_UNORM: case 
DXGI_FORMAT_BC3_UNORM_SRGB: + return 4; + default: return 1; + } + } + + // Generates the mip levels that are missing from the source texture (item.desc.MipLevels < mipLevels) + // by decompressing the last source mip, running box-filter downsampling, re-compressing to + // atlasFormat, and uploading each new level into the atlas via a temporary immutable texture. + static void GenerateMissingMips( + ID3D11Device* device, ID3D11DeviceContext* context, + ID3D11Texture2D* atlasTextureArray, + const PackItem& item, DXGI_FORMAT atlasFormat, UINT mipLevels ) + { + // Capture the source texture to CPU memory (creates an internal staging copy) + DirectX::ScratchImage captured; + if ( FAILED( DirectX::CaptureTexture( device, context, item.texture, captured ) ) ) + return; + + // Grab the last available mip as the downsampling base + const DirectX::Image* lastMipImg = captured.GetImage( item.desc.MipLevels - 1, 0, 0 ); + if ( !lastMipImg ) return; + + // GenerateMipMaps requires uncompressed input — decompress BC textures first + DirectX::ScratchImage decompressed; + const DirectX::Image* baseImg = lastMipImg; + if ( DirectX::IsCompressed( lastMipImg->format ) ) { + if ( FAILED( DirectX::Decompress( *lastMipImg, DXGI_FORMAT_R8G8B8A8_UNORM, decompressed ) ) ) + return; + baseImg = decompressed.GetImage( 0, 0, 0 ); + } + + // Generate: level 0 = base (already copied to atlas), levels 1..N = the missing mips + UINT levelsToGen = mipLevels - item.desc.MipLevels + 1; + DirectX::ScratchImage mipChain; + if ( FAILED( DirectX::GenerateMipMaps( *baseImg, DirectX::TEX_FILTER_BOX, levelsToGen, mipChain ) ) ) + return; + + // Re-compress the generated levels back to the atlas BC format. + // Try GPU-accelerated compression first; fall back to CPU if unsupported. 
+ const DirectX::ScratchImage* finalChain = &mipChain; + DirectX::ScratchImage recompressed; + if ( DirectX::IsCompressed( atlasFormat ) ) { + HRESULT hr = DirectX::Compress( device, + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ); + if ( FAILED( hr ) ) { + // GPU BC compression not supported on this hardware — use CPU path + recompressed = DirectX::ScratchImage{}; + if ( FAILED( DirectX::Compress( + mipChain.GetImages(), mipChain.GetImageCount(), mipChain.GetMetadata(), + atlasFormat, DirectX::TEX_COMPRESS_DEFAULT, DirectX::TEX_ALPHA_WEIGHT_DEFAULT, + recompressed ) ) ) + return; + } + finalChain = &recompressed; + } + + // Upload each new mip via a temporary immutable texture + CopySubresourceRegion + for ( UINT mip = item.desc.MipLevels; mip < mipLevels; ++mip ) { + // chainIdx 0 = the base (already in atlas), so start at 1 + UINT chainIdx = mip - item.desc.MipLevels + 1; + const DirectX::Image* src = finalChain->GetImage( chainIdx, 0, 0 ); + if ( !src || !src->pixels ) continue; + + // BC formats require texture dimensions to be multiples of the block size (4). + // Small mips can be sub-block, so align up to avoid CREATETEXTURE2D_INVALIDDIMENSIONS. 
+ UINT bsz = GetBlockSize( atlasFormat ); + D3D11_TEXTURE2D_DESC tmpDesc = {}; + tmpDesc.Width = Align( (UINT)src->width, bsz ); + tmpDesc.Height = Align( (UINT)src->height, bsz ); + tmpDesc.MipLevels = 1; + tmpDesc.ArraySize = 1; + tmpDesc.Format = src->format; + tmpDesc.SampleDesc.Count = 1; + tmpDesc.Usage = D3D11_USAGE_IMMUTABLE; + tmpDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + D3D11_SUBRESOURCE_DATA initData = {}; + initData.pSysMem = src->pixels; + initData.SysMemPitch = (UINT)src->rowPitch; + + ID3D11Texture2D* tmpTex = nullptr; + if ( SUCCEEDED( device->CreateTexture2D( &tmpDesc, &initData, &tmpTex ) ) ) { + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + D3D11_BOX box = { 0, 0, 0, tmpDesc.Width, tmpDesc.Height, 1 }; + context->CopySubresourceRegion( atlasTextureArray, dstSub, mipX, mipY, 0, tmpTex, 0, &box ); + tmpTex->Release(); + } + } + } + +public: + struct AtlasResult { + ID3D11Texture2D* atlasTextureArray = nullptr; + ID3D11ShaderResourceView* atlasSRV = nullptr; + std::vector descriptors; + + void Destroy() { + SAFE_RELEASE( atlasSRV ); + SAFE_RELEASE( atlasTextureArray ); + descriptors.clear(); + } + }; + + static AtlasResult CreateAtlasArray( ID3D11Device* device, ID3D11DeviceContext* context, + std::basic_string_view sourceTextures, + // const std::vector& sourceTextures, + UINT atlasSize = 2048, UINT mipLevels = 6 ) + { + if ( sourceTextures.empty() ) return {}; + + AtlasResult result; + result.descriptors.resize( sourceTextures.size() ); + + // Determine format from first texture for alignment calculation. + // For BC formats (blockSize=4), coordinates must remain block-aligned at every mip level. 
+ D3D11_TEXTURE2D_DESC firstDesc; + sourceTextures[0]->GetDesc( &firstDesc ); + DXGI_FORMAT atlasFormat = firstDesc.Format; + + const UINT blockSize = GetBlockSize( atlasFormat ); + const UINT MipAlignment = blockSize * (1 << (mipLevels - 1)); + + std::vector items; + items.reserve( sourceTextures.size() ); + + // 1. Extract info and validate + for ( size_t i = 0; i < sourceTextures.size(); ++i ) { + D3D11_TEXTURE2D_DESC desc; + sourceTextures[i]->GetDesc( &desc ); + + if ( desc.Format != atlasFormat ) { + // For a Texture2DArray, all formats must match. + throw std::runtime_error( "All textures must have the same DXGI_FORMAT." ); + } + + items.push_back( { (int)i, desc.Width, desc.Height, 0, 0, 0, sourceTextures[i], desc}); + } + + // 2. Sort by height descending for optimal shelf-packing + std::sort( items.begin(), items.end(), []( const PackItem& a, const PackItem& b ) { + return a.height > b.height; + } ); + + // 3. CPU Bin Packing (Shelf Packing Algorithm) + UINT currentX = 0, currentY = 0, currentShelfHeight = 0, currentSlice = 0; + + for ( auto& item : items ) { + UINT alignedW = Align( item.width, MipAlignment ); + UINT alignedH = Align( item.height, MipAlignment ); + + // Move to next shelf if it doesn't fit horizontally + if ( currentX + alignedW > atlasSize ) { + currentX = 0; + currentY += Align( currentShelfHeight, MipAlignment ); + currentShelfHeight = 0; + } + + // Move to next array slice if it doesn't fit vertically + if ( currentY + alignedH > atlasSize ) { + currentSlice++; + currentX = 0; + currentY = 0; + currentShelfHeight = 0; + } + + item.x = currentX; + item.y = currentY; + item.slice = currentSlice; + + currentX += alignedW; + currentShelfHeight = std::max( currentShelfHeight, alignedH ); + } + + UINT totalSlices = currentSlice + 1; + + // 4. 
Create the target Texture2DArray + D3D11_TEXTURE2D_DESC arrayDesc = {}; + arrayDesc.Width = atlasSize; + arrayDesc.Height = atlasSize; + arrayDesc.MipLevels = mipLevels; + arrayDesc.ArraySize = totalSlices; + arrayDesc.Format = atlasFormat; + arrayDesc.SampleDesc.Count = 1; + arrayDesc.SampleDesc.Quality = 0; + arrayDesc.Usage = D3D11_USAGE_DEFAULT; + arrayDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + + if ( FAILED( device->CreateTexture2D( &arrayDesc, nullptr, &result.atlasTextureArray ) ) ) { + throw std::runtime_error( "Failed to create Texture2DArray atlas." ); + } + + // 5. GPU CopySubresourceRegion (Extremely fast, zero CPU-readback) + + for ( const auto& item : items ) { + UINT maxMipsToCopy = std::min( item.desc.MipLevels, mipLevels ); + + for ( UINT mip = 0; mip < maxMipsToCopy; ++mip ) { + // Calculate scaled coordinates for the current mip level + UINT mipX = item.x >> mip; + UINT mipY = item.y >> mip; + + // Mip source & destination indices + UINT srcSub = D3D11CalcSubresource( mip, 0, item.desc.MipLevels ); + UINT dstSub = D3D11CalcSubresource( mip, item.slice, mipLevels ); + + context->CopySubresourceRegion( + result.atlasTextureArray, dstSub, + mipX, mipY, 0, + item.texture, srcSub, + nullptr // nullptr means copy the whole subresource + ); + } + + // 5b. Fill missing MIP levels using DirectXTex bilinear downsampling + re-compression. + if ( item.desc.MipLevels < mipLevels ) + GenerateMissingMips( device, context, result.atlasTextureArray, item, atlasFormat, mipLevels ); + + // Write out descriptors in the *original* input order + TextureDescriptor& outDesc = result.descriptors[item.originalIndex]; + outDesc.slice = item.slice; + outDesc.uStart = (float)item.x / atlasSize; + outDesc.vStart = (float)item.y / atlasSize; + outDesc.uEnd = (float)(item.x + item.width) / atlasSize; + outDesc.vEnd = (float)(item.y + item.height) / atlasSize; + } + + // 6. 
Create SRV + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = atlasFormat; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = 0; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = 0; + srvDesc.Texture2DArray.ArraySize = totalSlices; + + device->CreateShaderResourceView( result.atlasTextureArray, &srvDesc, &result.atlasSRV); + + return result; + } +}; diff --git a/D3D11Engine/D3D11VobAtlasPass.cpp b/D3D11Engine/D3D11VobAtlasPass.cpp new file mode 100644 index 00000000..1c5b8dd8 --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.cpp @@ -0,0 +1,618 @@ +#include "D3D11VobAtlasPass.h" +#include "D3D11GraphicsEngine.h" + +#include "D3D11ShaderManager.h" +#include "D3D11VShader.h" +#include "D3D11PShader.h" +#include "D3D11CShader.h" +#include "D3D11ConstantBuffer.h" +#include "GothicAPI.h" +#include "GSky.h" +#include "RenderToTextureBuffer.h" +#include "WorldObjects.h" +#include "VertexTypes.h" +#include "zCTexture.h" +#include "zCMaterial.h" +#include "zCVob.h" +#include "zCVisual.h" + +#include +#include + +// ----- globals defined in D3D11GraphicsEngine.cpp ----- +extern bool SupportTextureAtlases; +extern float vobAnimation_WindStrength; +namespace { + constexpr DXGI_FORMAT VERTEX_INDEX_DXGI_FORMAT = sizeof( VERTEX_INDEX ) == sizeof( unsigned short ) ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; +} + +typedef void( __cdecl* PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT )( + ID3D11DeviceContext* context, unsigned int drawCount, + ID3D11Buffer* buffer, unsigned int alignedByteOffsetForArgs, + unsigned int alignedByteStrideForArgs ); +extern PFN_DRAWMULTIINDEXEDINSTANCEDINDIRECT DrawMultiIndexedInstancedIndirect; + +// ------------------------------------------------------- + +D3D11VobAtlasPass::D3D11VobAtlasPass( D3D11GraphicsEngine* engine ) + : m_Engine( engine ) { +} + +// ============================================================ +// Build – entry point called from OnWorldLoaded +// ============================================================ +void D3D11VobAtlasPass::Build() { + // Reset everything + for ( size_t i = 0; i < TEXTURE_ATLAS_MAX; i++ ) + m_TextureAtlasses[(DXGI_FORMAT)i].Destroy(); + m_TextureAtlasLookup.clear(); + m_AtlasDrawGroups.clear(); + + if ( !SupportTextureAtlases || + !Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ) { + return; + } + + BuildTextureAtlasses(); + + if ( m_TextureAtlasLookup.empty() ) + return; + + BuildGeometryBuffers(); + BuildGPUCullingBuffers(); +} + +// ============================================================ +// BuildTextureAtlasses +// ============================================================ +void D3D11VobAtlasPass::BuildTextureAtlasses() { + struct TextureInfo { + zCTexture* gothicTexture; + DXGI_FORMAT Format; + Microsoft::WRL::ComPtr Texture2D; + }; + + std::unordered_set seenTextures; + std::vector uniqueTextures; + + for ( auto vobInfo : m_Engine->m_StaticVobs ) { + for ( auto& byTex : reinterpret_cast(vobInfo->VisualInfo)->MeshesByTexture ) { + zCTexture* tex = byTex.first.Material->GetTexture(); + + if ( !tex ) { + auto vis = reinterpret_cast(vobInfo->VisualInfo)->Visual; + LogError() + << "Texture not found for visual " << vobInfo->VisualInfo->VisualName + << " Visual Type: " << vis->GetVisualType(); + + 
continue; + } + + if ( !seenTextures.insert( tex ).second ) { + continue; + } + + auto cachedState = tex->CacheIn( -1 ); + if ( cachedState != zRES_CACHED_IN ) { + LogError() << "Texture " << tex->GetName() << " was not cached in"; + continue; + } + + auto surface = tex->GetSurface(); + if ( !surface || !surface->IsSurfaceReady() ) { + LogError() << "Texture " << tex->GetName() << " surface not ready"; + continue; + } + + auto engineTex = surface->GetEngineTexture(); + if ( !engineTex ) { + LogError() << "Texture " << tex->GetName() << " no engine texture"; + continue; + } + + D3D11_TEXTURE2D_DESC desc; + engineTex->GetTextureObject()->GetDesc( &desc ); + if ( desc.Format < 1 || desc.Format >= TEXTURE_ATLAS_MAX ) { + LogError() << "Texture " << tex->GetName() << " has unsupported format for atlas: " << desc.Format; + continue; + } + uniqueTextures.push_back( { tex, desc.Format, engineTex->GetTextureObject() } ); + } + } + + // Sort by format so same-format textures are contiguous + std::sort( uniqueTextures.begin(), uniqueTextures.end(), + []( const TextureInfo& a, const TextureInfo& b ) { return a.Format < b.Format; } ); + + // Create one Texture2DArray atlas per contiguous format range + size_t rangeStart = 0; + while ( rangeStart < uniqueTextures.size() ) { + DXGI_FORMAT fmt = uniqueTextures[rangeStart].Format; + size_t rangeEnd = rangeStart; + while ( rangeEnd < uniqueTextures.size() && uniqueTextures[rangeEnd].Format == fmt ) + rangeEnd++; + + std::vector texPtrs; + texPtrs.reserve( rangeEnd - rangeStart ); + for ( size_t i = rangeStart; i < rangeEnd; i++ ) + texPtrs.push_back( uniqueTextures[i].Texture2D.Get() ); + + std::basic_string_view txView( texPtrs.data(), texPtrs.size() ); + TextureManager::AtlasResult atlas = TextureManager::CreateAtlasArray( + m_Engine->GetDevice().Get(), m_Engine->GetContext().Get(), txView, 2048, 6 ); + + for ( size_t i = 0; i < texPtrs.size(); i++ ) { + m_TextureAtlasLookup[uniqueTextures[rangeStart + i].gothicTexture] = { + fmt, 
atlas.descriptors[i] + }; + } + m_TextureAtlasses[fmt] = atlas; + rangeStart = rangeEnd; + } + + LogInfo() << "VOB Atlas: " << uniqueTextures.size() << " unique textures, " + << m_TextureAtlasLookup.size() << " mapped"; +} + +// ============================================================ +// BuildGeometryBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGeometryBuffers() { + std::vector allVertices; + std::vector allIndices; + std::map groupsByFormat; + std::unordered_set processedMeshes; + + // Pre-count to avoid incremental reallocation + { + size_t totalVertices = 0, totalIndices = 0; + std::unordered_set counted; + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + if ( m_TextureAtlasLookup.find( meshKey.Texture ) == m_TextureAtlasLookup.end() ) + continue; + for ( MeshInfo* mi : meshList ) { + if ( counted.insert( mi ).second ) { + totalVertices += mi->Vertices.size(); + totalIndices += mi->Indices.size(); + } + } + } + } + allVertices.reserve( totalVertices ); + allIndices.reserve( totalIndices ); + } + + for ( auto const& [proto, visual] : Engine::GAPI->GetStaticMeshVisuals() ) { + for ( auto const& [meshKey, meshList] : visual->MeshesByTexture ) { + auto it = m_TextureAtlasLookup.find( meshKey.Texture ); + if ( it == m_TextureAtlasLookup.end() ) + continue; + + const TextureAtlasLookup& lookup = it->second; + auto& group = groupsByFormat[lookup.atlasFormat]; + group.format = lookup.atlasFormat; + + for ( MeshInfo* mi : meshList ) { + if ( !processedMeshes.insert( mi ).second ) + continue; + + UINT baseVertex = static_cast(allVertices.size()); + UINT startIndex = static_cast(allIndices.size()); + + allVertices.insert( allVertices.end(), mi->Vertices.begin(), mi->Vertices.end() ); + allIndices.insert( allIndices.end(), mi->Indices.begin(), mi->Indices.end() ); + + StaticSubmeshEntry entry; + entry.indexCount = 
static_cast(mi->Indices.size()); + entry.startIndexLocation = startIndex; + entry.baseVertexLocation = static_cast(baseVertex); + entry.atlasDesc = lookup.descriptor; + entry.visual = visual; + group.submeshes.push_back( entry ); + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = entry.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = entry.startIndexLocation; + args.BaseVertexLocation = entry.baseVertexLocation; + args.StartInstanceLocation = 0; + group.indirectArgs.push_back( args ); + } + } + } + + if ( allVertices.empty() ) { + LogWarn() << "D3D11VobAtlasPass::BuildGeometryBuffers: No vertices to process"; + return; + } + + m_StaticGlobalVertexBuffer = std::make_unique(); + m_StaticGlobalVertexBuffer->Init( + allVertices.data(), + static_cast(allVertices.size() * sizeof( ExVertexStruct )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_StaticGlobalIndexBuffer = std::make_unique(); + m_StaticGlobalIndexBuffer->Init( + allIndices.data(), + static_cast(allIndices.size() * sizeof( VERTEX_INDEX )), + D3D11VertexBuffer::B_INDEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + UINT maxInstanceIds = static_cast(m_Engine->m_StaticVobs.size() * 4); + if ( maxInstanceIds < 4096 ) + maxInstanceIds = 4096; + std::vector instanceIds( maxInstanceIds ); + for ( uint32_t i = 0; i < maxInstanceIds; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + + m_AtlasDrawGroups.clear(); + for ( auto& [fmt, group] : groupsByFormat ) { + if ( group.indirectArgs.empty() ) + continue; + + UINT bufSize = static_cast(group.indirectArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + 
group.indirectBuffer = std::make_unique(); + group.indirectBuffer->Init( + group.indirectArgs.data(), bufSize, + D3D11IndirectBuffer::B_VERTEXBUFFER, + D3D11IndirectBuffer::U_DYNAMIC, + D3D11IndirectBuffer::CA_WRITE ); + + m_AtlasDrawGroups.push_back( std::move( group ) ); + } + + LogInfo() << "VOB Atlas geometry: " << allVertices.size() << " vertices, " + << allIndices.size() << " indices, " + << m_AtlasDrawGroups.size() << " atlas groups, " + << processedMeshes.size() << " unique submeshes"; +} + +// ============================================================ +// BuildGPUCullingBuffers +// ============================================================ +void D3D11VobAtlasPass::BuildGPUCullingBuffers() { + if ( m_AtlasDrawGroups.empty() || m_Engine->m_StaticVobs.empty() ) + return; + + // --- 1. Build visual -> vob-count mapping --- + std::unordered_map vobsPerVisual; + std::unordered_map> vobIndicesByVisual; + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + auto* visual = reinterpret_cast(m_Engine->m_StaticVobs[i]->VisualInfo); + vobsPerVisual[visual]++; + vobIndicesByVisual[visual].push_back( i ); + } + + // --- 2. Build merged indirect args + SubmeshGPUData --- + std::vector mergedArgs; + std::unordered_map> visualSubmeshMap; + { + size_t totalSubmeshes = 0; + for ( const auto& group : m_AtlasDrawGroups ) totalSubmeshes += group.submeshes.size(); + mergedArgs.reserve( totalSubmeshes ); + } + + UINT runningInstanceOffset = 0; + UINT globalArgIndex = 0; + + for ( auto& group : m_AtlasDrawGroups ) { + group.mergedArgsOffset = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + group.mergedArgsCount = static_cast(group.indirectArgs.size()); + + for ( size_t si = 0; si < group.submeshes.size(); si++ ) { + const auto& submesh = group.submeshes[si]; + MeshVisualInfo* visual = submesh.visual; + UINT maxInstances = vobsPerVisual.count( visual ) ? 
vobsPerVisual[visual] : 0; + + D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS args = {}; + args.IndexCountPerInstance = submesh.indexCount; + args.InstanceCount = 0; + args.StartIndexLocation = submesh.startIndexLocation; + args.BaseVertexLocation = submesh.baseVertexLocation; + args.StartInstanceLocation = runningInstanceOffset; + mergedArgs.push_back( args ); + + SubmeshGPUData smGPU = {}; + smGPU.slice = submesh.atlasDesc.slice; + smGPU.uStart = submesh.atlasDesc.uStart; + smGPU.vStart = submesh.atlasDesc.vStart; + smGPU.uEnd = submesh.atlasDesc.uEnd; + smGPU.vEnd = submesh.atlasDesc.vEnd; + smGPU.argIndex = globalArgIndex; + smGPU.instanceBaseOffset = runningInstanceOffset; + smGPU.globalSourceIndex = 0; + + visualSubmeshMap[visual].push_back( smGPU ); + + runningInstanceOffset += maxInstances; + globalArgIndex++; + } + } + + m_TotalMaxInstances = runningInstanceOffset; + + // --- 3. Flatten per-visual submesh entries --- + struct VisualSubmeshRange { UINT start; UINT count; }; + std::unordered_map visualSubmeshRanges; + std::vector submeshGPU; + submeshGPU.reserve( mergedArgs.size() ); + + for ( auto& [visual, entries] : visualSubmeshMap ) { + UINT start = static_cast(submeshGPU.size()); + submeshGPU.insert( submeshGPU.end(), entries.begin(), entries.end() ); + visualSubmeshRanges[visual] = { start, static_cast(entries.size()) }; + } + + // --- 4. 
Build VobGPUData --- + std::vector vobGPU; + vobGPU.reserve( m_Engine->m_StaticVobs.size() ); + + for ( size_t i = 0; i < m_Engine->m_StaticVobs.size(); i++ ) { + VobInfo* v = m_Engine->m_StaticVobs[i]; + auto* visual = reinterpret_cast(v->VisualInfo); + + VobGPUData data = {}; + DirectX::BoundingBox bb = Frustum::BBoxFromzTBBox3D( v->Vob->GetBBox() ); + data.aabbCenter = bb.Center; + data.aabbExtent = bb.Extents; + data.world = v->WorldMatrix; + data.prevWorld = v->WorldMatrix; + data.color = v->GroundColor; + + zTAnimationMode aniMode = v->Vob->GetVisualAniMode(); + if ( aniMode != zVISUAL_ANIMODE_NONE ) { + data.aniModeStrength = v->Vob->GetVisualAniModeStrength(); + data.canBeAffectedByPlayer = (!v->Vob->GetDynColl() ? 1.0f : 0.0f); + } else { + data.aniModeStrength = 0.0f; + data.canBeAffectedByPlayer = 0.0f; + } + + auto it = visualSubmeshRanges.find( visual ); + if ( it != visualSubmeshRanges.end() ) { + data.submeshStart = it->second.start; + data.submeshCount = it->second.count; + } + vobGPU.push_back( data ); + } + + // --- 5. 
Upload to GPU --- + auto* device = m_Engine->GetDevice().Get(); + auto* context = m_Engine->GetContext().Get(); + + m_VobGPUBuffer = std::make_unique>(); + m_VobGPUBuffer->Init( device, static_cast(vobGPU.size()), false, false ); + m_VobGPUBuffer->UpdateBufferDefault( context, vobGPU.data(), static_cast(vobGPU.size()) ); + + m_SubmeshGPUBuffer = std::make_unique>(); + m_SubmeshGPUBuffer->Init( device, static_cast(submeshGPU.size()), false, false ); + m_SubmeshGPUBuffer->UpdateBufferDefault( context, submeshGPU.data(), static_cast(submeshGPU.size()) ); + + UINT instanceCapacity = std::max( m_TotalMaxInstances, 1u ); + m_InstanceBufferGPU = std::make_unique>(); + m_InstanceBufferGPU->Init( device, instanceCapacity, false, true ); + + UINT argsSize = static_cast(mergedArgs.size() * sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS )); + m_MergedIndirectArgs = std::make_unique(); + m_MergedIndirectArgs->Init( + mergedArgs.data(), argsSize, + D3D11IndirectBuffer::B_UNORDERED_ACCESS, + D3D11IndirectBuffer::U_DEFAULT, + D3D11IndirectBuffer::CA_NONE ); + + m_MergedArgsReset = mergedArgs; + + D3D11_BUFFER_DESC templateDesc = {}; + templateDesc.ByteWidth = argsSize; + templateDesc.Usage = D3D11_USAGE_DEFAULT; + templateDesc.BindFlags = 0; + templateDesc.MiscFlags = D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS; + + D3D11_SUBRESOURCE_DATA templateData = {}; + templateData.pSysMem = mergedArgs.data(); + device->CreateBuffer( &templateDesc, &templateData, m_IndirectArgsTemplate.ReleaseAndGetAddressOf() ); + + CullConstants initCB = {}; + m_CullConstantBuffer = std::make_unique( sizeof( CullConstants ), &initCB ); + + if ( m_TotalMaxInstances > 0 ) { + std::vector instanceIds( m_TotalMaxInstances ); + for ( uint32_t i = 0; i < m_TotalMaxInstances; i++ ) + instanceIds[i] = i; + + m_GlobalInstanceIdBuffer = std::make_unique(); + m_GlobalInstanceIdBuffer->Init( + instanceIds.data(), + static_cast(instanceIds.size() * sizeof( uint32_t )), + D3D11VertexBuffer::B_VERTEXBUFFER, + 
D3D11VertexBuffer::U_IMMUTABLE, + D3D11VertexBuffer::CA_NONE ); + } + + LogInfo() << "VOB Atlas GPU Culling: " << vobGPU.size() << " vobs, " + << submeshGPU.size() << " submesh entries, " + << mergedArgs.size() << " indirect args, " + << m_TotalMaxInstances << " max instances"; +} + +// ============================================================ +// Draw – per-frame GPU-cull + indirect draw +// ============================================================ +XRESULT D3D11VobAtlasPass::Draw( const Frustum& frustum, bool bindPS ) { + if ( m_AtlasDrawGroups.empty() || !m_VobGPUBuffer || + !m_StaticGlobalVertexBuffer || !m_StaticGlobalIndexBuffer ) + return XR_SUCCESS; + + auto _ = m_Engine->RecordGraphicsEvent( L"DrawVOBsIndirect" ); + auto& context = m_Engine->GetContext(); + + // --- 0. Build Hi-Z pyramid for occlusion culling (main pass only) --- + const bool useHiZ = bindPS && m_Engine->m_HiZTexture && m_Engine->m_HiZSRV; + if ( useHiZ ) { + m_Engine->CopyDepthStencil(); + m_Engine->BuildHiZPyramid(); + } + + // --- 1. Reset indirect args InstanceCounts --- + context->CopyResource( m_MergedIndirectArgs->GetIndirectBuffer().Get(), + m_IndirectArgsTemplate.Get() ); + + // --- 2. Update cull constant buffer --- + CullConstants cb = {}; + memcpy( cb.frustumPlanes, frustum.GetPlanes().data(), 6 * sizeof( XMFLOAT4 ) ); + cb.cameraPosition = Engine::GAPI->GetCameraPosition(); + cb.drawDistance = Engine::GAPI->GetRendererState().RendererSettings.OutdoorVobDrawRadius; + cb.globalWindStrength = vobAnimation_WindStrength; + cb.windAdvanced = (Engine::GAPI->GetRendererState().RendererSettings.WindQuality + == GothicRendererSettings::EWindQuality::WIND_QUALITY_ADVANCED) ? 
1 : 0; + cb.numVobs = static_cast(m_Engine->m_StaticVobs.size()); + cb.feedbackFrameNumber = 0; + + if ( useHiZ ) { + cb.enableHiZ = 1; + cb.hiZMipCount = m_Engine->m_HiZMipCount; + cb.hiZWidth = static_cast(m_Engine->DepthStencilBuffer->GetSizeX()); + cb.hiZHeight = static_cast(m_Engine->DepthStencilBuffer->GetSizeY()); + + XMMATRIX view = Engine::GAPI->GetViewMatrixXM(); + auto& projF = Engine::GAPI->GetProjectionMatrix(); + XMStoreFloat4x4( &cb.viewProjection, XMMatrixMultiply( view, XMLoadFloat4x4( &projF ) ) ); + } else { + cb.enableHiZ = 0; + cb.hiZMipCount = 0; + cb.hiZWidth = 0.0f; + cb.hiZHeight = 0.0f; + XMStoreFloat4x4( &cb.viewProjection, XMMatrixIdentity() ); + } + + m_CullConstantBuffer->UpdateBuffer( &cb ); + m_CullConstantBuffer->BindToComputeShader( 0 ); + + // --- 3. Dispatch CS_CullVobs --- + auto cullCS = m_Engine->ShaderManager->GetCShader( "CS_CullVobs" ); + if ( !cullCS ) + return XR_SUCCESS; + cullCS->Apply(); + + ID3D11ShaderResourceView* srvs[2] = { + m_VobGPUBuffer->GetSRV(), + m_SubmeshGPUBuffer->GetSRV() + }; + context->CSSetShaderResources( 0, 2, srvs ); + + if ( useHiZ ) { + ID3D11ShaderResourceView* hiZSRV = m_Engine->m_HiZSRV.Get(); + context->CSSetShaderResources( 2, 1, &hiZSRV ); + } + + ID3D11UnorderedAccessView* uavs[2] = { + m_InstanceBufferGPU->GetUAV(), + m_MergedIndirectArgs->GetUnorderedAccessView().Get() + }; + context->CSSetUnorderedAccessViews( 0, 2, uavs, nullptr ); + + UINT numGroups = (static_cast(m_Engine->m_StaticVobs.size()) + 63) / 64; + context->Dispatch( numGroups, 1, 1 ); + + // Unbind CS resources + ID3D11ShaderResourceView* nullSRV[3] = { nullptr, nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[2] = { nullptr, nullptr }; + context->CSSetShaderResources( 0, 3, nullSRV ); + context->CSSetUnorderedAccessViews( 0, 2, nullUAV, nullptr ); + context->CSSetShader( nullptr, nullptr, 0 ); + + // --- 4. 
Bind global geometry --- + UINT strides[2] = { sizeof( ExVertexStruct ), sizeof( uint32_t ) }; + UINT offsets[2] = { 0, 0 }; + ID3D11Buffer* vbs[2] = { + m_StaticGlobalVertexBuffer->GetVertexBuffer().Get(), + m_GlobalInstanceIdBuffer->GetVertexBuffer().Get() + }; + context->IASetVertexBuffers( 0, 2, vbs, strides, offsets ); + context->IASetIndexBuffer( m_StaticGlobalIndexBuffer->GetVertexBuffer().Get(), + VERTEX_INDEX_DXGI_FORMAT, 0 ); + context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST ); + + // --- 5. Bind instance structured buffer to VS t1 --- + ID3D11ShaderResourceView* instSRV = m_InstanceBufferGPU->GetSRV(); + context->VSSetShaderResources( 1, 1, &instSRV ); + + // --- 6. Set vertex shader --- + m_Engine->SetActiveVertexShader( "VS_ExInstancedObjIndirectAtlas" ); + m_Engine->SetupVS_ExMeshDrawCall(); + m_Engine->SetupVS_ExConstantBuffer(); + + VS_ExConstantBuffer_Wind windBuff{}; + m_Engine->ApplyWindProps( windBuff ); + m_Engine->ActiveVS->GetConstantBuffer()[1]->UpdateBuffer( &windBuff ); + m_Engine->ActiveVS->GetConstantBuffer()[1]->BindToVertexShader( 1 ); + + if ( bindPS ) + context->PSSetShaderResources( 4, 1, m_Engine->ReflectionCube.GetAddressOf() ); + + m_Engine->ActiveVS->Apply(); + + // --- 7. 
Draw per atlas group --- + MaterialInfo defMaterial{}; + GSky* sky = Engine::GAPI->GetSky(); + + for ( auto& group : m_AtlasDrawGroups ) { + ID3D11ShaderResourceView* srv = m_TextureAtlasses[group.format].atlasSRV; + if ( !srv ) + continue; + + const bool needsPS = bindPS || (group.format == DXGI_FORMAT_BC2_UNORM); + + if ( needsPS ) { + context->PSSetShaderResources( 0, 1, &srv ); + + if ( bindPS && group.format != DXGI_FORMAT_BC2_UNORM ) + m_Engine->SetActivePixelShader( "PS_DiffuseAtlas" ); + else + m_Engine->SetActivePixelShader( "PS_DiffuseAtlasAlphaTest" ); + + m_Engine->ActivePS->GetConstantBuffer()[0]->UpdateBuffer( + &Engine::GAPI->GetRendererState().GraphicsState ); + m_Engine->ActivePS->GetConstantBuffer()[0]->BindToPixelShader( 0 ); + + m_Engine->ActivePS->GetConstantBuffer()[1]->UpdateBuffer( &sky->GetAtmosphereCB() ); + m_Engine->ActivePS->GetConstantBuffer()[1]->BindToPixelShader( 1 ); + + m_Engine->ActivePS->GetConstantBuffer()[2]->UpdateBuffer( &defMaterial.buffer ); + m_Engine->ActivePS->GetConstantBuffer()[2]->BindToPixelShader( 2 ); + + m_Engine->OutdoorVobsConstantBuffer->BindToPixelShader( 3 ); + + m_Engine->ActivePS->Apply(); + } else { + context->PSSetShader( nullptr, nullptr, 0 ); + } + + DrawMultiIndexedInstancedIndirect( + context.Get(), + group.mergedArgsCount, + m_MergedIndirectArgs->GetIndirectBuffer().Get(), + group.mergedArgsOffset, + sizeof( D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS ) ); + } + + // Unbind instance buffer from VS + ID3D11ShaderResourceView* nullVSSRV = nullptr; + context->VSSetShaderResources( 1, 1, &nullVSSRV ); + + return XR_SUCCESS; +} diff --git a/D3D11Engine/D3D11VobAtlasPass.h b/D3D11Engine/D3D11VobAtlasPass.h new file mode 100644 index 00000000..5c965261 --- /dev/null +++ b/D3D11Engine/D3D11VobAtlasPass.h @@ -0,0 +1,81 @@ +#pragma once +#include "D3D11AtlasTypes.h" +#include "D3D11StructuredBuffer.h" +#include "D3D11VertexBuffer.h" +#include "D3D11ConstantBuffer.h" +#include "VobCulling.h" + +#include 
+#include +#include +#include +#include + +class D3D11GraphicsEngine; +class Frustum; +class zCTexture; + +/** + * Encapsulates all texture-atlas-based GPU-driven rendering for static VOBs. + * + * Responsibilities: + * - Building per-format Texture2DArray atlases from static-VOB diffuse textures + * - Building the merged global VB/IB and per-submesh indirect-args buffer + * - Building the GPU structured buffers used by CS_CullVobs + * - Executing the GPU-culling compute pass and the subsequent indirect draw + * + * The engine keeps one instance of this class. Call Build() when a new world + * is loaded (OnWorldLoaded), and Draw() every frame in place of the old + * DrawVOBsIndirect(). + */ +class D3D11VobAtlasPass { + friend class D3D11GraphicsEngine; +public: + explicit D3D11VobAtlasPass( D3D11GraphicsEngine* engine ); + + /** (Re-)build atlases, geometry buffers, and GPU culling buffers. + * Called from D3D11GraphicsEngine::OnWorldLoaded(). */ + void Build(); + + /** GPU-cull static VOBs and draw them with indirect multi-draw. + * bindPS=false is used in shadow passes to skip the pixel shader. */ + XRESULT Draw( const Frustum& frustum, bool bindPS = true ); + + /** True once Build() has completed and at least one draw group exists. */ + bool IsReady() const { return !m_AtlasDrawGroups.empty(); } + + /** Atlas lookup (read-only access for other systems if needed). 
*/ + const std::unordered_map& GetAtlasLookup() const { + return m_TextureAtlasLookup; + } + +private: + D3D11GraphicsEngine* m_Engine; + + // ---- Atlas textures ---- + std::array m_TextureAtlasses{}; + std::unordered_map m_TextureAtlasLookup; + + // ---- Global geometry ---- + std::unique_ptr m_StaticGlobalVertexBuffer; + std::unique_ptr m_StaticGlobalIndexBuffer; + std::unique_ptr m_GlobalInstanceIdBuffer; + std::vector m_AtlasDrawGroups; + + // (legacy slot – not yet used but reserved for future streaming) + std::unique_ptr> m_StaticVobInstanceBuffer; + + // ---- GPU culling buffers ---- + std::unique_ptr> m_VobGPUBuffer; + std::unique_ptr> m_SubmeshGPUBuffer; + std::unique_ptr> m_InstanceBufferGPU; + std::unique_ptr m_MergedIndirectArgs; + Microsoft::WRL::ComPtr m_IndirectArgsTemplate; + std::unique_ptr m_CullConstantBuffer; + std::vector m_MergedArgsReset; + UINT m_TotalMaxInstances = 0; + + void BuildTextureAtlasses(); + void BuildGeometryBuffers(); + void BuildGPUCullingBuffers(); +}; diff --git a/D3D11Engine/D3D7/MyDirect3DDevice7.h b/D3D11Engine/D3D7/MyDirect3DDevice7.h index 9a93a2cd..15ff82aa 100644 --- a/D3D11Engine/D3D7/MyDirect3DDevice7.h +++ b/D3D11Engine/D3D7/MyDirect3DDevice7.h @@ -245,15 +245,15 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { } break; - case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ZENABLE: state.DepthState.DepthBufferEnabled = Value != 0; break; case D3DRENDERSTATE_ALPHATESTENABLE: state.GraphicsState.SetGraphicsSwitch( GSWITCH_ALPHAREF, Value != 0 ); break; - case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); state.BlendState.SetDirty(); break; - //case D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); state.RasterizerState.SetDirty(); break; - case 
D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_SRCBLEND: state.BlendState.SrcBlend = static_cast(Value); break; + case D3DRENDERSTATE_DESTBLEND: state.BlendState.DestBlend = static_cast(Value); break; + //case D3DRENDERSTATE_CULLMODE: state.RasterizerState.CullMode = static_cast(Value); break; + case D3DRENDERSTATE_ZFUNC: state.DepthState.DepthBufferCompareFunc = static_cast(Value); break; case D3DRENDERSTATE_ALPHAREF: state.GraphicsState.FF_AlphaRef = static_cast(Value) / 255.0f; break; // Ref for masked - case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = Value != 0; state.BlendState.SetDirty(); break; - case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; state.DepthState.SetDirty(); break; + case D3DRENDERSTATE_ALPHABLENDENABLE: state.BlendState.BlendEnabled = Value != 0; break; + case D3DRENDERSTATE_ZBIAS: state.RasterizerState.ZBias = Value; break; case D3DRENDERSTATE_TEXTUREFACTOR: state.GraphicsState.FF_TextureFactor = float4( Value ); break; case D3DRENDERSTATE_LIGHTING: state.GraphicsState.SetGraphicsSwitch( GSWITCH_LIGHING, Value != 0 ); break; } @@ -345,15 +345,12 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { case D3DTSS_ADDRESS: state.SamplerState.AddressU = static_cast(Value); state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSU: state.SamplerState.AddressU = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_ADDRESSV: state.SamplerState.AddressV = static_cast(Value); - state.SamplerState.SetDirty(); break; case D3DTSS_BORDERCOLOR: break; @@ -508,7 +505,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->SetActiveVertexShader( 
"VS_TransformedEx" ); Engine::GraphicsEngine->BindViewportInformation( "VS_TransformedEx", 0 ); break; @@ -572,7 +568,6 @@ class MyDirect3DDevice7 : public IDirect3DDevice7 { // Gothic wants that for the sky Engine::GAPI->GetRendererState().RasterizerState.FrontCounterClockwise = true; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); Engine::GraphicsEngine->DrawVertexBufferFF( static_cast(lpd3dVertexBuffer)->GetVertexBuffer(), dwNumVertices, dwStartVertex, sizeof( Gothic_XYZRHW_DIF_T1_Vertex ) ); break; diff --git a/D3D11Engine/EditorLinePrimitive.cpp b/D3D11Engine/EditorLinePrimitive.cpp index 2407980b..ce6e9978 100644 --- a/D3D11Engine/EditorLinePrimitive.cpp +++ b/D3D11Engine/EditorLinePrimitive.cpp @@ -857,7 +857,6 @@ void EditorLinePrimitive::RenderVertexBuffer( const Microsoft::WRL::ComPtrSetDefaultStates(); Engine::GAPI->GetRendererState().BlendState.SetAlphaBlending(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); engine->UpdateRenderStates(); Shader->Apply(); diff --git a/D3D11Engine/Frustum.h b/D3D11Engine/Frustum.h index e0ebfde9..29e8e6e8 100644 --- a/D3D11Engine/Frustum.h +++ b/D3D11Engine/Frustum.h @@ -60,6 +60,8 @@ class Frustum {0, 0, 0, 1} /* Identity Orientation */); viewSpaceFrustum.Transform(m_orientedBox, invView); + CacheOBBPlanes(); + m_useBoundingOrientedBox = true; m_useSphere = false; m_always_containing = false; @@ -72,7 +74,9 @@ class Frustum f.m_always_containing = true; f.isValid = true; return f; - } + } + + bool SupportsCulling() const { return !m_always_containing; } // Für perspektivische Projektion (normale Kamera) void __vectorcall BuildPerspective(FXMMATRIX view, CXMMATRIX proj) { @@ -116,23 +120,63 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Intersects(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = aabb.Extents.x; + const float 
ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // Distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // Projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // If the center is further outside the plane than its projected radius, + // the entire box is disjoint. We can early-out immediately. + if ( dist > projRadius ) { + return false; + } } - return m_frustum.Intersects(aabb); + + // If no separating plane was found, it must be intersecting or contained. + return true; } // Schneller Sphere-Test für VOBs - bool Intersects(const BoundingSphere& sphere) const { - if (m_always_containing) return true; + bool Intersects( const BoundingSphere& sphere ) const { + if ( m_always_containing ) return true; - if (m_useSphere) { - return m_boundingSphere.Intersects(sphere); + if ( m_useSphere ) { + return m_boundingSphere.Intersects( sphere ); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Intersects(sphere); + + const float cx = sphere.Center.x; + const float cy = sphere.Center.y; + const float cz = sphere.Center.z; + const float r = sphere.Radius; + + // Scalar early-out loop. + // For outward-facing planes, if distance > radius, it is completely outside. 
+ for ( int i = 0; i < 6; ++i ) { + const float dist = m_cachedPlanes[i].x * cx + + m_cachedPlanes[i].y * cy + + m_cachedPlanes[i].z * cz + + m_cachedPlanes[i].w; + if ( dist > r ) { + return false; + } } - return m_frustum.Intersects(sphere); + + return true; } // Schneller AABB-Test @@ -141,22 +185,52 @@ class Frustum if (m_useSphere) { return m_boundingSphere.Contains(aabb); } - if (m_useBoundingOrientedBox) { - return m_orientedBox.Contains(aabb); + + const float cx = aabb.Center.x; + const float cy = aabb.Center.y; + const float cz = aabb.Center.z; + const float ex = aabb.Extents.x; + const float ey = aabb.Extents.y; + const float ez = aabb.Extents.z; + + bool intersects = false; + + for ( int i = 0; i < 6; ++i ) { + const float nx = m_cachedPlanes[i].x; + const float ny = m_cachedPlanes[i].y; + const float nz = m_cachedPlanes[i].z; + const float w = m_cachedPlanes[i].w; + + // 1. Calculate distance from the AABB center to the plane + const float dist = nx * cx + ny * cy + nz * cz + w; + + // 2. Calculate the projected radius of the AABB onto the plane's normal + const float projRadius = ex * std::abs( nx ) + ey * std::abs( ny ) + ez * std::abs( nz ); + + // 3. Since planes are OUTWARD facing: + if ( dist > projRadius ) { + return DirectX::ContainmentType::DISJOINT; // Completely outside + } + if ( dist > -projRadius ) { + intersects = true; // Partially inside, keep checking the other planes + } } - return aabb.ContainedBy( - XMLoadFloat4(&m_cachedPlanes[0]), - XMLoadFloat4(&m_cachedPlanes[1]), - XMLoadFloat4(&m_cachedPlanes[2]), - XMLoadFloat4(&m_cachedPlanes[3]), - XMLoadFloat4(&m_cachedPlanes[4]), - XMLoadFloat4(&m_cachedPlanes[5]) - ); + + return intersects ? 
DirectX::ContainmentType::INTERSECTS : DirectX::ContainmentType::CONTAINS; } bool Intersects( const zTBBox3D& aabb ) const { if ( m_always_containing ) return true; - return Intersects( BBoxFromzTBBox3D( aabb ) ); + // Fast scalar conversion - avoids memory->SIMD->memory roundtrip + BoundingBox bb; + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = (aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; + + return Intersects( bb ); } DirectX::ContainmentType Contains(const zTBBox3D& aabb) const { @@ -191,12 +265,14 @@ class Frustum return Contains(bb); } - static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& box) { + static BoundingBox BBoxFromzTBBox3D(const zTBBox3D& aabb) { BoundingBox bb; - XMVECTOR bbMin = XMLoadFloat3(&box.Min); - XMVECTOR bbMax = XMLoadFloat3(&box.Max); - XMStoreFloat3(&bb.Center, XMVectorScale(XMVectorAdd(bbMin, bbMax), 0.5f)); - XMStoreFloat3(&bb.Extents, XMVectorScale(XMVectorSubtract(bbMax, bbMin), 0.5f)); + bb.Center.x = (aabb.Min.x + aabb.Max.x) * 0.5f; + bb.Center.y = (aabb.Min.y + aabb.Max.y) * 0.5f; + bb.Center.z = (aabb.Min.z + aabb.Max.z) * 0.5f; + bb.Extents.x = (aabb.Max.x - aabb.Min.x) * 0.5f; + bb.Extents.y = (aabb.Max.y - aabb.Min.y) * 0.5f; + bb.Extents.z = (aabb.Max.z - aabb.Min.z) * 0.5f; return bb; } @@ -207,6 +283,8 @@ class Frustum } bool IsValid() const { return isValid; } + + const std::array& GetPlanes() const { return m_cachedPlanes; } private: // Cache world-space planes for fast culling (called after frustum is transformed to world space) // Plane order: [0]=Left, [1]=Right, [2]=Bottom, [3]=Top, [4]=Near, [5]=Far @@ -215,29 +293,29 @@ class Frustum XMVECTOR vOrigin = XMLoadFloat3(&m_frustum.Origin); XMVECTOR vOrientation = XMLoadFloat4(&m_frustum.Orientation); - // Left plane - XMVECTOR plane = XMVectorSet(-1.0f, 0.0f, 
m_frustum.LeftSlope, 0.0f); + // Near plane + XMVECTOR plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[0], XMPlaneNormalize(plane)); + // Left plane + plane = XMVectorSet(-1.0f, 0.0f, m_frustum.LeftSlope, 0.0f); + plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); + XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + // Right plane plane = XMVectorSet(1.0f, 0.0f, -m_frustum.RightSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[1], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); // Bottom plane plane = XMVectorSet(0.0f, -1.0f, m_frustum.BottomSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[2], XMPlaneNormalize(plane)); + XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); // Top plane plane = XMVectorSet(0.0f, 1.0f, -m_frustum.TopSlope, 0.0f); plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); - XMStoreFloat4(&m_cachedPlanes[3], XMPlaneNormalize(plane)); - - // Near plane - plane = XMVectorSet(0.0f, 0.0f, -1.0f, m_frustum.Near); - plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[4], XMPlaneNormalize(plane)); // Far plane @@ -245,6 +323,47 @@ class Frustum plane = DirectX::MathInternal::XMPlaneTransform(plane, vOrientation, vOrigin); XMStoreFloat4(&m_cachedPlanes[5], XMPlaneNormalize(plane)); } + + // Cache world-space planes from an Oriented Bounding Box (Directional Light / Ortho) +// Plane order: [0]=Near, [1]=Left, [2]=Right, [3]=Bottom, [4]=Top, [5]=Far +void CacheOBBPlanes() { + XMVECTOR C = XMLoadFloat3(&m_orientedBox.Center); + XMVECTOR E = XMLoadFloat3(&m_orientedBox.Extents); + XMVECTOR Q = 
XMLoadFloat4(&m_orientedBox.Orientation); + + XMMATRIX R = XMMatrixRotationQuaternion(Q); + XMVECTOR AxisX = R.r[0]; + XMVECTOR AxisY = R.r[1]; + XMVECTOR AxisZ = R.r[2]; + + XMVECTOR Ex = XMVectorSplatX(E); + XMVECTOR Ey = XMVectorSplatY(E); + XMVECTOR Ez = XMVectorSplatZ(E); + + // Near face: Min Z boundary. Outward normal is -AxisZ + XMVECTOR P_Near = XMVectorSubtract( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[0], XMPlaneFromPointNormal( P_Near, XMVectorNegate( AxisZ ) ) ); + + // Left face: Min X boundary. Outward normal is -AxisX + XMVECTOR P_Left = XMVectorSubtract( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[1], XMPlaneFromPointNormal( P_Left, XMVectorNegate( AxisX ) ) ); + + // Right face: Max X boundary. Outward normal is +AxisX + XMVECTOR P_Right = XMVectorAdd( C, XMVectorMultiply( AxisX, Ex ) ); + XMStoreFloat4( &m_cachedPlanes[2], XMPlaneFromPointNormal( P_Right, AxisX ) ); + + // Bottom face: Min Y boundary. Outward normal is -AxisY + XMVECTOR P_Bottom = XMVectorSubtract( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[3], XMPlaneFromPointNormal( P_Bottom, XMVectorNegate( AxisY ) ) ); + + // Top face: Max Y boundary. Outward normal is +AxisY + XMVECTOR P_Top = XMVectorAdd( C, XMVectorMultiply( AxisY, Ey ) ); + XMStoreFloat4( &m_cachedPlanes[4], XMPlaneFromPointNormal( P_Top, AxisY ) ); + + // Far face: Max Z boundary. 
Outward normal is +AxisZ + XMVECTOR P_Far = XMVectorAdd( C, XMVectorMultiply( AxisZ, Ez ) ); + XMStoreFloat4( &m_cachedPlanes[5], XMPlaneFromPointNormal( P_Far, AxisZ ) ); +} private: // Helper to get frustum corners for AABB creation @@ -258,7 +377,7 @@ class Frustum BoundingSphere m_boundingSphere; BoundingOrientedBox m_orientedBox; - std::array m_cachedPlanes{}; // [0]=Left, [1]=Right, [2]=Bottom, [3]=Top, [4]=Near, [5]=Far + std::array m_cachedPlanes{}; bool m_useSphere = false; bool m_useBoundingOrientedBox = false; bool m_always_containing = false; diff --git a/D3D11Engine/GVegetationBox.cpp b/D3D11Engine/GVegetationBox.cpp index 253bd785..94f49944 100644 --- a/D3D11Engine/GVegetationBox.cpp +++ b/D3D11Engine/GVegetationBox.cpp @@ -306,7 +306,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { VegetationTexture->BindToPixelShader( 1 ); Engine::GAPI->GetRendererState().RasterizerState.CullMode = GothicRasterizerStateInfo::CM_CULL_NONE; - Engine::GAPI->GetRendererState().RasterizerState.SetDirty(); // Enable alpha-to-coverage @@ -314,7 +313,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); Engine::GAPI->GetRendererState().BlendState.BlendEnabled = false; Engine::GAPI->GetRendererState().BlendState.AlphaToCoverage = Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage; - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } Engine::GraphicsEngine->SetActiveVertexShader( "VS_GrassInstanced" ); @@ -353,7 +351,6 @@ void GVegetationBox::RenderVegetation( const XMFLOAT3& eye ) { if ( Engine::GAPI->GetRendererState().RendererSettings.VegetationAlphaToCoverage ) { Engine::GAPI->GetRendererState().BlendState.SetDefault(); - Engine::GAPI->GetRendererState().BlendState.SetDirty(); } } diff --git a/D3D11Engine/GothicAPI.cpp b/D3D11Engine/GothicAPI.cpp index 46f6c57e..d3fb058a 100644 --- a/D3D11Engine/GothicAPI.cpp +++ b/D3D11Engine/GothicAPI.cpp @@ -47,6 
+47,7 @@ // TODO: REMOVE THIS! #include "D3D11GraphicsEngine.h" +#include "D3D11TextureAtlasManager.h" #ifndef PUBLIC_RELEASE #define OPT_DBG_NOINLINE __declspec(noinline) @@ -779,11 +780,11 @@ void GothicAPI::ResetVobs() { AnimatedSkeletalVobs.clear(); // Delete light vobs - for ( auto const& it : VobLightMap ) { + for ( auto const& it : VobLights_Sorted ) { Engine::GraphicsEngine->OnVobRemovedFromWorld( it.first ); delete it.second; } - VobLightMap.clear(); + VobLights_Sorted.clear(); } /** Called when the game loaded a new level */ @@ -865,12 +866,11 @@ void GothicAPI::OnWorldLoaded() { zCTree* vobTree = oCGame::GetGame()->_zCSession_world->GetGlobalVobTree(); TraverseVobTree( vobTree ); - // Build instancing cache for the static vobs for each section - BuildStaticMeshInstancingCache(); - // Build vob info cache for the bsp-leafs BuildBspVobMapCache(); + // Build instancing cache for the static vobs for each section + BuildStaticMeshInstancingCache(); #ifdef BUILD_GOTHIC_1_08k if ( LoadedWorldInfo->CustomWorldLoaded ) { CreatezCPolygonsForSections(); @@ -911,6 +911,7 @@ void GothicAPI::OnWorldLoaded() { #endif _canClearVobsByVisual = false; + Engine::GraphicsEngine->OnWorldLoaded(); } void GothicAPI::LoadRendererWorldSettings( GothicRendererSettings& s ) { @@ -1178,7 +1179,6 @@ void GothicAPI::DrawWorldMeshNaive() { // Set up frustum for the camera RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); zCCamera::GetCamera()->Activate(); auto drawRadius = RendererState.RendererSettings.SkeletalMeshDrawRadius; @@ -1832,7 +1832,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { SkeletalVobInfo* svi = SkeletalVobMap[vob]; // Tell all dynamic lights that we removed a vob they could have cached - for ( auto& vlit : VobLightMap ) { + for ( auto& vlit : VobLights_Sorted ) { if ( vi && vlit.second->LightShadowBuffers ) vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( vi ); @@ -1840,7 +1840,12 @@ void 
GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { vlit.second->LightShadowBuffers->OnVobRemovedFromWorld( svi ); } - VobLightInfo* li = VobLightMap[static_cast(vob)]; + VobLightInfo* li = nullptr; + { + auto lit = VobLights_Sorted.find( static_cast(vob) ); + if ( lit != VobLights_Sorted.end() ) + li = lit->second; + } // Erase it from the particle-effect list auto pit = std::find( ParticleEffectVobs.begin(), ParticleEffectVobs.end(), vob ); @@ -1856,7 +1861,7 @@ void GothicAPI::OnRemovedVob( zCVob* vob, zCWorld* world ) { } // Erase it from the list of lights - VobLightMap.erase( static_cast(vob) ); + VobLights_Sorted.erase( static_cast(vob) ); // Remove from BSP-Cache std::vector* nodes = nullptr; @@ -3024,11 +3029,8 @@ void GothicAPI::DrawTransparencyVobs() { if ( !TransparencyVobs.empty() ) { // Setup alpha blending RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); } while ( !TransparencyVobs.empty() ) { @@ -3114,11 +3116,8 @@ void GothicAPI::DrawSkeletalVN() { SkeletalVobInfo* vi = VNSkeletalVobs.back(); RendererState.RasterizerState.SetDefault(); - RendererState.RasterizerState.SetDirty(); RendererState.BlendState.SetAlphaBlending(); - RendererState.BlendState.SetDirty(); RendererState.DepthState.SetDefault(); - RendererState.DepthState.SetDirty(); D3D11GraphicsEngine* g = reinterpret_cast(Engine::GraphicsEngine); @@ -4038,7 +4037,7 @@ void GothicAPI::CollectVisibleVobs( ctx.drawDistances.OutdoorVobsSmall = RendererState.RendererSettings.OutdoorSmallVobDrawRadius; ctx.drawDistances.IndoorVobs = RendererState.RendererSettings.IndoorVobDrawRadius; ctx.drawDistances.VisualFX = RendererState.RendererSettings.VisualFXDrawRadius; - CollectVisibleVobs( ctx ); + CollectVisibleVobs( ctx, collectFlags ); if ( RendererState.RendererSettings.SortRenderQueue ) { struct 
SortableVob { @@ -4081,6 +4080,7 @@ void GothicAPI::CollectVisibleVobs( // they should be unique at this point. if ( collectFlags & COLLECT_MUTATE ) { + for ( auto it : renderQueue.vobs ) { VobInstanceInfo vii = {}; vii.world = it->WorldMatrix; @@ -4272,14 +4272,14 @@ std::vector::iterator GothicAPI::MoveVobFromBspToDynamic( VobInfo* vob static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor ) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; - auto distSq = dist * dist; + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -4306,13 +4306,14 @@ static void CVVH_AddNotDrawnVobToList( static void CVVH_AddNotDrawnVobToList( std::vector& source, - float dist, const RndCullContext& ctx, + float distSq, const RndCullContext& ctx, DirectX::ContainmentType bspContainment, BspTreeVobVisitor* visitor) { const auto camPos = XMLoadFloat3( &ctx.cameraPosition ); - auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs; - auto vDistSq = XMVectorReplicate( dist * dist ); + auto cullingEnabled = Engine::GAPI->GetRendererState().RendererSettings.DebugSettings.Culling.CullVobs + && ctx.frustum.SupportsCulling(); + auto vDistSq = XMVectorReplicate( distSq ); for ( auto const& it : source ) { if ( it->VisibleInRenderPass ) continue; @@ -4401,12 +4402,12 @@ void GothicAPI::BuildBspVobMapCacheHelper( zCBspBase* base ) { for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - // Add the light to the map if not already done - auto vit = VobLightMap.find( vob ); - if ( vit == 
VobLightMap.end() ) { + // Add the light to the sorted vector if not already done + auto [vit, inserted] = VobLights_Sorted.insert( vob, nullptr ); + if ( inserted ) { VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; - VobLightMap[vob] = vi; + vit->second = vi; float minDynamicUpdateLightRange = Engine::GAPI->GetRendererState().RendererSettings.MinLightShadowUpdateRange; if ( RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY @@ -5726,23 +5727,27 @@ static void CollectVisibleVobsHelper( BspInfo* base, const RndCullContext& ctx, BspTreeVobVisitor* visitor, DirectX::ContainmentType inheritedContainment, - float yMaxWorld + float yMaxWorld, + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE ) { - const float vobIndoorDist = ctx.drawDistances.IndoorVobs; - const float vobOutdoorDist = ctx.drawDistances.OutdoorVobs; - const float vobOutdoorSmallDist = ctx.drawDistances.OutdoorVobsSmall; + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; const FXMVECTOR cameraPosition = XMLoadFloat3( &camPos ); - EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE; int clipFlags = EGothicCullFlags::CullSidesNear; if ( ctx.stage == RenderStage::STAGE_DRAW_SHADOWS ) { - collectFlags = EBspTreeCollectFlags::COLLECT_VOBS; clipFlags = EGothicCullFlags::CullSidesNear; } + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + const auto& RendererState = Engine::GAPI->GetRendererState(); - auto& VobLightMap = 
Engine::GAPI->VobLightMap; + auto& VobLights = Engine::GAPI->VobLights_Sorted; while ( base->OriginalNode ) { // Check for occlusion-culling if ( RendererState.RendererSettings.EnableOcclusionCulling && !base->OcclusionInfo.VisibleLastFrame ) { @@ -5754,9 +5759,11 @@ static void CollectVisibleVobsHelper( BspInfo* base, nodeYMax = std::max( nodeYMax, base->OriginalNode->BBox3D.Max.y ); nodeBox.Max.y = nodeYMax; - float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float distSq = checkDist + ? Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; ContainmentType clipResult = inheritedContainment; - if ( dist < vobOutdoorDist ) { + if ( distSq < vobOutdoorDistSq ) { if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { if ( clipResult != ContainmentType::CONTAINS ) { clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); @@ -5795,35 +5802,40 @@ static void CollectVisibleVobsHelper( BspInfo* base, std::vector& listC = base->Vobs; std::vector& listD = base->Mobs; - const float dist = Toolbox::ComputePointAABBDistance( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ); + const float distSq = checkDist + ? 
Toolbox::ComputePointAABBDistanceSq( camPos, base->OriginalNode->BBox3D.Min, base->OriginalNode->BBox3D.Max ) + : 0; if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { - if ( collectFlags & COLLECT_INDOOR_VOBS && dist < vobIndoorDist ) { - CVVH_AddNotDrawnVobToList( listA, vobIndoorDist, ctx, clipResult, visitor ); + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( listA, vobIndoorDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listB, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); } - if ( dist < vobOutdoorDist ) { - CVVH_AddNotDrawnVobToList( listC, vobOutdoorDist, ctx, clipResult, visitor ); + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( listC, vobOutdoorDistSq, ctx, clipResult, visitor ); } } if ( collectFlags & COLLECT_MOBS - && RendererState.RendererSettings.DrawMobs && dist < vobOutdoorSmallDist ) { - CVVH_AddNotDrawnVobToList( listD, vobOutdoorDist, ctx, clipResult, visitor); + && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( listD, vobOutdoorDistSq, ctx, clipResult, visitor); } if ( collectFlags & COLLECT_LIGHTS - && RendererState.RendererSettings.EnableDynamicLighting && dist < visualFXDrawRadius ) { + && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { // Add dynamic lights for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { zCVobLight* vob = leaf->LightVobList.Array[i]; - const float lightCameraDist = XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ); + const float lightCameraDist = checkDist + ? 
XMVectorGetX( XMVector3Length( cameraPosition - vob->GetPositionWorldXM() ) ) + : 0; + if ( lightCameraDist + vob->GetLightRange() < visualFXDrawRadius ) { BoundingSphere lightSphere; @@ -5835,9 +5847,9 @@ static void CollectVisibleVobsHelper( BspInfo* base, continue; } - // Check if we already have this light - auto vit = VobLightMap.find( vob ); - if ( vit == VobLightMap.end() ) { + // Check if we already have this light, insert if new + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { bool PFXVobLight = false; if ( zCVob* parent = vob->GetVobParent() ) { if ( parent->As() ) { @@ -5845,12 +5857,12 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } - // Add if not. This light must have been added during gameplay + // This light must have been added during gameplay VobLightInfo* vi = new VobLightInfo; vi->Vob = vob; vi->IsPFXVobLight = PFXVobLight; vi->UpdateShadows = !PFXVobLight; - vit = VobLightMap.emplace( vob, vi ).first; + vit->second = vi; // Create shadow-buffers for these lights since it was dynamically added to the world if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) @@ -5874,15 +5886,17 @@ static void CollectVisibleVobsHelper( BspInfo* base, boxCell.Max.y = node->BBox3D.Min.y; zTBBox3D tmpbox = boxCell; - float plane_normal; - XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + float plane_normal = FLT_MAX; + if ( checkDist ) XMStoreFloat( &plane_normal, XMVector3Dot( XMLoadFloat3( &node->Plane.Normal ), cameraPosition ) ); + if ( plane_normal > node->Plane.Distance ) { if ( node->Front ) { reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; CollectVisibleVobsHelper( base->Front, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags); } reinterpret_cast(&boxCell.Max)[planeAxis] = node->Plane.Distance; @@ -5894,7 +5908,8 @@ static void 
CollectVisibleVobsHelper( BspInfo* base, CollectVisibleVobsHelper( base->Back, tmpbox, ctx, visitor, clipResult, - yMaxWorld ); + yMaxWorld, + collectFlags ); } reinterpret_cast(&boxCell.Min)[planeAxis] = node->Plane.Distance; @@ -5905,7 +5920,196 @@ static void CollectVisibleVobsHelper( BspInfo* base, } } -void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { +struct BspTraversalNode { + BspInfo* base; + zTBBox3D boxCell; + DirectX::ContainmentType inheritedContainment; +}; + +static void CollectVisibleVobsHelperNonRecursive( BspInfo* base, + zTBBox3D boxCell, + const RndCullContext& ctx, + BspTreeVobVisitor* visitor, + DirectX::ContainmentType inheritedContainment, + float yMaxWorld, + EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE +) { + const float vobIndoorDistSq = ctx.drawDistances.IndoorVobs * ctx.drawDistances.IndoorVobs; + const float vobOutdoorDistSq = ctx.drawDistances.OutdoorVobs * ctx.drawDistances.OutdoorVobs; + const float vobOutdoorSmallDistSq = ctx.drawDistances.OutdoorVobsSmall * ctx.drawDistances.OutdoorVobsSmall; + + const float visualFXDrawRadius = ctx.drawDistances.VisualFX; + const float visualFXDrawRadiusSq = ctx.drawDistances.VisualFX * ctx.drawDistances.VisualFX; + const XMFLOAT3 camPos = ctx.cameraPosition; + + const bool checkDist = (collectFlags & COLLECT_DISABLE_CHECK_DIST) == 0; + + // Cache globals outside the traversal loop to prevent redundant memory fetches + const auto& RendererState = Engine::GAPI->GetRendererState(); + auto& VobLights = Engine::GAPI->VobLights_Sorted; + + // Explicit fixed-size stack replaces recursion: at most one sibling subtree is pushed per interior node visited + // 64 entries is assumed to cover the deepest path; if it ever fills up, the push sites below fall back to CollectVisibleVobsHelper + BspTraversalNode stack[64]; + int stackPtr = 0; + + stack[stackPtr++] = { base, boxCell, inheritedContainment }; + + while ( stackPtr > 0 ) { + BspTraversalNode current = stack[--stackPtr]; + BspInfo* currBase = current.base; + zTBBox3D currBox = current.boxCell; + ContainmentType
clipResult = current.inheritedContainment; + + // The original tail-recursion loop + while ( currBase && currBase->OriginalNode ) { + + if ( RendererState.RendererSettings.EnableOcclusionCulling && !currBase->OcclusionInfo.VisibleLastFrame ) { + break; // Proceed to next item in the stack + } + + zTBBox3D nodeBox = currBase->OriginalNode->BBox3D; + float nodeYMax = std::min( yMaxWorld, camPos.y ); + nodeYMax = std::max( nodeYMax, currBase->OriginalNode->BBox3D.Max.y ); + nodeBox.Max.y = nodeYMax; + + const float distSq = checkDist + ? Toolbox::ComputePointAABBDistanceSq( camPos, currBase->OriginalNode->BBox3D.Min, currBase->OriginalNode->BBox3D.Max ) + : 0; + + if ( distSq < vobOutdoorDistSq ) { + if ( !RendererState.RendererSettings.EnableOcclusionCulling ) { + if ( clipResult != ContainmentType::CONTAINS ) { + clipResult = ctx.frustum.Contains( Frustum::BBoxFromzTBBox3D( nodeBox ) ); + } + } else { + switch ( static_cast( currBase->OcclusionInfo.LastCameraClipType ) ) { + case zTCam_ClipType::ZTCAM_CLIPTYPE_IN: clipResult = ContainmentType::CONTAINS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_CROSSING: clipResult = ContainmentType::INTERSECTS; break; + case zTCam_ClipType::ZTCAM_CLIPTYPE_OUT: clipResult = ContainmentType::DISJOINT; break; + } + } + + if ( clipResult == ContainmentType::DISJOINT ) { + break; + } + } else { + break; // Too far + } + + if ( currBase->OriginalNode->IsLeaf() ) { + zCBspLeaf* leaf = static_cast(currBase->OriginalNode); + + if ( collectFlags & COLLECT_VOBS && RendererState.RendererSettings.DrawVOBs ) { + if ( collectFlags & COLLECT_INDOOR_VOBS && distSq < vobIndoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->IndoorVobs, vobIndoorDistSq, ctx, clipResult, visitor ); + } + if ( distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->SmallVobs, vobOutdoorSmallDistSq, ctx, clipResult, visitor ); + } + if ( distSq < vobOutdoorDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Vobs, vobOutdoorDistSq, ctx, clipResult,
visitor ); + } + } + + if ( collectFlags & COLLECT_MOBS && RendererState.RendererSettings.DrawMobs && distSq < vobOutdoorSmallDistSq ) { + CVVH_AddNotDrawnVobToList( currBase->Mobs, vobOutdoorDistSq, ctx, clipResult, visitor ); + } + + if ( collectFlags & COLLECT_LIGHTS && RendererState.RendererSettings.EnableDynamicLighting && distSq < visualFXDrawRadiusSq ) { + for ( int i = 0; i < leaf->LightVobList.NumInArray; i++ ) { + zCVobLight* vob = leaf->LightVobList.Array[i]; + + // Avoid square root by using squared distances + bool inRange = false; + if ( checkDist ) { + float range = vob->GetLightRange(); + float threshold = visualFXDrawRadius - range; + + if ( threshold > 0.0f ) { + XMFLOAT3 vobPos = vob->GetPositionWorld(); + float dx = camPos.x - vobPos.x; + float dy = camPos.y - vobPos.y; + float dz = camPos.z - vobPos.z; + float lightDistSq = dx * dx + dy * dy + dz * dz; + inRange = lightDistSq < (threshold * threshold); + } + } else { + inRange = true; + } + + if ( inRange ) { + BoundingSphere lightSphere; + lightSphere.Center = vob->GetPositionWorld(); + lightSphere.Radius = vob->GetLightRange(); + + if ( clipResult != ContainmentType::CONTAINS && !ctx.frustum.Intersects( lightSphere ) ) { + continue; + } + + auto [vit, inserted] = VobLights.insert( vob, nullptr ); + if ( inserted ) { + bool PFXVobLight = false; + if ( zCVob* parent = vob->GetVobParent() ) { + if ( parent->As() ) PFXVobLight = true; + } + + VobLightInfo* vi = new VobLightInfo; + vi->Vob = vob; + vi->IsPFXVobLight = PFXVobLight; + vi->UpdateShadows = !PFXVobLight; + vit->second = vi; + + if ( !vi->IsPFXVobLight && RendererState.RendererSettings.EnablePointlightShadows >= GothicRendererSettings::PLS_STATIC_ONLY ) + Engine::GraphicsEngine->CreateShadowedPointLight( &vi->LightShadowBuffers, vi, true ); + } + + VobLightInfo* vi = vit->second; + if ( vi->VisibleInRenderPass ) continue; + visitor->Visit( vi ); + ctx.queue->PushLightVob( vi ); + } + } + } + break; // Break the inner tail-recursion loop to pop
the next stack item + } else { + zCBspNode* node = static_cast(currBase->OriginalNode); + int planeAxis = node->PlaneSignbits; + + currBox.Min.y = node->BBox3D.Min.y; + currBox.Max.y = node->BBox3D.Max.y; + + zTBBox3D tmpbox = currBox; + float plane_normal = FLT_MAX; + + // Scalar math to avoid Load-Hit-Store SIMD stalls + if ( checkDist ) { + plane_normal = (node->Plane.Normal.x * camPos.x) + + (node->Plane.Normal.y * camPos.y) + + (node->Plane.Normal.z * camPos.z); + } + + if ( plane_normal > node->Plane.Distance ) { + if ( node->Front ) { + reinterpret_cast(&tmpbox.Min)[planeAxis] = node->Plane.Distance; + if ( stackPtr < static_cast<int>( sizeof( stack ) / sizeof( stack[0] ) ) ) stack[stackPtr++] = { currBase->Front, tmpbox, clipResult }; else CollectVisibleVobsHelper( currBase->Front, tmpbox, ctx, visitor, clipResult, yMaxWorld, collectFlags ); // stack full: recurse rather than write past the array + } + reinterpret_cast(&currBox.Max)[planeAxis] = node->Plane.Distance; + currBase = currBase->Back; + } else { + if ( node->Back ) { + reinterpret_cast(&tmpbox.Max)[planeAxis] = node->Plane.Distance; + if ( stackPtr < static_cast<int>( sizeof( stack ) / sizeof( stack[0] ) ) ) stack[stackPtr++] = { currBase->Back, tmpbox, clipResult }; else CollectVisibleVobsHelper( currBase->Back, tmpbox, ctx, visitor, clipResult, yMaxWorld, collectFlags ); // stack full: recurse rather than write past the array + } + reinterpret_cast(&currBox.Min)[planeAxis] = node->Plane.Distance; + currBase = currBase->Front; + } + } + } + } +} + +void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx, EBspTreeCollectFlags collectFlags ) { zCBspTree* tree = LoadedWorldInfo->BspTree; zCBspBase* rootBsp = tree->GetRootNode(); @@ -5914,11 +6118,12 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { static thread_local BspTreeVobVisitor bspVobVisitor{}; // Recursively go through the tree and draw all nodes - CollectVisibleVobsHelper( root, root->OriginalNode->BBox3D, + CollectVisibleVobsHelperNonRecursive( root, root->OriginalNode->BBox3D, ctx, &bspVobVisitor, ContainmentType::INTERSECTS, - Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y + Engine::GAPI->GetLoadedWorldInfo()->BspTree->GetRootNode()->BBox3D.Max.y, + collectFlags ); FXMVECTOR camPos = XMLoadFloat3( &ctx.cameraPosition ); @@ -5932,7 +6137,8 @@ void GothicAPI::CollectVisibleVobs( const RndCullContext& ctx ) { std::list removeList; // TODO: This should not be
needed! // Add visible dynamically added vobs - if ( RendererState.RendererSettings.DrawVOBs ) { + if ( RendererState.RendererSettings.DrawVOBs + && (collectFlags & EBspTreeCollectFlags::COLLECT_DYNAMIC_VOBS)) { float dist; for ( VobInfo* it : DynamicallyAddedVobs ) { if ( it->VisibleInRenderPass ) continue; diff --git a/D3D11Engine/GothicAPI.h b/D3D11Engine/GothicAPI.h index e923a467..8c5a7c49 100644 --- a/D3D11Engine/GothicAPI.h +++ b/D3D11Engine/GothicAPI.h @@ -46,15 +46,17 @@ struct RndCullContext { }; enum EBspTreeCollectFlags : unsigned int { - COLLECT_VOBS = 1 << 0, + COLLECT_VOBS = 1 << 0, // static vobs COLLECT_LIGHTS = 1 << 1, - COLLECT_MOBS = 1 << 2, - COLLECT_INDOOR_VOBS = 1 << 3, + COLLECT_MOBS = 1 << 2, // skeletal mobs + COLLECT_INDOOR_VOBS = 1 << 3, // indoor vobs + COLLECT_DYNAMIC_VOBS = 1 << 4, // dynamic static / transparent vobs - COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS, + COLLECT_ALL_VOBS = COLLECT_VOBS | COLLECT_INDOOR_VOBS | COLLECT_DYNAMIC_VOBS, + COLLECT_DISABLE_CHECK_DIST = 1 << 29, COLLECT_MUTATE = 1 << 30, - COLLECT_ALL_MUTATE = 0xFFFFFFFF, + COLLECT_ALL_MUTATE = 0xFFFFFFFF & ~(COLLECT_DISABLE_CHECK_DIST), COLLECT_ALL_NO_MUTATE = COLLECT_ALL_MUTATE & ~COLLECT_MUTATE, }; @@ -206,6 +208,54 @@ class GVegetationBox; class zCMorphMesh; class zCDecal; +// Minimal flat-map: always-sorted vector of pairs for O(log n) binary-search lookups. +// find() is a single lower_bound search; insert() and erase() also shift the vector's elements (O(n)) and invalidate iterators. +template +struct SortedPairVector { + using Entry = std::pair; + using iterator = typename std::vector::iterator; + using const_iterator = typename std::vector::const_iterator; + + iterator begin() { return m_data.begin(); } + iterator end() { return m_data.end(); } + const_iterator begin() const { return m_data.begin(); } + const_iterator end() const { return m_data.end(); } + + // Binary search for key. Returns end() if not found.
+ __forceinline iterator find( Key key ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + return (it != m_data.end() && it->first == key) ? it : m_data.end(); + } + + // Insert {key, value} maintaining sort order. If key already exists, does nothing. + // Returns {iterator_to_element, true_if_newly_inserted}. + __forceinline std::pair insert( Key key, Value value ) { + auto it = std::lower_bound( m_data.begin(), m_data.end(), key, Cmp{} ); + if ( it != m_data.end() && it->first == key ) + return { it, false }; + return { m_data.insert( it, { key, value } ), true }; + } + + // Erase by key. Returns true if found and erased. + __forceinline bool erase( Key key ) { + auto it = find( key ); + if ( it == m_data.end() ) return false; + m_data.erase( it ); + return true; + } + + void clear() { m_data.clear(); } + bool empty() const { return m_data.empty(); } + size_t size() const { return m_data.size(); } + void reserve( size_t n ) { m_data.reserve( n ); } + +private: + struct Cmp { + bool operator()( const Entry& a, Key k ) const { return a.first < k; } + }; + std::vector m_data; +}; + class GothicAPI { public: GothicAPI(); @@ -216,8 +266,8 @@ class GothicAPI { /** Call to OnRemoveVob(player) and OnAddVob(player) in case of invisibility */ void ReloadPlayerVob(); - inline std::string GetGameName() { return m_gameName; } - inline void SetGameName( std::string value ) { m_gameName = value; } + inline const std::string& GetGameName() const { return m_gameName; } + inline void SetGameName( std::string value ) { m_gameName = std::move(value); } /** Called when the game starts */ void OnGameStart(); @@ -526,7 +576,7 @@ class GothicAPI { EGothicCullFlags cullFlags = EGothicCullFlags::CullAll, EBspTreeCollectFlags collectFlags = EBspTreeCollectFlags::COLLECT_ALL_MUTATE); - void CollectVisibleVobs( const RndCullContext& ctx ); + void CollectVisibleVobs(const RndCullContext& ctx, EBspTreeCollectFlags collectFlags = 
EBspTreeCollectFlags::COLLECT_ALL_NO_MUTATE); /** Collects visible sections from the current camera perspective */ void CollectVisibleSections( std::vector& sections ); @@ -746,7 +796,6 @@ class GothicAPI { float GetSkyTimeScale(); static void ProcessVobAnimation( zCVob* vob, zTAnimationMode aniMode, VobInstanceInfo& vobInstance ); - private: /** Collects polygons in the given AABB */ void CollectPolygonsInAABBRec( BspInfo* base, const zTBBox3D& bbox, std::vector& list ); @@ -836,7 +885,8 @@ class GothicAPI { std::unordered_map VobMap; public: // temporarily, to allow CollectVisibleVobsHelper to be templated for inlining optimizations - std::unordered_map VobLightMap; + // Sorted by zCVobLight* for binary-search lookups + SortedPairVector VobLights_Sorted; private: std::unordered_map SkeletalVobMap; diff --git a/D3D11Engine/GothicGraphicsState.h b/D3D11Engine/GothicGraphicsState.h index 02a6f1ea..94483799 100644 --- a/D3D11Engine/GothicGraphicsState.h +++ b/D3D11Engine/GothicGraphicsState.h @@ -117,18 +117,13 @@ struct GothicGraphicsState { }; __declspec(align(4)) struct GothicPipelineState { - /** Sets this state dirty, which means that it will be updated before next rendering */ - void SetDirty() { - StateDirty = true; - HashThis( reinterpret_cast(this), StructSize ); - } - - /** Hashes the whole struct */ - void HashThis( char* data, int size ) { + /** Recomputes the hash from current state data. Called automatically by UpdateRenderStates(). 
*/ + void ComputeHash() { Hash = 0; - // Start hashing at the data of the other structs, skip the data of this one - for ( int i = sizeof( GothicPipelineState ); i < size; i += 4 ) { + // Hash the derived struct data, skipping the base GothicPipelineState fields + char* data = reinterpret_cast(this); + for ( int i = sizeof( GothicPipelineState ); i < StructSize; i += 4 ) { DWORD d; memcpy( &d, data + i, 4 ); @@ -140,7 +135,6 @@ __declspec(align(4)) struct GothicPipelineState { return Hash == o.Hash; } - bool StateDirty; size_t Hash; int StructSize; }; @@ -220,7 +214,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthWriteEnabled = DepthWriteEnabled; c.DepthBufferCompareFunc = DepthBufferCompareFunc; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -232,7 +225,6 @@ struct GothicDepthBufferStateInfo : public GothicPipelineState { c.DepthBufferCompareFunc = DepthBufferCompareFunc; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -371,7 +363,6 @@ struct GothicBlendStateInfo : public GothicPipelineState { c.AlphaToCoverage = AlphaToCoverage; c.ColorWritesEnabled = ColorWritesEnabled; - c.StateDirty = StateDirty; c.Hash = Hash; c.StructSize = StructSize; return c; @@ -389,7 +380,6 @@ struct GothicBlendStateInfo : public GothicPipelineState { c.ColorWritesEnabled = ColorWritesEnabled; c.StructSize = StructSize; - c.SetDirty(); } }; @@ -632,6 +622,7 @@ struct GothicRendererSettings { WireframeVobs = false; WireframeWorld = false; DrawShadowGeometry = true; + UseIndirectVobShadows = false; FixViewFrustum = false; DisableWatermark = true; DisableRendering = false; @@ -756,6 +747,7 @@ struct GothicRendererSettings { DebugSettings.Culling.CullBspSections = true; DebugSettings.Culling.CullVobs = true; DebugSettings.ShadowCascades.LazyCascadeUpdate = true; + DebugSettings.ShadowCascades.ExtendBack = 600; } void SetupOldWorldSpecificValues() { @@ -813,6 +805,7 @@ struct GothicRendererSettings { int 
ShadowCascadePCFLimit; E_ShadowFrustumCulling ShadowFrustumCullingMode; bool DrawShadowGeometry; + bool UseIndirectVobShadows; bool VegetationAlphaToCoverage; bool DisableWatermark; bool DisableRendering; @@ -944,6 +937,8 @@ struct GothicRendererSettings { struct { bool UseMDI; bool UseLayeredRendering; + bool EnableAtlasStaticVobs; + bool EnableAtlasWorldMesh; } FeatureSet; } DebugSettings; }; @@ -1070,10 +1065,10 @@ struct GothicRendererState { TransformState.SetDefault(); RendererSettings.SetDefault(); - DepthState.SetDirty(); - BlendState.SetDirty(); - RasterizerState.SetDirty(); - SamplerState.SetDirty(); + DepthState.ComputeHash(); + BlendState.ComputeHash(); + RasterizerState.ComputeHash(); + SamplerState.ComputeHash(); } GothicDepthBufferStateInfo DepthState; diff --git a/D3D11Engine/ImGuiShim.cpp b/D3D11Engine/ImGuiShim.cpp index 1067cff8..27b0dac8 100644 --- a/D3D11Engine/ImGuiShim.cpp +++ b/D3D11Engine/ImGuiShim.cpp @@ -1211,6 +1211,7 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) if (ImGui::BeginTabItem("Shadows", nullptr, ImGuiTabItemFlags_::ImGuiTabItemFlags_NoReorder)) { ImGui::Checkbox("Lazy update", &settings.DebugSettings.ShadowCascades.LazyCascadeUpdate ); ImGui::SetItemTooltip("Update last cascades less frequently to save performance, may cause uneven frametimes"); + ImGui::Checkbox("Indirect", &settings.UseIndirectVobShadows ); ImGui::SliderFloat("Extend Back", &settings.DebugSettings.ShadowCascades.ExtendBack, -10000, 50000, "%.0f"); ImGui::SliderFloat("Extend Front", &settings.DebugSettings.ShadowCascades.ExtendFront, -10000, 50000, "%.0f"); @@ -1229,6 +1230,10 @@ void RenderAdvancedColumn2( GothicRendererSettings& settings, GothicAPI* gapi ) if (ImGui::BeginTabItem("Featureset", nullptr, ImGuiTabItemFlags_::ImGuiTabItemFlags_NoReorder)) { ImGui::Checkbox("Use MDI", &settings.DebugSettings.FeatureSet.UseMDI ); ImGui::Checkbox("Use Layered Drawing", &settings.DebugSettings.FeatureSet.UseLayeredRendering ); + 
ImGui::Checkbox("Atlas Static Vobs", &settings.DebugSettings.FeatureSet.EnableAtlasStaticVobs ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for static vobs (experimental, requires world reload)"); + ImGui::Checkbox("Atlas World Mesh", &settings.DebugSettings.FeatureSet.EnableAtlasWorldMesh ); + ImGui::SetItemTooltip("Enable texture atlas based rendering for world mesh (experimental, requires world reload)"); ImGui::EndTabItem(); } diff --git a/D3D11Engine/RenderToTextureBuffer.h b/D3D11Engine/RenderToTextureBuffer.h index 828b2802..683aa19c 100644 --- a/D3D11Engine/RenderToTextureBuffer.h +++ b/D3D11Engine/RenderToTextureBuffer.h @@ -13,7 +13,7 @@ struct RenderToTextureBuffer { } /** Creates the render-to-texture buffers */ - RenderToTextureBuffer( const Microsoft::WRL::ComPtr& device, + RenderToTextureBuffer( ID3D11Device* device, UINT SizeX, UINT SizeY, DXGI_FORMAT Format, diff --git a/D3D11Engine/SMAA/D3D11SMAA.cpp b/D3D11Engine/SMAA/D3D11SMAA.cpp index 0747a2be..284ed2dc 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.cpp +++ b/D3D11Engine/SMAA/D3D11SMAA.cpp @@ -67,24 +67,8 @@ bool D3D11SMAA::Init() sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; // Point filter m_device->CreateSamplerState(&sampDesc, m_samplerPoint.GetAddressOf()); - // 5. 
Create Helper States - D3D11_RASTERIZER_DESC rasterDesc = {}; - rasterDesc.FillMode = D3D11_FILL_SOLID; - rasterDesc.CullMode = D3D11_CULL_NONE; - rasterDesc.DepthClipEnable = true; - m_device->CreateRasterizerState(&rasterDesc, m_rasterizerState.GetAddressOf()); - - D3D11_DEPTH_STENCIL_DESC dsDesc = {}; - dsDesc.DepthEnable = FALSE; - dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; - dsDesc.DepthFunc = D3D11_COMPARISON_ALWAYS; - m_device->CreateDepthStencilState(&dsDesc, m_disableDepthState.GetAddressOf()); - - // Default blend state (Opaque/Overwrite) - D3D11_BLEND_DESC blendDesc = {}; - blendDesc.RenderTarget[0].BlendEnable = FALSE; - blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - m_device->CreateBlendState(&blendDesc, m_blendState.GetAddressOf()); + // Note: Rasterizer, depth-stencil, and blend states are managed by the caller + // through the Gothic state tracking system. return true; } @@ -131,9 +115,8 @@ void D3D11SMAA::Render(ID3D11ShaderResourceView* inputSRV, // Common State Setup m_context->IASetInputLayout(nullptr); // Using VertexID generation m_context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - m_context->RSSetState(m_rasterizerState.Get()); - m_context->OMSetDepthStencilState(m_disableDepthState.Get(), 0); - m_context->OMSetBlendState(m_blendState.Get(), nullptr, 0xFFFFFFFF); + // Note: Rasterizer, depth-stencil, and blend states are configured by the caller + // through the Gothic state tracking system (Engine::GAPI->GetRendererState()). 
ID3D11SamplerState* samplers[] = { m_samplerLinear.Get(), m_samplerPoint.Get() }; m_context->PSSetSamplers(0, 2, samplers); @@ -220,7 +203,4 @@ void D3D11SMAA::ReleaseResources() { m_constantBuffer.Reset(); m_samplerLinear.Reset(); m_samplerPoint.Reset(); - m_rasterizerState.Reset(); - m_disableDepthState.Reset(); - m_blendState.Reset(); } diff --git a/D3D11Engine/SMAA/D3D11SMAA.h b/D3D11Engine/SMAA/D3D11SMAA.h index 6beece45..3339b469 100644 --- a/D3D11Engine/SMAA/D3D11SMAA.h +++ b/D3D11Engine/SMAA/D3D11SMAA.h @@ -68,9 +68,6 @@ class D3D11SMAA { Microsoft::WRL::ComPtr m_constantBuffer; Microsoft::WRL::ComPtr m_samplerLinear; Microsoft::WRL::ComPtr m_samplerPoint; - Microsoft::WRL::ComPtr m_rasterizerState; - Microsoft::WRL::ComPtr m_disableDepthState; - Microsoft::WRL::ComPtr m_blendState; // Default (overwrite off) int m_width; int m_height; diff --git a/D3D11Engine/Shaders/CS_BuildHiZ.hlsl b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl new file mode 100644 index 00000000..bfdb97e4 --- /dev/null +++ b/D3D11Engine/Shaders/CS_BuildHiZ.hlsl @@ -0,0 +1,57 @@ +//-------------------------------------------------------------------------------------- +// Hi-Z Pyramid Build Compute Shader +// Builds a MAX-depth mip chain for hierarchical occlusion culling (reversed-Z). +// Each mip texel stores the NEAREST depth (highest reversed-Z) in its 2x2 source region. +// Mip 0: copy from depth buffer. +// Mip N>0: 2x2 MAX downsample from previous mip. +// +// D3D11 forbids binding the same resource as both SRV and UAV, so we use a +// scratch texture as the UAV target, then CopySubresourceRegion into the +// real Hi-Z texture after each dispatch. 
+//-------------------------------------------------------------------------------------- + +cbuffer HiZCB : register( b0 ) +{ + uint outputWidth; + uint outputHeight; + uint inputMipLevel; + uint isCopyPass; // 1 = mip 0 (copy from depth), 0 = downsample +}; + +Texture2D InputTexture : register( t0 ); +RWTexture2D OutputTexture : register( u0 ); + +[numthreads( 8, 8, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + if ( DTid.x >= outputWidth || DTid.y >= outputHeight ) + return; + + if ( isCopyPass ) + { + // Mip 0: straight copy from the depth buffer (reversed-Z, so 0 = far) + OutputTexture[DTid.xy] = InputTexture.Load( int3( DTid.xy, 0 ) ); + } + else + { + // 2x2 MAX downsample from the previous mip level of the Hi-Z texture. + // With reversed-Z depth (near=1, far=0), we take the MAX to get the + // NEAREST (closest to camera) surface per tile. + // + // CS_CullVobs then takes the MIN across footprint texels of this MAX chain, + // finding the least-occluded tile in the AABB's screen projection. + // The test "maxDepth < hiZDepth" passes only when the AABB's nearest corner + // (maxDepth) is farther than the nearest occluder in every tile of the footprint. + // + // Using MIN here instead would collapse every tile touching the sky to ~0, + // making the test never fire since depth values are non-negative. 
+ uint2 srcBase = DTid.xy * 2; + + float d00 = InputTexture.Load( int3( srcBase + uint2( 0, 0 ), inputMipLevel ) ); + float d10 = InputTexture.Load( int3( srcBase + uint2( 1, 0 ), inputMipLevel ) ); + float d01 = InputTexture.Load( int3( srcBase + uint2( 0, 1 ), inputMipLevel ) ); + float d11 = InputTexture.Load( int3( srcBase + uint2( 1, 1 ), inputMipLevel ) ); + + OutputTexture[DTid.xy] = max( max( d00, d10 ), max( d01, d11 ) ); + } +} diff --git a/D3D11Engine/Shaders/CS_CullVobs.hlsl b/D3D11Engine/Shaders/CS_CullVobs.hlsl new file mode 100644 index 00000000..f335313d --- /dev/null +++ b/D3D11Engine/Shaders/CS_CullVobs.hlsl @@ -0,0 +1,250 @@ +//-------------------------------------------------------------------------------------- +// GPU Frustum + Distance Culling Compute Shader +// Tests each vob AABB against 6 frustum planes + draw distance, +// writes visible instances to RWStructuredBuffer and atomically +// increments InstanceCount in the indirect args buffer. +//-------------------------------------------------------------------------------------- + +cbuffer CullCB : register( b0 ) +{ + float4 frustumPlanes[6]; + float3 cameraPosition; + float drawDistance; + float globalWindStrength; + uint windAdvanced; + uint numVobs; + uint feedbackFrameNumber; + uint enableHiZ; + uint hiZMipCount; + float hiZWidth; + float hiZHeight; + float4x4 viewProjection; +}; + +struct VobGPUData +{ + float3 aabbCenter; + float pad0; + float3 aabbExtent; + float pad1; + float4x4 world; + float4x4 prevWorld; + uint color; + float aniModeStrength; + float canBeAffectedByPlayer; + uint submeshStart; + uint submeshCount; + uint pad2[3]; +}; + +struct SubmeshGPUData +{ + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint argIndex; + uint instanceBaseOffset; + uint globalSourceIndex; +}; + +struct VobInstanceInfoAtlas +{ + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + 
float vStart; + float uEnd; + float vEnd; + uint globalSourceIndex; +}; + +StructuredBuffer VobBuffer : register( t0 ); +StructuredBuffer SubmeshBuffer : register( t1 ); +Texture2D HiZTexture : register( t2 ); +RWStructuredBuffer InstanceOutput : register( u0 ); +RWByteAddressBuffer IndirectArgsUAV : register( u1 ); + +// GPU feedback for streaming: source-indexed RWTexture2D +// The CS stamps visible sources once per (vob, submesh) — orders of magnitude +// cheaper than per-pixel atomics in the pixel shader. +RWTexture2D FeedbackUAV : register( u5 ); + +// Hi-Z occlusion test: project AABB to screen, pick mip level, compare depth. +// Returns true if the AABB is OCCLUDED (should be culled). +bool IsOccludedHiZ( float3 aabbCenter, float3 aabbExtent ) +{ + // Generate all 8 corners of the AABB + float3 corners[8]; + corners[0] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[1] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, -aabbExtent.z ); + corners[2] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[3] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, -aabbExtent.z ); + corners[4] = aabbCenter + float3( -aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[5] = aabbCenter + float3( aabbExtent.x, -aabbExtent.y, aabbExtent.z ); + corners[6] = aabbCenter + float3( -aabbExtent.x, aabbExtent.y, aabbExtent.z ); + corners[7] = aabbCenter + float3( aabbExtent.x, aabbExtent.y, aabbExtent.z ); + + float minX = 1.0, minY = 1.0, maxX = 0.0, maxY = 0.0; + float maxDepth = 0.0; // Reversed-Z: nearest corner has the highest Z. Track max across corners. 
+ + [unroll] + for ( int i = 0; i < 8; i++ ) + { + float4 clip = mul( float4( corners[i], 1.0 ), viewProjection ); + + // Corner is at or behind the camera plane (w <= 0): the projection is unusable, so report the AABB as visible + if ( clip.w <= 0.0 ) + return false; + + float3 ndc = clip.xyz / clip.w; + + // NDC to UV [0,1] range (Y is flipped for texture space) + float u = ndc.x * 0.5 + 0.5; + float v = -ndc.y * 0.5 + 0.5; + + minX = min( minX, u ); + maxX = max( maxX, u ); + minY = min( minY, v ); + maxY = max( maxY, v ); + + // Track the nearest AABB corner (highest Z in reversed-Z) + maxDepth = max( maxDepth, ndc.z ); + } + + // Clamp to screen bounds + minX = saturate( minX ); + maxX = saturate( maxX ); + minY = saturate( minY ); + maxY = saturate( maxY ); + + // Degenerate or off-screen — treat as visible + if ( minX >= maxX || minY >= maxY ) + return false; + + // Compute screen-space size in pixels at mip 0 + float sizeX = ( maxX - minX ) * hiZWidth; + float sizeY = ( maxY - minY ) * hiZHeight; + float maxSize = max( sizeX, sizeY ); + + // Pick mip level: we want the mip where the AABB covers roughly 2x2 texels + float mipF = ceil( log2( max( maxSize, 1.0 ) ) ); + uint mip = min( (uint)mipF, hiZMipCount - 1 ); + + // Compute texel coordinates at this mip level + float mipWidth = max( hiZWidth / (float)( 1u << mip ), 1.0 ); + float mipHeight = max( hiZHeight / (float)( 1u << mip ), 1.0 ); + + int2 texMin = int2( minX * mipWidth, minY * mipHeight ); + int2 texMax = int2( maxX * mipWidth, maxY * mipHeight ); + + // Clamp to valid range + texMin = max( texMin, int2( 0, 0 ) ); + texMax = min( texMax, int2( (int)mipWidth - 1, (int)mipHeight - 1 ) ); + + // Sample Hi-Z: take the min depth across the covered texels. + // CS_BuildHiZ produces a MAX chain: each texel holds the nearest depth of its region (reversed-Z: largest Z = nearest). + // The min across texels is therefore the farthest of those per-tile nearest depths, i.e. the least-occluded tile. NOTE(review): surfaces farther than a tile's nearest one are ignored, so this is not strictly conservative - confirm that is acceptable.
+ float hiZDepth = 1.0; + for ( int y = texMin.y; y <= texMax.y; y++ ) + { + for ( int x = texMin.x; x <= texMax.x; x++ ) + { + hiZDepth = min( hiZDepth, HiZTexture.Load( int3( x, y, mip ) ) ); + } + } + + // Reversed-Z: near=1, far=0. + // maxDepth = nearest AABB corner (highest Z in reversed-Z). + // HiZ is a MAX mip chain: each texel = nearest surface (highest Z) in its region. + // We take MIN across the AABB footprint texels to find the least-occluded tile. + // AABB is occluded when its nearest corner is farther than the nearest surface + // in every footprint tile, i.e. maxDepth < min(hiZMaxValues) = hiZDepth. + return ( maxDepth < hiZDepth ); +} + +[numthreads( 64, 1, 1 )] +void CSMain( uint3 DTid : SV_DispatchThreadID ) +{ + uint idx = DTid.x; + if ( idx >= numVobs ) + return; + + VobGPUData vob = VobBuffer[idx]; + + // Draw distance cull (center-to-camera distance) + float3 toCamera = vob.aabbCenter - cameraPosition; + float distSq = dot( toCamera, toCamera ); + if ( distSq > drawDistance * drawDistance ) + return; + + // Frustum cull: 6-plane AABB test + [unroll] + for ( int p = 0; p < 6; p++ ) + { + float3 n = frustumPlanes[p].xyz; + float d = frustumPlanes[p].w; + float r = dot( abs( n ), vob.aabbExtent ); + float s = dot( n, vob.aabbCenter ) + d; + if ( s - r > 0.0 ) + return; // fully outside this plane + } + + // Hi-Z occlusion cull: test AABB against hierarchical depth buffer + if ( enableHiZ ) + { + if ( IsOccludedHiZ( vob.aabbCenter, vob.aabbExtent ) ) + return; + } + + // Compute wind strength for this vob + float windStr = 0.0; + if ( vob.aniModeStrength > 0.0 && windAdvanced ) + { + windStr = max( 0.1, vob.aniModeStrength ) * globalWindStrength; + } + + // Emit one instance per submesh of this vob + for ( uint s = 0; s < vob.submeshCount; s++ ) + { + SubmeshGPUData sm = SubmeshBuffer[vob.submeshStart + s]; + + // Atomic increment InstanceCount in the indirect args buffer. 
+ // Each D3D11_DRAW_INDEXED_INSTANCED_INDIRECT_ARGS is 20 bytes (5 x uint32): + // [0] IndexCountPerInstance + // [4] InstanceCount <-- we increment this + // [8] StartIndexLocation + // [12] BaseVertexLocation + // [16] StartInstanceLocation + uint slot; + IndirectArgsUAV.InterlockedAdd( sm.argIndex * 20 + 4, 1, slot ); + + // Write instance data at the pre-allocated offset + atomic slot + VobInstanceInfoAtlas inst; + inst.world = vob.world; + inst.prevWorld = vob.prevWorld; + inst.color = vob.color; + inst.windStrength = windStr; + inst.canBeAffectedByPlayer = vob.canBeAffectedByPlayer; + inst.slice = sm.slice; + inst.uStart = sm.uStart; + inst.vStart = sm.vStart; + inst.uEnd = sm.uEnd; + inst.vEnd = sm.vEnd; + inst.globalSourceIndex = sm.globalSourceIndex; + + InstanceOutput[sm.instanceBaseOffset + slot] = inst; + + // Stamp feedback: one atomic per visible (vob, submesh) pair. + // Far cheaper than per-pixel atomics in the PS. + if ( feedbackFrameNumber > 0 ) + { + InterlockedMax( FeedbackUAV[uint2( sm.globalSourceIndex, 0 )], feedbackFrameNumber ); + } + } +} diff --git a/D3D11Engine/Shaders/DS_Defines.h b/D3D11Engine/Shaders/DS_Defines.h index 5f31d5fe..a162a26a 100644 --- a/D3D11Engine/Shaders/DS_Defines.h +++ b/D3D11Engine/Shaders/DS_Defines.h @@ -1,7 +1,7 @@ struct DEFERRED_PS_OUTPUT { float4 vDiffuse : SV_TARGET0; - float4 vNrm : SV_TARGET1; + float2 vNrm : SV_TARGET1; float2 vSI_SP : SV_TARGET2; float2 vVelocity : SV_TARGET3; // Screen-space velocity for motion vectors float vReactiveMask : SV_TARGET4; // Screen-space velocity for motion vectors @@ -14,20 +14,25 @@ struct DEFERRED_PS_OUTPUT_ALPHA_TO_COVERAGE uint fCoverage : SV_Coverage; }; +// Octahedral encoding: map a unit normal to [-1,1]^2 for R16G16_SNORM storage +// Reference: "A Survey of Efficient Representations for Independent Unit Vectors" (Cigolle et al. 2014) +float2 OctWrap(float2 v) +{ + return (1.0 - abs(v.yx)) * (v.xy >= 0.0 ? 
1.0 : -1.0); +} - -float2 EncodeNormal(float3 n) +float2 EncodeNormalGBuffer(float3 n) { - float f = sqrt(8*n.z+8); - return n.xy / f + 0.5; + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : OctWrap(n.xy); + return n.xy; } -float3 DecodeNormal(float2 enc) + +// Decode octahedral [-1,1]^2 back to a unit normal +float3 DecodeNormalGBuffer(float2 encoded) { - float2 fenc = enc.xy*4-2; - float f = dot(fenc,fenc); - float g = sqrt(1-f/4); float3 n; - n.xy = fenc*g; - n.z = 1-f/2; - return n; + n.z = 1.0 - abs(encoded.x) - abs(encoded.y); + n.xy = n.z >= 0.0 ? encoded.xy : OctWrap(encoded.xy); + return normalize(n); } diff --git a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl index e36f2496..6bba1c70 100644 --- a/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl +++ b/D3D11Engine/Shaders/PS_AtmosphereGround.hlsl @@ -83,8 +83,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl index 2439aa09..fa887659 100644 --- a/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl +++ b/D3D11Engine/Shaders/PS_DS_AtmosphericScattering.hlsl @@ -18,7 +18,7 @@ cbuffer DS_ScreenQuadConstantBuffer : register(b0) { - matrix SQ_InvProj; // Optimize out! 
+ float4 SQ_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 matrix SQ_InvView; matrix SQ_View; @@ -28,16 +28,15 @@ cbuffer DS_ScreenQuadConstantBuffer : register(b0) float SQ_ShadowmapSize; float4 SQ_LightColor; - matrix SQ_ShadowView[MAX_CSM_CASCADES]; - matrix SQ_ShadowProj[MAX_CSM_CASCADES]; - - matrix SQ_RainView; - matrix SQ_RainProj; + matrix SQ_ShadowViewProj[MAX_CSM_CASCADES]; float SQ_ShadowStrength; float SQ_ShadowAOStrength; float SQ_WorldAOStrength; float SQ_ShadowSoftness; + + uint SQ_FrameIndex; + float3 SQ_Pad; }; //-------------------------------------------------------------------------------------- @@ -67,14 +66,11 @@ struct PS_INPUT float3 VSPositionFromDepth(float depth, float2 vTexCoord) { - // Get NDC clip-space position - float4 vProjectedPos = float4(vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), depth, 1.0f); - - // Transform by the inverse projection matrix - float4 vPositionVS = mul(vProjectedPos, SQ_InvProj); //invViewProj == invProjection here - - // Divide by w to get the view-space position - return vPositionVS.xyz / vPositionVS.www; + // Reconstruct view-space position from depth using projection parameters + // Avoids full 4x4 inverse projection matrix multiply + float2 ndc = vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f); + float linearZ = SQ_ProjParams.z / (depth - SQ_ProjParams.w); + return float3(ndc * SQ_ProjParams.xy * linearZ, linearZ); } //-------------------------------------------------------------------------------------- @@ -125,12 +121,12 @@ static const float2 g_PoissonDisk8[8] = { float2( 0.0000f, 0.0000f) }; -// Generate per-pixel rotation for temporal stability with TAA float2x2 GetPoissonRotationMatrix(float2 screenPos) { - // Use interleaved gradient noise for temporally stable rotation - // This pattern works well with TAA as it provides good coverage over multiple frames - float angle = frac(52.9829189f * frac(dot(screenPos, float2(0.06711056f, 0.00583715f)))) * 6.283185307f; + float 
temporalOffset = (float)(SQ_FrameIndex % 8) * 0.6180339887f; + + float angle = frac(52.9829189f * frac(dot(screenPos, float2(0.06711056f, 0.00583715f)) + temporalOffset)) * 6.283185307f; + float s, c; sincos(angle, s, c); return float2x2(c, -s, s, c); @@ -138,7 +134,7 @@ float2x2 GetPoissonRotationMatrix(float2 screenPos) float IsInShadow(float3 wsPosition, Texture2DArray shadowmapArray, SamplerComparisonState samplerState) { - float4 vShadowSamplingPos = mul(float4(wsPosition, 1), mul(SQ_ShadowView[0], SQ_ShadowProj[0])); + float4 vShadowSamplingPos = mul(float4(wsPosition, 1), SQ_ShadowViewProj[0]); vShadowSamplingPos.xyz /= vShadowSamplingPos.www; float2 projectedTexCoords = vShadowSamplingPos.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f); @@ -147,7 +143,7 @@ float IsInShadow(float3 wsPosition, Texture2DArray shadowmapArray, SamplerCompar float IsWet(float3 wsPosition, Texture2D shadowmap, SamplerComparisonState samplerState, matrix viewProj) { - float4 vShadowSamplingPos = mul(float4(wsPosition, 1), mul(SQ_RainView, SQ_RainProj)); + float4 vShadowSamplingPos = mul(float4(wsPosition, 1), SQ_RainViewProj); vShadowSamplingPos.xyz /= vShadowSamplingPos.www; float2 projectedTexCoords = vShadowSamplingPos.xy * float2(0.5f, -0.5f) + float2(0.5f, 0.5f); @@ -161,7 +157,7 @@ float IsWet(float3 wsPosition, Texture2D shadowmap, SamplerComparisonState sampl //-------------------------------------------------------------------------------------- float4 GetCascadeUVAndBounds(float3 wsPosition, int cascadeIndex) { - matrix viewProj = mul(SQ_ShadowView[cascadeIndex], SQ_ShadowProj[cascadeIndex]); + matrix viewProj = SQ_ShadowViewProj[cascadeIndex]; float4 vShadowSamplingPos = mul(float4(wsPosition, 1), viewProj); vShadowSamplingPos.xyz /= vShadowSamplingPos.www; @@ -187,7 +183,7 @@ float4 GetCascadeUVAndBounds(float3 wsPosition, int cascadeIndex) //-------------------------------------------------------------------------------------- float SampleCascadeShadowSoft(float3 
wsPosition, int cascadeIndex, float vertLighting, float bias, float2 screenPos, float softness) { - matrix viewProj = mul(SQ_ShadowView[cascadeIndex], SQ_ShadowProj[cascadeIndex]); + matrix viewProj = SQ_ShadowViewProj[cascadeIndex]; float4 vShadowSamplingPos = mul(float4(wsPosition, 1), viewProj); vShadowSamplingPos.xyz /= vShadowSamplingPos.www; @@ -426,7 +422,7 @@ void ApplyRainNormalDeformation(inout float3 vsNormal, float3 wsPosition, inout void ApplySceneWettness(float3 wsPosition, float3 vsPosition, float3 vsDir, inout float3 vsNormal, in out float3 diffuse, in out float specIntensity, in out float specPower, out float specAdd) { // Ask the rain-shadowmap if we can hit this pixel - float pixelWettnes = ComputeShadowValue(0.0f, wsPosition, TX_RainShadowmap, SS_Comp, vsPosition.z, 1.0f, mul(SQ_RainView, SQ_RainProj), 0.0001f, 2.5f) * AC_SceneWettness; + float pixelWettnes = ComputeShadowValue(0.0f, wsPosition, TX_RainShadowmap, SS_Comp, vsPosition.z, 1.0f, SQ_RainViewProj, 0.0001f, 2.5f) * AC_SceneWettness; pixelWettnes = pixelWettnes < 0.001f ? 
0 : pixelWettnes; //IsWet(wsPosition, TX_RainShadowmap, SS_Comp) * AC_SceneWettness; @@ -499,15 +495,17 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); float vertLighting = diffuse.a; - // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); - - // If we dont have a normal, just return the diffuse color - if (gb2.w < 0.001f) + // Sample depth first to detect sky pixels (reversed-Z: sky has depth == 0.0) + float expDepth = TX_Depth.Sample(SS_Linear, uv).r; + if (expDepth < 0.00001f) + // Sky pixel — no geometry was written, just return the diffuse (sky) color return float4(diffuse.rgb, 1); - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Get the second GBuffer + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; + + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); @@ -515,7 +513,6 @@ float4 PSMain(PS_INPUT Input) : SV_TARGET float specPower = gb3.y; // Reconstruct VS World Position from depth - float expDepth = TX_Depth.Sample(SS_Linear, uv).r; float3 vsPosition = VSPositionFromDepth(expDepth, uv); float3 wsPosition = mul(float4(vsPosition, 1), SQ_InvView).xyz; float3 V = normalize(-vsPosition); diff --git a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl index 2fab7758..584e5087 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLight.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLight.hlsl @@ -16,7 +16,7 @@ cbuffer DS_PointLightConstantBuffer : register( b0 ) float2 PL_ViewportSize; float2 PL_Pad2; - matrix PL_InvProj; // Optimize out! + float4 PL_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 matrix PL_InvView; // Optimize out! 
float3 PL_LightScreenPos; @@ -43,14 +43,10 @@ struct PS_INPUT float3 VSPositionFromDepth(float depth, float2 vTexCoord) { - // Get NDC clip-space position - float4 vProjectedPos = float4(vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), depth, 1.0f); - - // Transform by the inverse projection matrix - float4 vPositionVS = mul(vProjectedPos, PL_InvProj); //invViewProj == invProjection here - - // Divide by w to get the view-space position - return vPositionVS.xyz / vPositionVS.www; + // Reconstruct view-space position from depth using projection parameters + float2 ndc = vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f); + float linearZ = PL_ProjParams.z / (depth - PL_ProjParams.w); + return float3(ndc * PL_ProjParams.xy * linearZ, linearZ); } //-------------------------------------------------------------------------------------- @@ -104,10 +100,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl index de85d5a3..3412b577 100644 --- a/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl +++ b/D3D11Engine/Shaders/PS_DS_PointLightDynShadow.hlsl @@ -16,7 +16,7 @@ cbuffer DS_PointLightConstantBuffer : register( b0 ) float2 PL_ViewportSize; float2 PL_Pad2; - matrix PL_InvProj; // Optimize out! + float4 PL_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 matrix PL_InvView; // Optimize out! 
float3 PL_LightScreenPos; @@ -86,14 +86,10 @@ struct PS_INPUT float3 VSPositionFromDepth(float depth, float2 vTexCoord) { - // Get NDC clip-space position - float4 vProjectedPos = float4(vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), depth, 1.0f); - - // Transform by the inverse projection matrix - float4 vPositionVS = mul(vProjectedPos, PL_InvProj); //invViewProj == invProjection here - - // Divide by w to get the view-space position - return vPositionVS.xyz / vPositionVS.www; + // Reconstruct view-space position from depth using projection parameters + float2 ndc = vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f); + float linearZ = PL_ProjParams.z / (depth - PL_ProjParams.w); + return float3(ndc * PL_ProjParams.xy * linearZ, linearZ); } //-------------------------------------------------------------------------------------- @@ -146,10 +142,10 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float4 diffuse = TX_Diffuse.Sample(SS_Linear, uv); // Get the second GBuffer - float4 gb2 = TX_Nrm.Sample(SS_Linear, uv); + float2 gb2 = TX_Nrm.Sample(SS_Linear, uv).xy; - // Decode the view-space normal back - float3 normal = normalize(gb2.xyz); + // Decode the view-space normal from octahedral R16G16_SNORM + float3 normal = DecodeNormalGBuffer(gb2); // Get specular parameters float4 gb3 = TX_SI_SP.Sample(SS_Linear, uv); diff --git a/D3D11Engine/Shaders/PS_Diffuse.hlsl b/D3D11Engine/Shaders/PS_Diffuse.hlsl index 72be484f..e1962dcb 100644 --- a/D3D11Engine/Shaders/PS_Diffuse.hlsl +++ b/D3D11Engine/Shaders/PS_Diffuse.hlsl @@ -106,8 +106,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET //output.vDiffuse = float4(Input.vTexcoord2, 0, 1); //output.vDiffuse = float4(Input.vNormalVS, 1); - output.vNrm.xyz = nrm; - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(nrm); output.vSI_SP.x = MI_SpecularIntensity * fx.r; output.vSI_SP.y = MI_SpecularPower * fx.g; diff --git a/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl 
new file mode 100644 index 00000000..8ad84315 --- /dev/null +++ b/D3D11Engine/Shaders/PS_DiffuseAtlas.hlsl @@ -0,0 +1,113 @@ +//-------------------------------------------------------------------------------------- +// Atlas pixel shader for static vobs +// Samples from Texture2DArray using (u, v, slice) from vertex shader +//-------------------------------------------------------------------------------------- +#include +#include +#include +#include + +cbuffer MI_MaterialInfo : register( b2 ) +{ + float MI_SpecularIntensity; + float MI_SpecularPower; + float MI_NormalmapStrength; + float MI_ParallaxOcclusionStrength; + + float4 MI_Color; +} + +cbuffer DIST_Distance : register( b3 ) +{ + float DIST_DrawDistance; + float DIST_LodBias; + float2 DIST_Pad; +} + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +SamplerState SS_Linear : register( s0 ); +SamplerState SS_samMirror : register( s1 ); +Texture2DArray TX_AtlasArray : register( t0 ); +Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Texture2 : register( t2 ); +TextureCube TX_ReflectionCube : register( t4 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float4 vPosition : SV_POSITION; +}; + +// Calculate screen-space velocity from clip positions +float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos) +{ + if (currClipPos.w == 0.0 || 
prevClipPos.w == 0.0) + return float2(0, 0); + + float2 currNDC = currClipPos.xy / currClipPos.w; + float2 prevNDC = prevClipPos.xy / prevClipPos.w; + + float2 currUV = float2(currNDC.x * 0.5 + 0.5, 1.0 - (currNDC.y * 0.5 + 0.5)); + float2 prevUV = float2(prevNDC.x * 0.5 + 0.5, 1.0 - (prevNDC.y * 0.5 + 0.5)); + + return prevUV - currUV; +} + +//-------------------------------------------------------------------------------------- +// Pixel Shader +//-------------------------------------------------------------------------------------- +DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET +{ + DEFERRED_PS_OUTPUT output; + output.vReactiveMask = 0.0f; + + // Per-pixel atlas UV remapping: avoids frac() interpolation collapse in the VS + // (frac(1.0)=0.0 in VS causes entire [0,1] UV range to collapse to a single texel). + // SampleGrad uses gradients from the raw (pre-frac) UVs so MIP selection stays correct + // even at UV wrap boundaries where frac() would create huge derivative discontinuities. 
+ float2 rawUV = Input.vTexcoord3D.xy; + float slice = Input.vTexcoord3D.z; + float2 atlasScale = Input.vAtlasRect.zw - Input.vAtlasRect.xy; // (uEnd-uStart, vEnd-vStart) + + // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias + // (needed for FSR upscaling to produce sharp textures at lower resolutions) + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * atlasScale * biasFactor; + float2 gradY = ddy(rawUV) * atlasScale * biasFactor; + float2 atlasUV = Input.vAtlasRect.xy + frac(rawUV) * atlasScale; + + float4 color = TX_AtlasArray.SampleGrad(SS_Linear, float3(atlasUV, slice), gradX, gradY); + +#if ALPHATEST == 1 + ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f); + DoAlphaTest(color.a); + output.vReactiveMask = 0.1f; +#endif + + float3 nrm = normalize(Input.vNormalVS); + + float4 fx = 1.0f; + + output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); + + output.vNrm = EncodeNormalGBuffer(nrm); + + output.vSI_SP.x = MI_SpecularIntensity * fx.r; + output.vSI_SP.y = MI_SpecularPower * fx.g; + + output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos); + + return output; +} diff --git a/D3D11Engine/Shaders/PS_Grass.hlsl b/D3D11Engine/Shaders/PS_Grass.hlsl index 5f12bed1..a77ebbeb 100644 --- a/D3D11Engine/Shaders/PS_Grass.hlsl +++ b/D3D11Engine/Shaders/PS_Grass.hlsl @@ -65,8 +65,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, 1); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.xy = 0; diff --git a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl index 864876a6..a1cbdb48 100644 --- a/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl +++ b/D3D11Engine/Shaders/PS_PFX_GodRayMask.hlsl @@ -8,7 +8,7 @@ SamplerState SS_Linear : register( s0 ); SamplerState 
SS_samMirror : register( s1 ); Texture2D TX_Texture0 : register( t0 ); -Texture2D TX_Texture1 : register( t1 ); +Texture2D TX_Depth : register( t1 ); //-------------------------------------------------------------------------------------- // Input / Output structures @@ -26,9 +26,10 @@ struct PS_INPUT float4 PSMain( PS_INPUT Input ) : SV_TARGET { float4 color = TX_Texture0.Sample(SS_Linear, Input.vTexcoord); - float4 gb2 = TX_Texture1.Sample(SS_Linear, Input.vTexcoord); - if(gb2.w < 0.001f) + // Sky detection via depth buffer (reversed-Z: sky has depth == 0.0) + float depth = TX_Depth.Sample(SS_Linear, Input.vTexcoord).r; + if(depth < 0.00001f) return color; return float4(0,0,0,0); diff --git a/D3D11Engine/Shaders/PS_PFX_GodRayZoom.hlsl b/D3D11Engine/Shaders/PS_PFX_GodRayZoom.hlsl index 7c20bbb9..fb4fbe07 100644 --- a/D3D11Engine/Shaders/PS_PFX_GodRayZoom.hlsl +++ b/D3D11Engine/Shaders/PS_PFX_GodRayZoom.hlsl @@ -30,14 +30,21 @@ struct PS_INPUT float4 vPosition : SV_POSITION; }; +// Interleaved Gradient Noise for cheap, effective dithering +float InterleavedGradientNoise(float2 uv) +{ + float3 magic = float3(0.06711056f, 0.00583715f, 52.9829189f); + return frac(magic.z * frac(dot(uv, magic.xy))); +} + //-------------------------------------------------------------------------------------- // Pixel Shader //-------------------------------------------------------------------------------------- float4 PSMain( PS_INPUT Input ) : SV_TARGET { - const int NUM_SAMPLES = 30; + // Increased sample count for a smoother gradient + const int NUM_SAMPLES = 64; float2 center = GR_Center; - float zoomMax = 0.5f; float3 color = 0; float illumDecay = 1.0f; @@ -46,16 +53,27 @@ float4 PSMain( PS_INPUT Input ) : SV_TARGET float2 uv = Input.vTexcoord; - [unroll] - for(int i=0;i 1.0f || uv.y < 0.0f || uv.y > 1.0f) + { + continue; + } + + color += TX_Texture0.Sample(SS_Linear, uv).rgb * illumDecay * GR_Weight; illumDecay *= GR_Decay; } color /= NUM_SAMPLES; - - return float4(color * 
GR_ColorMod,1); -} - + + return float4(color * GR_ColorMod, 1.0f); +} \ No newline at end of file diff --git a/D3D11Engine/Shaders/PS_PFX_Heightfog.hlsl b/D3D11Engine/Shaders/PS_PFX_Heightfog.hlsl index fc44581b..1708061e 100644 --- a/D3D11Engine/Shaders/PS_PFX_Heightfog.hlsl +++ b/D3D11Engine/Shaders/PS_PFX_Heightfog.hlsl @@ -6,7 +6,7 @@ cbuffer PFXBuffer : register( b0 ) { - matrix HF_InvProj; + float4 HF_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 matrix HF_InvView; float3 HF_CameraPosition; float HF_FogHeight; @@ -33,14 +33,10 @@ Texture2D TX_Depth : register( t1 ); float3 VSPositionFromDepth(float depth, float2 vTexCoord) { - // Get NDC clip-space position - float4 vProjectedPos = float4(vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f), depth, 1.0f); - - // Transform by the inverse projection matrix - float4 vPositionVS = mul(vProjectedPos, HF_InvProj); //invViewProj == invProjection here - - // Divide by w to get the view-space position - return vPositionVS.xyz / vPositionVS.www; + // Reconstruct view-space position from depth using projection parameters + float2 ndc = vTexCoord * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f); + float linearZ = HF_ProjParams.z / (depth - HF_ProjParams.w); + return float3(ndc * HF_ProjParams.xy * linearZ, linearZ); } float ComputeVolumetricFog(float3 cameraToWorldPos, float3 posOriginal) diff --git a/D3D11Engine/Shaders/PS_World.hlsl b/D3D11Engine/Shaders/PS_World.hlsl index 661c3785..8311f0d8 100644 --- a/D3D11Engine/Shaders/PS_World.hlsl +++ b/D3D11Engine/Shaders/PS_World.hlsl @@ -85,8 +85,7 @@ DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET DEFERRED_PS_OUTPUT output; output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); - output.vNrm.xyz = normalize(Input.vNormalVS); - output.vNrm.w = 1.0f; + output.vNrm = EncodeNormalGBuffer(normalize(Input.vNormalVS)); output.vSI_SP.x = MI_SpecularIntensity; output.vSI_SP.y = MI_SpecularPower; diff --git a/D3D11Engine/Shaders/PS_WorldAtlas.hlsl 
b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl new file mode 100644 index 00000000..42aed68f --- /dev/null +++ b/D3D11Engine/Shaders/PS_WorldAtlas.hlsl @@ -0,0 +1,171 @@ +//-------------------------------------------------------------------------------------- +// World mesh pixel shader for atlas indirect draw path +// Samples diffuse, normal and FX maps from separate Texture2DArray atlases. +// Flags bits: 1 = HAS_NORMAL, 2 = HAS_FX, 4 = ALPHA_TEST +//-------------------------------------------------------------------------------------- +#include +#include +#include +#include + +cbuffer MI_MaterialInfo : register( b2 ) +{ + float MI_SpecularIntensity; + float MI_SpecularPower; + float MI_NormalmapStrength; + float MI_ParallaxOcclusionStrength; + + float4 MI_Color; +} + +cbuffer DIST_Distance : register( b3 ) +{ + float DIST_DrawDistance; + float DIST_LodBias; + float2 DIST_Pad; +} + +//-------------------------------------------------------------------------------------- +// Textures and Samplers +//-------------------------------------------------------------------------------------- +SamplerState SS_Linear : register( s0 ); +SamplerState SS_samMirror : register( s1 ); +Texture2DArray TX_AtlasDiffuse : register( t0 ); +Texture2DArray TX_AtlasNormal : register( t1 ); +Texture2DArray TX_AtlasFx : register( t2 ); +TextureCube TX_ReflectionCube : register( t4 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct PS_INPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // diffuse atlas rect + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float3 vNormalAtlas3D : TEXCOORD8; // (rawU, rawV, normalSlice) + 
float4 vNormalAtlasRect : TEXCOORD9; // normal atlas rect
+    float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice)
+    nointerpolation uint vFlags : TEXCOORD11;
+    float4 vFxAtlasRect : TEXCOORD12; // fx atlas rect
+    float4 vPosition : SV_POSITION;
+};
+
+// Screen-space velocity between two clip-space positions.
+//   currClipPos : clip-space position of the point in the current frame
+//   prevClipPos : clip-space position of the same point in the previous frame
+// Returns prevUV - currUV, where both positions are projected to [0,1] UV space
+// with the y axis flipped. Returns (0, 0) if either position has w == 0.
+float2 CalculateVelocity(float4 currClipPos, float4 prevClipPos)
+{
+    // A position with w == 0 cannot be perspective-divided: report "no motion".
+    const bool canProject = (currClipPos.w != 0.0) && (prevClipPos.w != 0.0);
+    if (!canProject)
+    {
+        return float2(0, 0);
+    }
+
+    // Perspective divide: clip space -> normalized device coordinates
+    const float2 ndcNow = currClipPos.xy / currClipPos.w;
+    const float2 ndcBefore = prevClipPos.xy / prevClipPos.w;
+
+    // NDC [-1,1] -> UV [0,1]; y is mirrored so that v grows downwards
+    const float2 uvNow = float2(ndcNow.x * 0.5 + 0.5, 1.0 - (ndcNow.y * 0.5 + 0.5));
+    const float2 uvBefore = float2(ndcBefore.x * 0.5 + 0.5, 1.0 - (ndcBefore.y * 0.5 + 0.5));
+
+    return uvBefore - uvNow;
+}
+
+// Helper: sample from an atlas Texture2DArray with correct mip via SampleGrad + frac()
+// Clamps the final atlas UV inside the entry boundary, scaled by the mip level
+// so that at higher mips the border grows to prevent bilinear bleed into neighbors.
+//   atlas      : Texture2DArray holding the atlas slices
+//   ss         : sampler used for the fetch
+//   rawUVSlice : (rawU, rawV, slice) - un-wrapped mesh UVs plus the array slice
+//   atlasRect  : (uStart, vStart, uEnd, vEnd) of the entry inside the slice
+//   lodBias    : LOD bias applied manually to the gradients
+float4 SampleAtlas(Texture2DArray atlas, SamplerState ss, float3 rawUVSlice, float4 atlasRect, float lodBias)
+{
+    float2 rawUV = rawUVSlice.xy;
+    float slice = rawUVSlice.z;
+    float2 scale = atlasRect.zw - atlasRect.xy;
+    // SampleGrad ignores sampler MipLODBias, so we manually apply the LOD bias
+    // (needed for FSR upscaling to produce sharp textures at lower resolutions)
+    float biasFactor = exp2(lodBias);
+    float2 gradX = ddx(rawUV) * scale * biasFactor;
+    float2 gradY = ddy(rawUV) * scale * biasFactor;
+
+    // Query actual atlas dimensions instead of assuming a fixed size
+    float atlasW, atlasH, atlasSlices;
+    atlas.GetDimensions(atlasW, atlasH, atlasSlices);
+    float2 atlasSize = float2(atlasW, atlasH);
+
+    // Compute approximate mip level from gradients.
+    // Each gradient is (du, dv): u must be scaled by the width and v by the height.
+    // Scaling a whole gradient by a single dimension is only correct for square atlases.
+    float2 dxTex = gradX * atlasSize;
+    float2 dyTex = gradY * atlasSize;
+    float maxSq = max(dot(dxTex, dxTex), dot(dyTex, dyTex));
+    // log2(0) is -inf for a zero gradient; max() brings that back to mip 0
+    float mipLevel = max(0.0, 0.5 * log2(maxSq));
+
+    // Scale the half-texel border by 2^mip so it covers the filter footprint at that level
+    float2 border = (0.5 / atlasSize) * exp2(ceil(mipLevel));
+    // Never let the border exceed half the entry: otherwise the clamp range below inverts
+    // (min > max) and the lookup lands outside the entry - the very bleed this guards against.
+    // At the cap the UV collapses to the entry center.
+    border = min(border, 0.5 * scale);
+
+    float2 atlasUV = atlasRect.xy + frac(rawUV) * scale;
+    atlasUV = clamp(atlasUV, atlasRect.xy + border, atlasRect.zw - border);
+    return atlas.SampleGrad(ss, float3(atlasUV, slice), gradX, gradY);
+}
+
+//--------------------------------------------------------------------------------------
+// Pixel Shader
+//--------------------------------------------------------------------------------------
+DEFERRED_PS_OUTPUT PSMain( PS_INPUT Input ) : SV_TARGET
+{
+    DEFERRED_PS_OUTPUT output;
+    output.vReactiveMask = 0.0f;
+
+    // --- Diffuse ---
+    float4 color = SampleAtlas(TX_AtlasDiffuse, SS_Linear, Input.vTexcoord3D, Input.vAtlasRect, DIST_LodBias);
+
+    // Alpha test
+    if (Input.vFlags & 4u)
+    {
+        ClipDistanceEffect(length(Input.vViewPosition), DIST_DrawDistance, color.r * 2 - 1, 500.0f);
+        DoAlphaTest(color.a);
+        output.vReactiveMask = 0.1f;
+    }
+
+    // --- Normal mapping ---
+    float3 nrm;
+    if (Input.vFlags & 1u)
+    {
+        //
Reconstruct the FX-atlas rect for the normal map from interpolated data. + // The normal atlas uses the same UV space as diffuse. + float4 nrmAtlasRect = Input.vNormalAtlasRect; + float2 rawUV = Input.vNormalAtlas3D.xy; + float slice = Input.vNormalAtlas3D.z; + float2 scale = nrmAtlasRect.zw - nrmAtlasRect.xy; + float biasFactor = exp2(DIST_LodBias); + float2 gradX = ddx(rawUV) * scale * biasFactor; + float2 gradY = ddy(rawUV) * scale * biasFactor; + float2 atlasUV = nrmAtlasRect.xy + frac(rawUV) * scale; + + nrm = perturb_normal_from_grad( + Input.vNormalVS, + Input.vViewPosition, + TX_AtlasNormal, + float3(atlasUV, slice), + gradX, gradY, + SS_Linear, + MI_NormalmapStrength); + } + else + { + nrm = normalize(Input.vNormalVS); + } + + // --- FX map --- + float4 fx = 1.0f; + if (Input.vFlags & 2u) + { + fx = SampleAtlas(TX_AtlasFx, SS_Linear, Input.vFxAtlas3D, Input.vFxAtlasRect, DIST_LodBias); + } + + output.vDiffuse = float4(color.rgb, Input.vDiffuse.y); + + output.vNrm = EncodeNormalGBuffer(nrm); + + output.vSI_SP.x = MI_SpecularIntensity * fx.r; + output.vSI_SP.y = MI_SpecularPower * fx.g; + + output.vVelocity = CalculateVelocity(Input.vCurrClipPos, Input.vPrevClipPos); + + return output; +} diff --git a/D3D11Engine/Shaders/Toolbox.h b/D3D11Engine/Shaders/Toolbox.h index 5a22a510..40dd8f00 100644 --- a/D3D11Engine/Shaders/Toolbox.h +++ b/D3D11Engine/Shaders/Toolbox.h @@ -40,4 +40,16 @@ float3 perturb_normal( float3 N, float3 V, Texture2D normalmap, float2 texcoord, float3x3 TBN = cotangent_frame( N, -V, texcoord ); return normalize( mul(transpose(TBN), nrmmap) ); +} + +// Atlas variant: samples from a Texture2DArray using SampleGrad for correct mip selection +float3 perturb_normal_from_grad( float3 N, float3 V, Texture2DArray normalmap, float3 uvSlice, float2 gradX, float2 gradY, SamplerState samplerState, float normalmapDepth = 1.0f) +{ + float3 nrmmap = normalmap.SampleGrad(samplerState, uvSlice, gradX, gradY).xyz * 2 - 1; + nrmmap.xy *= -1.0f; + nrmmap.xy *= 
normalmapDepth; + nrmmap = normalize(nrmmap); + + float3x3 TBN = cotangent_frame( N, -V, uvSlice.xy ); + return normalize( mul(transpose(TBN), nrmmap) ); } \ No newline at end of file diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl new file mode 100644 index 00000000..0b063d50 --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirect.hlsl @@ -0,0 +1,217 @@ +//-------------------------------------------------------------------------------------- +// Simple vertex shader +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +StructuredBuffer instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout) to float4 +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically adds StartInstanceLocation to this fetch! 
+ uint instanceID : INSTANCE_ID; +}; + +struct VS_OUTPUT +{ + float2 vTexcoord : TEXCOORD0; + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; // Current clip position for velocity + float4 vPrevClipPos : TEXCOORD7; // Previous clip position for velocity + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +//less then trunkStiffness (%) will be absolutely stay, like tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; // original engine uses [0.1 -> 5] range, we use higher values in formulas +static const float PI_2 = 6.283185; // 2 * PI + +float GetInstancePhaseOffset(float4x4 objMatrix) +{ + // Random seed by object's matrix + // Combine object matrix and maxHeight for more stable randomness + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +{ + // Calculate if vertex should be affected (1 if heightNorm >= trunkStiffness, 0 otherwise) + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + + // Smooth height factor with more natural falloff + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = pow(adjustedHeight, 2.6f); + + // Main wave + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + + // Second wave + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + + // Inertia + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + // Height amplitude + float topSmoothing = 
smoothstep(0.7, 0.9, adjustedHeight); + + // Combine waves + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + // Chaotical motion + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + // Final offset + float3 windOffset = direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +// HERO AFFECTS CONST +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +float3 CalculatePlayerInfluence( // returns the offset VSMain adds to the local vertex position + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + // 15% of object height check + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + float3 displaceDirWorld = (length(toVertex) >= 0.001) ? normalize(toVertex) : float3(0, 1, 0); // select, not lerp: normalize() of a zero-length vector is undefined (typically NaN) and lerp(a, NaN, 0) is still NaN + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + float3 displaceDirLocal = normalize(mul(displaceDirWorld, (float3x3)instWorldMatrix)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( 
VS_INPUT Input ) +{ + VS_OUTPUT Output; + + // Base vertex position (local) + float3 position = Input.vPosition; + VobInstanceInfo instance = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (instance.canBeAffectedByPlayer > 0) + { + // HERO MOVING BUSHES SHADER + position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, instance.world); + } +#endif + +#if SHD_WIND + + if (instance.windStrenth > 0) + { + // WIND SHADER + // Protect 0 height + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + + // Apply wind + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + instance.world, + instance.windStrenth + ); + } +#endif + + // Common processing for both cases + float3 worldPos = mul(float4(position, 1.0), instance.world).xyz; + + // Calculate previous world position for motion vectors + float3 prevWorldPos = mul(float4(position, 1.0), instance.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + Output.vTexcoord = Input.vTex1; + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(instance.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)instance.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View).xyz; // explicit .xyz: vViewPosition is float3, mul() yields float4 + + // Store clip positions for velocity calculation in pixel shader + // Use UNJITTERED matrices for correct velocity (jitter would cause incorrect motion) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} + diff --git a/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl new file mode 100644 index 00000000..bd4349ef --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExInstancedObjIndirectAtlas.hlsl @@ -0,0 +1,212 @@ 
+//-------------------------------------------------------------------------------------- +// Instanced vertex shader for atlas indirect draw path +// Uses StructuredBuffer for per-instance data including atlas UV rect +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +cbuffer WindParams : register(b1) +{ + float3 windDir; + float globalTime; + float minHeight; + float maxHeight; + float2 padding0; + float3 playerPos; + float padding1; +}; + +struct VobInstanceInfoAtlas { + float4x4 world; + float4x4 prevWorld; + uint color; + float windStrength; + float canBeAffectedByPlayer; + int slice; + float uStart; + float vStart; + float uEnd; + float vEnd; + uint globalSourceIndex; +}; + +StructuredBuffer instances : register(t1); + +// Unpack DWORD color (R8G8B8A8_UNORM layout) to float4 +float4 UnpackColor(uint packed) +{ + return float4( + float(packed & 0xFF) / 255.0, + float((packed >> 8) & 0xFF) / 255.0, + float((packed >> 16) & 0xFF) / 255.0, + float((packed >> 24) & 0xFF) / 255.0 + ); +} + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // The Input Assembler automatically adds StartInstanceLocation to this fetch! 
+ uint instanceID : INSTANCE_REMAP_INDEX; +}; + +struct VS_OUTPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, slice) — raw UVs passed to PS for per-pixel atlas remap + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // (uStart, vStart, uEnd, vEnd) — atlas sub-rect for PS remap + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + + float4 vPosition : SV_POSITION; +}; + +#if SHD_WIND + +//less then trunkStiffness (%) will be absolutely stay, like tree trunk +static const float trunkStiffness = 0.12f; +static const float phaseVariation = 0.40f; +static const float windStrengMult = 16.0f; +static const float PI_2 = 6.283185; + +float GetInstancePhaseOffset(float4x4 objMatrix) +{ + float seed = dot(objMatrix._11_22_33, float3(12.9898, 78.233, 53.539)) + maxHeight; + return frac(sin(seed) * 43758.5453) * phaseVariation; +} + +float3 ApplyTreeWind(float3 vertexPos, float3 direction, float heightNorm, float timeSec, float4x4 instMatrix, float windStrength) +{ + float shouldAffect = saturate(sign(heightNorm - trunkStiffness + 0.0001f)); + + float instancePhase = GetInstancePhaseOffset(instMatrix) * PI_2; + + float adjustedHeight = saturate((heightNorm - trunkStiffness) / (1.0 - trunkStiffness)) * shouldAffect; + float heightFactor = pow(adjustedHeight, 2.6f); + + float mainWave = sin(timeSec * 1.0 + heightNorm * 3.0 + instancePhase) * 0.8; + float secondaryWave = cos(timeSec * 0.7 + heightNorm * 5.0 + instancePhase * 1.5) * 0.80; + float inertiaEffect = sin(timeSec * 0.3 + heightNorm * 8.0) * 0.1; + + float topSmoothing = smoothstep(0.7, 0.9, adjustedHeight); + float combinedWave = (mainWave + secondaryWave * 0.5) * (1.0 - topSmoothing * 0.3) + inertiaEffect * topSmoothing; + + float leafTurbulence = (sin(timeSec * 4.0 + vertexPos.x * 15.0) + + cos(timeSec * 3.7 + vertexPos.z * 12.0)) * 0.05 * topSmoothing; + + float3 windOffset = 
direction * windStrength * windStrengMult * + (combinedWave + leafTurbulence) * heightFactor; + + return windOffset; +} +#endif + +#if SHD_INFLUENCE + +static const float heroAffectRange = 100.0f; +static const float heroAffectStrength = 38.0f; + +float3 CalculatePlayerInfluence( // returns the offset VSMain adds to the local vertex position + float3 playerPos, + float3 vertexLocalPos, + float minHeight, + float maxHeight, + float4x4 instWorldMatrix +) +{ + float heightRange = max(maxHeight - minHeight, 0.001); + float vertexHeightNorm = saturate((vertexLocalPos.y - minHeight) / heightRange); + + float heightMask = smoothstep(0.14, 0.16, vertexHeightNorm); + + float3 vertexWorldPos = mul(float4(vertexLocalPos, 1.0), instWorldMatrix).xyz; + float3 toVertex = vertexWorldPos - playerPos; + + float3 displaceDirWorld = (length(toVertex) >= 0.001) ? normalize(toVertex) : float3(0, 1, 0); // select, not lerp: normalize() of a zero-length vector is undefined (typically NaN) and lerp(a, NaN, 0) is still NaN + + float distanceXZ = length(toVertex.xz); + float distanceFactor = exp(-(distanceXZ*distanceXZ)/(1.8*heroAffectRange*heroAffectRange)); + + float influence = distanceFactor * vertexHeightNorm * heightMask; + + float randomOffset = frac(sin(dot(vertexLocalPos.xz, float2(12.9898, 78.233))) * 43758.5453); + influence *= 0.9 + 0.1 * randomOffset; + + float3 displaceDirLocal = normalize(mul(displaceDirWorld, (float3x3)instWorldMatrix)); + return displaceDirLocal * heroAffectStrength * influence; +} +#endif + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + float3 position = Input.vPosition; + VobInstanceInfoAtlas inst = instances[Input.instanceID]; + +#if SHD_INFLUENCE + + if (inst.canBeAffectedByPlayer > 0) + { + position += CalculatePlayerInfluence(playerPos, position, minHeight, maxHeight, inst.world); + } +#endif + +#if SHD_WIND + + if (inst.windStrength > 0) + { + float heightRange = max(maxHeight - minHeight, 0.001); + float 
vertexHeightNorm = saturate((Input.vPosition.y - minHeight) / heightRange); + + position += ApplyTreeWind( + Input.vPosition, + normalize(windDir), + vertexHeightNorm, + globalTime, + inst.world, + inst.windStrength + ); + } +#endif + + // World-space transform + float3 worldPos = mul(float4(position, 1.0), inst.world).xyz; + float3 prevWorldPos = mul(float4(position, 1.0), inst.prevWorld).xyz; + + Output.vPosition = mul(float4(worldPos, 1.0), frame.M_ViewProj); + + // Pass raw UVs + slice to PS; atlas remapping done per-pixel to avoid frac() interpolation artifacts + Output.vTexcoord3D = float3(Input.vTex1, (float)inst.slice); + Output.vAtlasRect = float4(inst.uStart, inst.vStart, inst.uEnd, inst.vEnd); + + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = UnpackColor(inst.color); + Output.vNormalVS = mul(Input.vNormal, mul((float3x3)inst.world, (float3x3)frame.M_View)); + Output.vViewPosition = mul(float4(worldPos, 1.0), frame.M_View).xyz; // explicit .xyz: vViewPosition is float3, mul() yields float4 + + // Motion vectors (unjittered) + Output.vCurrClipPos = mul(float4(worldPos, 1.0), frame.M_UnjitteredViewProj); + Output.vPrevClipPos = mul(float4(prevWorldPos, 1.0), frame.M_PrevViewProj); + + return Output; +} diff --git a/D3D11Engine/Shaders/VS_ExPointLight.hlsl b/D3D11Engine/Shaders/VS_ExPointLight.hlsl index 76ac90b9..d8af00b6 100644 --- a/D3D11Engine/Shaders/VS_ExPointLight.hlsl +++ b/D3D11Engine/Shaders/VS_ExPointLight.hlsl @@ -19,7 +19,7 @@ cbuffer DS_PointLightConstantBuffer : register( b1 ) float PL_Pad1; float3 PL_PositionView; - matrix PL_InvProj; // Optimize out! 
+ float4 PL_ProjParams; // x = 1/P._11, y = 1/P._22, z = P._43, w = P._33 }; diff --git a/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl new file mode 100644 index 00000000..3fd6df7f --- /dev/null +++ b/D3D11Engine/Shaders/VS_ExWorldAtlas.hlsl @@ -0,0 +1,98 @@ +//-------------------------------------------------------------------------------------- +// World mesh vertex shader for atlas indirect draw path +// Reads per-submesh atlas descriptors from a StructuredBuffer. +// The submesh index comes from the instance ID buffer + StartInstanceLocation. +//-------------------------------------------------------------------------------------- + +#include "Globals_VS_ExConstants.h" + +cbuffer Matrices_PerFrame : register( b0 ) +{ + VS_ExConstantBuffer_PerFrame frame; +}; + +struct WorldMeshSubmeshGPUData +{ + int diffuseSlice; + float dUStart, dVStart, dUEnd, dVEnd; + int normalSlice; + float nUStart, nVStart, nUEnd, nVEnd; + int fxSlice; + float fUStart, fVStart, fUEnd, fVEnd; + uint flags; +}; + +StructuredBuffer submeshData : register( t1 ); + +//-------------------------------------------------------------------------------------- +// Input / Output structures +//-------------------------------------------------------------------------------------- +struct VS_INPUT +{ + float3 vPosition : POSITION; + float3 vNormal : NORMAL; + float2 vTex1 : TEXCOORD0; + float2 vTex2 : TEXCOORD1; + float4 vDiffuse : DIFFUSE; + + // StartInstanceLocation in the MDI args offsets this so it equals the submesh index + uint submeshIdx : INSTANCE_REMAP_INDEX; +}; + +struct VS_OUTPUT +{ + float3 vTexcoord3D : TEXCOORD0; // (rawU, rawV, diffuseSlice) + float2 vTexcoord2 : TEXCOORD1; + float4 vDiffuse : TEXCOORD2; + float4 vAtlasRect : TEXCOORD3; // diffuse (uStart, vStart, uEnd, vEnd) + float3 vNormalVS : TEXCOORD4; + float3 vViewPosition : TEXCOORD5; + float4 vCurrClipPos : TEXCOORD6; + float4 vPrevClipPos : TEXCOORD7; + float3 vNormalAtlas3D : TEXCOORD8; 
// (rawU, rawV, normalSlice) + float4 vNormalAtlasRect : TEXCOORD9; // normal (uStart, vStart, uEnd, vEnd) + float3 vFxAtlas3D : TEXCOORD10; // (rawU, rawV, fxSlice) + nointerpolation uint vFlags : TEXCOORD11; // material flags + float4 vFxAtlasRect : TEXCOORD12; // fx (uStart, vStart, uEnd, vEnd) + float4 vPosition : SV_POSITION; +}; + +//-------------------------------------------------------------------------------------- +// Vertex Shader +//-------------------------------------------------------------------------------------- +VS_OUTPUT VSMain( VS_INPUT Input ) +{ + VS_OUTPUT Output; + + WorldMeshSubmeshGPUData sm = submeshData[Input.submeshIdx]; + + // World mesh vertices are already in world space (M_World = Identity) + float3 positionWorld = Input.vPosition; + + Output.vPosition = mul( float4(positionWorld, 1), frame.M_ViewProj ); + + // Pass raw UVs + slice — PS does frac() and atlas remap per-pixel + Output.vTexcoord3D = float3( Input.vTex1, (float)sm.diffuseSlice ); + Output.vAtlasRect = float4( sm.dUStart, sm.dVStart, sm.dUEnd, sm.dVEnd ); + + Output.vTexcoord2 = Input.vTex2; + Output.vDiffuse = Input.vDiffuse; + Output.vNormalVS = mul( Input.vNormal, (float3x3)frame.M_View ); + Output.vViewPosition = mul( float4(positionWorld, 1), frame.M_View ).xyz; + + // Normal map atlas coords + Output.vNormalAtlas3D = float3( Input.vTex1, (float)sm.normalSlice ); + Output.vNormalAtlasRect = float4( sm.nUStart, sm.nVStart, sm.nUEnd, sm.nVEnd ); + + // FX map atlas coords + Output.vFxAtlas3D = float3( Input.vTex1, (float)sm.fxSlice ); + Output.vFxAtlasRect = float4( sm.fUStart, sm.fVStart, sm.fUEnd, sm.fVEnd ); + + Output.vFlags = sm.flags; + + // Motion vectors — static world mesh, so prev == current + Output.vCurrClipPos = mul( float4(positionWorld, 1.0), frame.M_UnjitteredViewProj ); + Output.vPrevClipPos = mul( float4(positionWorld, 1.0), frame.M_PrevViewProj ); + + return Output; +} diff --git a/D3D11Engine/Toolbox.cpp b/D3D11Engine/Toolbox.cpp index 
0b0fca05..dc504b17 100644 --- a/D3D11Engine/Toolbox.cpp +++ b/D3D11Engine/Toolbox.cpp @@ -150,6 +150,25 @@ namespace Toolbox { return _mm_cvtss_f32( _mm_rcp_ss( _mm_rsqrt_ss( _mm_set_ss( dx * dx + dz * dz ) ) ) ); } + float ComputePointAABBDistanceSq( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ) { // Squared distance from p to the box [min, max]; 0 if p is inside + float dx = std::max( std::max( min.x - p.x, 0.0f ), p.x - max.x ); // per axis: gap below min, gap above max, or 0 in between + float dy = std::max( std::max( min.y - p.y, 0.0f ), p.y - max.y ); + float dz = std::max( std::max( min.z - p.z, 0.0f ), p.z - max.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + + float ComputePointAABBDistanceSq( const XMFLOAT3& p, const DirectX::BoundingBox& box ) { // Same squared distance, for a box given as Center/Extents + // 1. Get absolute distance from point to the center of the box + // 2. Subtract the box extents to get the distance to the edge + // 3. Clamp to 0 if the point is inside the box bounds along that axis + float dx = std::max( 0.0f, std::abs( p.x - box.Center.x ) - box.Extents.x ); + float dy = std::max( 0.0f, std::abs( p.y - box.Center.y ) - box.Extents.y ); + float dz = std::max( 0.0f, std::abs( p.z - box.Center.z ) - box.Extents.z ); + + return (dx * dx) + (dy * dy) + (dz * dz); + } + /** Computes the Normal of a triangle */ FXMVECTOR ComputeNormal( const XMFLOAT3& v0, const XMFLOAT3& v1, const XMFLOAT3& v2 ) { FXMVECTOR Normal = XMVector3Normalize( XMVector3Cross( (XMLoadFloat3( &v1 ) - XMLoadFloat3( &v0 )), (XMLoadFloat3( &v2 ) - XMLoadFloat3( &v0 )) ) ); diff --git a/D3D11Engine/Toolbox.h b/D3D11Engine/Toolbox.h index 371dc8b7..2b06c1c6 100644 --- a/D3D11Engine/Toolbox.h +++ b/D3D11Engine/Toolbox.h @@ -6,6 +6,7 @@ #include #include "Types.h" +#include /** Misc. 
tools */ enum zTCam_ClipType; @@ -171,6 +172,10 @@ namespace Toolbox { /** Computes the distance of a point to an AABB */ float ComputePointAABBDistance( const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max ); + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const XMFLOAT3& min, const XMFLOAT3& max); + + float ComputePointAABBDistanceSq(const XMFLOAT3& p, const DirectX::BoundingBox& box); + /** Returns whether the given file exists */ bool FileExists( const std::string& file ); diff --git a/D3D11Engine/VobCulling.cpp b/D3D11Engine/VobCulling.cpp new file mode 100644 index 00000000..db0b0e52 --- /dev/null +++ b/D3D11Engine/VobCulling.cpp @@ -0,0 +1,151 @@ +#include "VobCulling.h" +#include +#include "ConstantBufferStructs.h" +#include "WorldObjects.h" +#include "zCModel.h" +#include "zCMaterial.h" +#include + +using namespace DirectX; + +void VobCulling::CullAndGatherStaticVOBs_AVX2( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + const __m256 abs_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); + const __m256 zero = _mm256_setzero_ps(); + + struct alignas(32) SIMDPlane { + __m256 nx, ny, nz, d; + __m256 abs_nx, abs_ny, abs_nz; + }; + + SIMDPlane splanes[6]; + for ( int p = 0; p < 6; ++p ) { + splanes[p].nx = _mm256_set1_ps( planes[p].x ); + splanes[p].ny = _mm256_set1_ps( planes[p].y ); + splanes[p].nz = _mm256_set1_ps( planes[p].z ); + splanes[p].d = _mm256_set1_ps( planes[p].w ); + + splanes[p].abs_nx = _mm256_and_ps( splanes[p].nx, abs_mask ); + splanes[p].abs_ny = _mm256_and_ps( splanes[p].ny, abs_mask ); + splanes[p].abs_nz = _mm256_and_ps( splanes[p].nz, abs_mask ); + } + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + __m256 cx = _mm256_load_ps( batch.cx ); + __m256 cy = 
_mm256_load_ps( batch.cy ); + __m256 cz = _mm256_load_ps( batch.cz ); + __m256 ex = _mm256_load_ps( batch.ex ); + __m256 ey = _mm256_load_ps( batch.ey ); + __m256 ez = _mm256_load_ps( batch.ez ); + + __m256 v_mask = _mm256_castsi256_ps( _mm256_set1_epi32( 0xFFFFFFFF ) ); + + for ( int p = 0; p < 6; ++p ) { + __m256 nx = splanes[p].nx; + __m256 ny = splanes[p].ny; + __m256 nz = splanes[p].nz; + __m256 d = splanes[p].d; + + __m256 abs_nx = splanes[p].abs_nx; + __m256 abs_ny = splanes[p].abs_ny; + __m256 abs_nz = splanes[p].abs_nz; + + __m256 r = _mm256_mul_ps( ex, abs_nx ); + r = _mm256_fmadd_ps( ey, abs_ny, r ); + r = _mm256_fmadd_ps( ez, abs_nz, r ); + + __m256 dist = _mm256_fmadd_ps( cx, nx, d ); + dist = _mm256_fmadd_ps( cy, ny, dist ); + dist = _mm256_fmadd_ps( cz, nz, dist ); + + __m256 outside = _mm256_cmp_ps( _mm256_sub_ps( dist, r ), zero, _CMP_GT_OQ ); + v_mask = _mm256_andnot_ps( outside, v_mask ); + } + + uint32_t mask = _mm256_movemask_ps( v_mask ); + + // INSTANT SKIP: If mask is 0, all 8 items are outside the frustum. 
+ if ( mask == 0 ) continue; + + // BIT SCAN: Extract visible items efficiently + while ( mask != 0 ) { + // Find the index of the lowest set bit (0 to 7) + uint32_t bitIndex = _tzcnt_u32( mask ); + + // Calculate actual instance index + uint32_t instanceIdx = static_cast<uint32_t>( i * 8 ) + bitIndex; + + // Push to dense render queue; slots past the end of instances (presumably padding in the last batch) are skipped + if ( instanceIdx < instances.size() ) outRenderQueue.push_back( { + instanceIdx, + reinterpret_cast<MeshVisualInfo*>(instances[instanceIdx]->VisualInfo), + } ); + + // Clear the lowest set bit so we can find the next one + // e.g., 010100 -> 010000 + mask &= (mask - 1); + } + } +} + +void VobCulling::CullAndGatherStaticVOBs_DirectXMath( + const std::vector<AABB_SoA_Batch8>& batches, + const std::vector<VobInfo*>& instances, + const XMFLOAT4 planes[6], + std::vector<StaticVobRenderItem>& outRenderQueue ) +{ + outRenderQueue.clear(); + // Pre-reserve to avoid reallocations + outRenderQueue.reserve( instances.size() * 2 ); + + for ( size_t i = 0; i < batches.size(); ++i ) { + const AABB_SoA_Batch8& batch = batches[i]; + + // Process each of the 8 AABBs in this batch + for ( int j = 0; j < 8; ++j ) { + XMFLOAT3 center( batch.cx[j], batch.cy[j], batch.cz[j] ); + XMFLOAT3 extents( batch.ex[j], batch.ey[j], batch.ez[j] ); + + bool visible = true; + + // Test against all 6 frustum planes + for ( int p = 0; p < 6; ++p ) { + // Get absolute values of plane normal components + float abs_nx = std::abs( planes[p].x ); + float abs_ny = std::abs( planes[p].y ); + float abs_nz = std::abs( planes[p].z ); + + // Calculate the radius (projected extent along plane normal) + float r = extents.x * abs_nx + extents.y * abs_ny + extents.z * abs_nz; + + // Calculate distance from center to plane + float dist = center.x * planes[p].x + center.y * planes[p].y + center.z * planes[p].z + planes[p].w; + + // If dist - r > 0, box is completely outside this plane + if ( dist - r > 0.0f ) { + visible = false; + break; + } + } + + if ( visible && (i * 8 + j) < instances.size() ) { // also rejects slots past the end of instances (presumably padding in the last batch) + uint32_t instanceIdx = static_cast<uint32_t>(i * 8 + j); + + outRenderQueue.push_back( { + instanceIdx, + 
reinterpret_cast(instances[instanceIdx]->VisualInfo), + } ); + } + } + } +} diff --git a/D3D11Engine/VobCulling.h b/D3D11Engine/VobCulling.h new file mode 100644 index 00000000..4eb9bae9 --- /dev/null +++ b/D3D11Engine/VobCulling.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include + +// 1. The SoAoS Bounding Data (Aligned for AVX) +struct alignas(32) AABB_SoA_Batch8 { + float cx[8], cy[8], cz[8]; + float ex[8], ey[8], ez[8]; +}; + +// 3. The Dense Render Item (Output of the culler) +struct StaticVobRenderItem { + uint32_t instanceIndex; // index into an VobInfo* + struct MeshVisualInfo* mvi; +}; + +struct VobInfo; + +class VobCulling +{ +public: + static void CullAndGatherStaticVOBs( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ) { +#ifdef __AVX2__ + CullAndGatherStaticVOBs_AVX2( batches, instances, planes, outRenderQueue ); +#else + CullAndGatherStaticVOBs_DirectXMath( batches, instances, planes, outRenderQueue ); +#endif + } + +private: + static void CullAndGatherStaticVOBs_AVX2( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ); + + // DirectXMath-based alternative for debugging/verification + static void CullAndGatherStaticVOBs_DirectXMath( + const std::vector& batches, + const std::vector& instances, + const DirectX::XMFLOAT4 planes[6], + std::vector& outRenderQueue ); +}; + diff --git a/D3D11Engine/WorldConverter.cpp b/D3D11Engine/WorldConverter.cpp index afcfed87..2596ed03 100644 --- a/D3D11Engine/WorldConverter.cpp +++ b/D3D11Engine/WorldConverter.cpp @@ -93,8 +93,8 @@ void WorldConverter::WorldMeshCollectPolyRange( const float3& position, float ra std::vector vertices; IndexVertices( &it->second->Vertices[0], it->second->Vertices.size(), vertices, indices ); - it->second->Vertices = vertices; - it->second->Indices = indices; + it->second->Vertices = std::move( 
vertices ); + it->second->Indices = std::move( indices ); // Create the buffers Engine::GraphicsEngine->CreateVertexBuffer( &it->second->MeshVertexBuffer ); @@ -251,8 +251,8 @@ XRESULT WorldConverter::LoadWorldMeshFromFile( const std::string& file, std::map std::vector indices; IndexVertices( &it.second->Vertices[0], it.second->Vertices.size(), indexedVertices, indices ); - it.second->Vertices = indexedVertices; - it.second->Indices = indices; + it.second->Vertices = std::move( indexedVertices ); + it.second->Indices = std::move( indices ); // Create the buffers Engine::GraphicsEngine->CreateVertexBuffer( &it.second->MeshVertexBuffer ); @@ -526,8 +526,8 @@ HRESULT WorldConverter::ConvertWorldMesh( zCPolygon** polys, unsigned int numPol std::vector indices; IndexVertices( &it.second->Vertices[0], it.second->Vertices.size(), indexedVertices, indices ); - it.second->Vertices = indexedVertices; - it.second->Indices = indices; + it.second->Vertices = std::move( indexedVertices ); + it.second->Indices = std::move( indices ); // Create the buffers Engine::GraphicsEngine->CreateVertexBuffer( &it.second->MeshVertexBuffer ); @@ -668,7 +668,7 @@ void WorldConverter::GenerateFullSectionMesh( WorldMeshSectionInfo& section ) { std::vector indices; section.FullStaticMesh = new MeshInfo; - section.FullStaticMesh->Vertices = vx; + section.FullStaticMesh->Vertices = std::move( vx ); // Create the buffers Engine::GraphicsEngine->CreateVertexBuffer( &section.FullStaticMesh->MeshVertexBuffer ); @@ -1257,8 +1257,8 @@ void WorldConverter::Extract3DSMeshFromVisual2( zCProgMeshProto* visual, MeshVis continue; } - mi->Vertices = vertices; - mi->Indices = indices; + mi->Vertices = std::move( vertices ); + mi->Indices = std::move( indices ); mi->MeshIndex = i; // Create the buffers diff --git a/D3D11Engine/packages.config b/D3D11Engine/packages.config index 44327afd..6c5d5b62 100644 --- a/D3D11Engine/packages.config +++ b/D3D11Engine/packages.config @@ -2,6 +2,7 @@ + \ No newline at end of file 
diff --git a/D3D11Engine/zCProgMeshProto.h b/D3D11Engine/zCProgMeshProto.h index 70d58204..59b9fccc 100644 --- a/D3D11Engine/zCProgMeshProto.h +++ b/D3D11Engine/zCProgMeshProto.h @@ -77,6 +77,7 @@ class zCProgMeshProto : public zCVisual { /** Constructs a readable mesh from the data given in the progmesh */ void ConstructVertexBuffer( std::vector* vertices ) { zCArrayAdapt* pl = GetPositionList(); + vertices->reserve( pl->NumInArray ); for ( int i = 0; i < pl->NumInArray; i++ ) { ExVertexStruct vx; diff --git a/D3D11Engine/zCVisual.h b/D3D11Engine/zCVisual.h index b03fced0..c8e43f1f 100644 --- a/D3D11Engine/zCVisual.h +++ b/D3D11Engine/zCVisual.h @@ -55,7 +55,7 @@ class zCVisual { } for ( unsigned int i = 0; i < extv.size(); i++ ) { - std::string ext = extv[i]; + const std::string& ext = extv[i]; if ( ext == ".3DS" ) return VT_PROGMESHPROTO;