From 4837425e1f39155507b2051b1575b5e345fff9cf Mon Sep 17 00:00:00 2001 From: Junkang <754089122@qq.com> Date: Tue, 9 Jul 2024 23:22:23 +0800 Subject: [PATCH 1/2] feat: rwStorage support --- .../Include/RHI/DirectX12/Common.h | 9 +--- Engine/Source/RHI-DirectX12/Src/BindGroup.cpp | 7 ++- Engine/Source/RHI-DirectX12/Src/Buffer.cpp | 2 +- .../Source/RHI-DirectX12/Src/BufferView.cpp | 34 +++++++------ .../RHI-Vulkan/Include/RHI/Vulkan/Common.h | 2 + Engine/Source/RHI-Vulkan/Src/BindGroup.cpp | 12 +++-- .../Source/RHI-Vulkan/Src/CommandRecorder.cpp | 10 ++-- Engine/Source/RHI/Include/RHI/BufferView.h | 6 ++- Engine/Source/RHI/Include/RHI/Common.h | 16 +++--- Engine/Source/RHI/Src/BufferView.cpp | 9 +++- Engine/Source/Render/Src/ShaderCompiler.cpp | 4 +- Sample/RHI-ParallelCompute/Compute.hlsl | 17 ++++--- .../RHI-ParallelCompute/ParallelCompute.cpp | 51 +++++++++++-------- 13 files changed, 102 insertions(+), 77 deletions(-) diff --git a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h index eb35851b4..417a297f6 100644 --- a/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h +++ b/Engine/Source/RHI-DirectX12/Include/RHI/DirectX12/Common.h @@ -239,12 +239,6 @@ namespace RHI::DirectX12 { ECIMPL_ITEM(IndexFormat::uint32, DXGI_FORMAT_R32_UINT) ECIMPL_END(DXGI_FORMAT) - ECIMPL_BEGIN(StorageFormat, DXGI_FORMAT) - ECIMPL_ITEM(StorageFormat::float32, DXGI_FORMAT_R32_FLOAT) - ECIMPL_ITEM(StorageFormat::uint32, DXGI_FORMAT_R32_UINT) - ECIMPL_ITEM(StorageFormat::sint32, DXGI_FORMAT_R32_SINT) - ECIMPL_END(DXGI_FORMAT) - ECIMPL_BEGIN(VertexStepMode, D3D12_INPUT_CLASSIFICATION) ECIMPL_ITEM(VertexStepMode::perVertex, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA) ECIMPL_ITEM(VertexStepMode::perInstance, D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA) @@ -256,7 +250,8 @@ namespace RHI::DirectX12 { ECIMPL_ITEM(BufferState::copySrc, D3D12_RESOURCE_STATE_COPY_SOURCE) ECIMPL_ITEM(BufferState::copyDst, D3D12_RESOURCE_STATE_COPY_DEST) ECIMPL_ITEM(BufferState::shaderReadOnly, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE) - ECIMPL_ITEM(BufferState::storage, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + ECIMPL_ITEM(BufferState::storage, D3D12_RESOURCE_STATE_COMMON) + ECIMPL_ITEM(BufferState::rwStorage, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) ECIMPL_END(D3D12_RESOURCE_STATES) ECIMPL_BEGIN(TextureDimension, D3D12_RESOURCE_DIMENSION) diff --git a/Engine/Source/RHI-DirectX12/Src/BindGroup.cpp b/Engine/Source/RHI-DirectX12/Src/BindGroup.cpp index 3ba589ab4..8985bcdcf 100644 --- a/Engine/Source/RHI-DirectX12/Src/BindGroup.cpp +++ b/Engine/Source/RHI-DirectX12/Src/BindGroup.cpp @@ -12,11 +12,14 @@ namespace RHI::DirectX12 { static CD3DX12_CPU_DESCRIPTOR_HANDLE GetDescriptorCpuHandle(const BindGroupEntry& entry) { - if (entry.binding.type == BindingType::uniformBuffer || entry.binding.type == BindingType::storageBuffer) { + if (entry.binding.type == BindingType::uniformBuffer || + entry.binding.type == BindingType::storageBuffer || + entry.binding.type == BindingType::rwStorageBuffer) { const auto* bufferView = static_cast(std::get(entry.entity)); return bufferView->GetNativeCpuDescriptorHandle(); } - if (entry.binding.type == BindingType::texture || entry.binding.type == BindingType::storageTexture) { + if (entry.binding.type == BindingType::texture || + entry.binding.type == BindingType::storageTexture) { const auto* textureView = static_cast(std::get(entry.entity)); return textureView->GetNativeCpuDescriptorHandle(); } diff --git a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp index 3856850b8..186891480 100644 --- a/Engine/Source/RHI-DirectX12/Src/Buffer.cpp +++ b/Engine/Source/RHI-DirectX12/Src/Buffer.cpp @@ -33,7 +33,7 @@ namespace RHI::DirectX12 { static D3D12_RESOURCE_FLAGS GetDX12ResourceFlag(const BufferUsageFlags flag) { static std::unordered_map rules = { - { BufferUsageBits::storage, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS }, + { BufferUsageBits::rwStorage, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS }, }; static D3D12_RESOURCE_FLAGS fallback = D3D12_RESOURCE_FLAG_NONE; diff --git a/Engine/Source/RHI-DirectX12/Src/BufferView.cpp b/Engine/Source/RHI-DirectX12/Src/BufferView.cpp index 591e2a80f..1b3569b13 100644 --- a/Engine/Source/RHI-DirectX12/Src/BufferView.cpp +++ b/Engine/Source/RHI-DirectX12/Src/BufferView.cpp @@ -9,19 +9,6 @@ #include #include -namespace RHI::DirectX12 { - static uint32_t GetStrideOfStorageBuffer(StorageFormat format) - { - if (format == StorageFormat::float32 || - format == StorageFormat::sint32 || - format == StorageFormat::uint32) { - return 4; - } - - return -1; - } -} - namespace RHI::DirectX12 { DX12BufferView::DX12BufferView(DX12Buffer& inBuffer, const BufferViewCreateInfo& inCreateInfo) : BufferView(inCreateInfo), buffer(inBuffer) @@ -63,12 +50,27 @@ namespace RHI::DirectX12 { Assert((bufferUsages & BufferUsageBits::storage) != 0); auto storageViewInfo = std::get(inCreateInfo.extend); - // TODO: check the uav typed load + D3D12_SHADER_RESOURCE_VIEW_DESC desc {}; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + desc.Buffer.FirstElement = inCreateInfo.offset; + desc.Buffer.NumElements = inCreateInfo.size / storageViewInfo.stride; + desc.Buffer.StructureByteStride = storageViewInfo.stride; + + nativeView = buffer.GetDevice().AllocateCbvSrvUavDescriptor(); + buffer.GetDevice().GetNative()->CreateShaderResourceView(buffer.GetNative(), &desc, std::get>(nativeView)->GetCpuHandle()); + } else if (inCreateInfo.type == BufferViewType::rwStorageBinding) { + Assert((bufferUsages & BufferUsageBits::rwStorage) != 0); + auto storageViewInfo = std::get(inCreateInfo.extend); + + // TODO: check the uav typed load when it is necessary D3D12_UNORDERED_ACCESS_VIEW_DESC desc {}; - desc.Format = EnumCast(storageViewInfo.format); + desc.Format = DXGI_FORMAT_UNKNOWN; desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; desc.Buffer.FirstElement = inCreateInfo.offset; - desc.Buffer.NumElements = inCreateInfo.size / GetStrideOfStorageBuffer(storageViewInfo.format); + desc.Buffer.NumElements = inCreateInfo.size / storageViewInfo.stride; + desc.Buffer.StructureByteStride = storageViewInfo.stride; nativeView = buffer.GetDevice().AllocateCbvSrvUavDescriptor(); buffer.GetDevice().GetNative()->CreateUnorderedAccessView(buffer.GetNative(), nullptr, &desc, std::get>(nativeView)->GetCpuHandle()); diff --git a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Common.h b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Common.h index d7e822c8e..20316fcb1 100644 --- a/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Common.h +++ b/Engine/Source/RHI-Vulkan/Include/RHI/Vulkan/Common.h @@ -189,6 +189,7 @@ namespace RHI::Vulkan { ECIMPL_BEGIN(BindingType, VkDescriptorType) ECIMPL_ITEM(BindingType::uniformBuffer, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) ECIMPL_ITEM(BindingType::storageBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) + ECIMPL_ITEM(BindingType::rwStorageBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) ECIMPL_ITEM(BindingType::sampler, VK_DESCRIPTOR_TYPE_SAMPLER) ECIMPL_ITEM(BindingType::texture, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ECIMPL_ITEM(BindingType::storageTexture, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) @@ -286,6 +287,7 @@ namespace RHI::Vulkan { FCIMPL_ITEM(BufferUsageBits::vertex, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT) FCIMPL_ITEM(BufferUsageBits::uniform, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) FCIMPL_ITEM(BufferUsageBits::storage, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) + FCIMPL_ITEM(BufferUsageBits::rwStorage, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) FCIMPL_ITEM(BufferUsageBits::indirect, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) FCIMPL_END(VkBufferUsageFlagBits) diff --git a/Engine/Source/RHI-Vulkan/Src/BindGroup.cpp b/Engine/Source/RHI-Vulkan/Src/BindGroup.cpp index e58d3840a..a0d24505f 100644 --- a/Engine/Source/RHI-Vulkan/Src/BindGroup.cpp +++ b/Engine/Source/RHI-Vulkan/Src/BindGroup.cpp @@ -81,9 +81,13 @@ namespace RHI::Vulkan { int bufferInfosNum = 0; for (int i = 0; i < entryCount; i++) { const auto& entry = inCreateInfo.entries[i]; - if (entry.binding.type == BindingType::uniformBuffer || entry.binding.type == BindingType::storageBuffer) { + if (entry.binding.type == BindingType::uniformBuffer + || entry.binding.type == BindingType::storageBuffer + || entry.binding.type == BindingType::rwStorageBuffer) { bufferInfosNum++; - } else if (entry.binding.type == BindingType::sampler || entry.binding.type == BindingType::texture ||entry.binding.type == BindingType::storageTexture) { + } else if (entry.binding.type == BindingType::sampler + || entry.binding.type == BindingType::texture + ||entry.binding.type == BindingType::storageTexture) { imageInfosNum++; } } @@ -99,7 +103,9 @@ namespace RHI::Vulkan { descriptorWrites[i].descriptorCount = 1; descriptorWrites[i].descriptorType = EnumCast(entry.binding.type); - if (entry.binding.type == BindingType::uniformBuffer || entry.binding.type == BindingType::storageBuffer) { + if (entry.binding.type == BindingType::uniformBuffer + || entry.binding.type == BindingType::storageBuffer + || entry.binding.type == BindingType::rwStorageBuffer) { auto* bufferView = static_cast(std::get(entry.entity)); bufferInfos.emplace_back(); diff --git a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp index d154889e7..c4ccb522e 100644 --- a/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp +++ b/Engine/Source/RHI-Vulkan/Src/CommandRecorder.cpp @@ -26,7 +26,8 @@ namespace RHI::Vulkan { { BufferState::copySrc, VK_ACCESS_TRANSFER_READ_BIT }, { BufferState::copyDst, VK_ACCESS_TRANSFER_WRITE_BIT }, { BufferState::shaderReadOnly, VK_ACCESS_SHADER_READ_BIT }, - { BufferState::storage, VK_ACCESS_SHADER_WRITE_BIT } + { BufferState::storage, VK_ACCESS_SHADER_READ_BIT }, + { BufferState::rwStorage, VK_ACCESS_SHADER_WRITE_BIT } }; return map.at(inState); } @@ -39,7 +40,9 @@ namespace RHI::Vulkan { { BufferState::copySrc, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::copyDst, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } + { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } + }; return map.at(inState); } @@ -52,7 +55,8 @@ namespace RHI::Vulkan { { BufferState::copySrc, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::copyDst, VK_PIPELINE_STAGE_TRANSFER_BIT }, { BufferState::shaderReadOnly, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, - { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } + { BufferState::storage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT }, + { BufferState::rwStorage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT } }; return map.at(inState); } diff --git a/Engine/Source/RHI/Include/RHI/BufferView.h b/Engine/Source/RHI/Include/RHI/BufferView.h index f0335a0e2..c64a7df13 100644 --- a/Engine/Source/RHI/Include/RHI/BufferView.h +++ b/Engine/Source/RHI/Include/RHI/BufferView.h @@ -25,7 +25,9 @@ namespace RHI { }; struct StorageBufferViewInfo { - StorageFormat format; + uint32_t stride; + + explicit StorageBufferViewInfo(uint32_t inStride = 0); }; struct BufferViewCreateInfo { @@ -45,7 +47,7 @@ namespace RHI { BufferViewCreateInfo& SetSize(uint32_t inSize); BufferViewCreateInfo& SetExtendVertex(uint32_t inStride); BufferViewCreateInfo& SetExtendIndex(IndexFormat inFormat); - BufferViewCreateInfo& SetExtendStorage(StorageFormat inFormat); + BufferViewCreateInfo& SetExtendStorage(uint32_t inStride); size_t Hash() const; }; diff --git a/Engine/Source/RHI/Include/RHI/Common.h b/Engine/Source/RHI/Include/RHI/Common.h index e638fdc36..0a530d19e 100644 --- a/Engine/Source/RHI/Include/RHI/Common.h +++ b/Engine/Source/RHI/Include/RHI/Common.h @@ -192,6 +192,7 @@ namespace RHI { index, uniformBinding, storageBinding, + rwStorageBinding, max }; @@ -231,6 +232,7 @@ namespace RHI { enum class BindingType : EnumType { uniformBuffer, storageBuffer, + rwStorageBuffer, sampler, texture, storageTexture, @@ -290,14 +292,6 @@ namespace RHI { max }; - // TODO: Support more format - enum class StorageFormat: EnumType { - float32, - uint32, - sint32, - max - }; - enum class FrontFace : EnumType { ccw, cw, @@ -386,6 +380,7 @@ namespace RHI { copyDst, shaderReadOnly, storage, + rwStorage, max }; @@ -492,8 +487,9 @@ namespace RHI { vertex = 0x20, uniform = 0x40, storage = 0x80, - indirect = 0x100, - queryResolve = 0x200, + rwStorage = 0x100, + indirect = 0x200, + queryResolve = 0x400, max }; using BufferUsageFlags = Flags; diff --git a/Engine/Source/RHI/Src/BufferView.cpp b/Engine/Source/RHI/Src/BufferView.cpp index caef92fba..0be38cddb 100644 --- a/Engine/Source/RHI/Src/BufferView.cpp +++ b/Engine/Source/RHI/Src/BufferView.cpp @@ -15,6 +15,11 @@ namespace RHI { { } + StorageBufferViewInfo::StorageBufferViewInfo(uint32_t inStride) + : stride(inStride) + { + } + BufferViewCreateInfo::BufferViewCreateInfo( const BufferViewType inType, const uint32_t inSize, @@ -57,9 +62,9 @@ namespace RHI { return *this; } - BufferViewCreateInfo& BufferViewCreateInfo::SetExtendStorage(StorageFormat inFormat) + BufferViewCreateInfo& BufferViewCreateInfo::SetExtendStorage(uint32_t inStride) { - extend = StorageBufferViewInfo { inFormat }; + extend = StorageBufferViewInfo { inStride }; return *this; } diff --git a/Engine/Source/Render/Src/ShaderCompiler.cpp b/Engine/Source/Render/Src/ShaderCompiler.cpp index f9b37b0e7..78da8fc7a 100644 --- a/Engine/Source/Render/Src/ShaderCompiler.cpp +++ b/Engine/Source/Render/Src/ShaderCompiler.cpp @@ -43,7 +43,7 @@ namespace Render { { D3D_SIT_SAMPLER, RHI::BindingType::sampler }, { D3D_SIT_UAV_RWTYPED, RHI::BindingType::storageTexture }, { D3D_SIT_STRUCTURED, RHI::BindingType::storageBuffer }, - { D3D_SIT_UAV_RWSTRUCTURED, RHI::BindingType::storageBuffer } + { D3D_SIT_UAV_RWSTRUCTURED, RHI::BindingType::rwStorageBuffer } }; return map.at(type); } @@ -55,7 +55,7 @@ namespace Render { { D3D_SIT_TEXTURE, RHI::HlslBindingRangeType::texture }, { D3D_SIT_SAMPLER, RHI::HlslBindingRangeType::sampler }, { D3D_SIT_UAV_RWTYPED, RHI::HlslBindingRangeType::unorderedAccess }, - { D3D_SIT_STRUCTURED, RHI::HlslBindingRangeType::unorderedAccess }, + { D3D_SIT_STRUCTURED, RHI::HlslBindingRangeType::texture }, { D3D_SIT_UAV_RWSTRUCTURED, RHI::HlslBindingRangeType::unorderedAccess } }; return map.at(type); diff --git a/Sample/RHI-ParallelCompute/Compute.hlsl b/Sample/RHI-ParallelCompute/Compute.hlsl index 0b057534b..1ad5de7d2 100644 --- a/Sample/RHI-ParallelCompute/Compute.hlsl +++ b/Sample/RHI-ParallelCompute/Compute.hlsl @@ -1,14 +1,17 @@ #include -VkBinding(0, 0) cbuffer input : register(b0) -{ - float4 v[16]; +struct Data { + float2 v1; + float4 v2; }; -VkBinding(1, 0) RWStructuredBuffer output : register(u0); +VkBinding(0, 0) StructuredBuffer input : register(t0); -[numthreads(16, 1, 1)] -void CSMain(int id : SV_DispatchThreadID) { - output[id.x] = v[id.x] * v[id.x]; +VkBinding(1, 0) RWStructuredBuffer output : register(u0); + +[numthreads(32, 1, 1)] +void CSMain(uint3 id : SV_DispatchThreadID) { + output[id.x].v1 = input[id.x].v1 * input[id.x].v1; + output[id.x].v2 = input[id.x].v2 * input[id.x].v2; } diff --git a/Sample/RHI-ParallelCompute/ParallelCompute.cpp b/Sample/RHI-ParallelCompute/ParallelCompute.cpp index c1ae1820c..26f052b6b 100644 --- a/Sample/RHI-ParallelCompute/ParallelCompute.cpp +++ b/Sample/RHI-ParallelCompute/ParallelCompute.cpp @@ -30,15 +30,15 @@ class ParallelCompute final : public Application { BuildCmdBufferAndSubmit(); // Map the data so we can read it on CPU. - const auto* mappedData = static_cast(readbackBuffer->Map(MapMode::read, 0, dataNum * sizeof(FVec4))); + const auto* mappedData = static_cast(readbackBuffer->Map(MapMode::read, 0, dataNum * sizeof(PackedVec))); std::ofstream fout("results.txt"); Assert(fout.is_open()); for(int i = 0; i < dataNum; ++i) { - fout << "(" << mappedData[i].x << ", " << mappedData[i].y << ", " << - ", " << mappedData[i].z << ", " << mappedData[i].w << ")" << '\n'; + std::cout << "(" << mappedData[i].v1.x << ", " << mappedData[i].v1.y << ", " << mappedData[i].v2.x << ", " << mappedData[i].v2.y << ", " + << mappedData[i].v2.z << ", " << mappedData[i].v2.w << ")" << '\n'; } readbackBuffer->UnMap(); @@ -57,14 +57,15 @@ class ParallelCompute final : public Application { void PrepareDataAndCreateGPURes() { - std::vector data(dataNum); + std::vector data(dataNum); for(int i = 0; i < dataNum; ++i) { - data[i] = FVec4(i - 2, i - 1, i, i + 1); + data[i].v1 = FVec2(i + 1); + data[i].v2 = FVec4(i + 1); } const auto bufInfo = BufferCreateInfo() - .SetSize(data.size() * sizeof(FVec4)) + .SetSize(data.size() * sizeof(PackedVec)) .SetUsages(BufferUsageBits::mapWrite | BufferUsageBits::copySrc) .SetInitialState(BufferState::staging); @@ -76,15 +77,16 @@ class ParallelCompute final : public Application { } const auto inputBufInfo = BufferCreateInfo() - .SetSize(data.size() * sizeof(FVec4)) - .SetUsages(BufferUsageBits::copyDst | BufferUsageBits::uniform) + .SetSize(data.size() * sizeof(PackedVec)) + .SetUsages(BufferUsageBits::copyDst | BufferUsageBits::storage) .SetInitialState(BufferState::undefined); inputBuffer = device->CreateBuffer(inputBufInfo); const auto inputBufViewInfo = BufferViewCreateInfo() - .SetType(BufferViewType::uniformBinding) - .SetSize(data.size() * sizeof(FVec4)) - .SetOffset(0); + .SetType(BufferViewType::storageBinding) + .SetSize(data.size() * sizeof(PackedVec)) + .SetOffset(0) + .SetExtendStorage(sizeof(PackedVec)); inputBufferView = inputBuffer->CreateBufferView(inputBufViewInfo); const UniqueRef copyCmd = device->CreateCommandBuffer(); @@ -95,7 +97,7 @@ class ParallelCompute final : public Application { copyRecorder->CopyBufferToBuffer( stagingBuf.Get(), inputBuffer.Get(), - BufferCopyInfo(0, 0, data.size() * sizeof(FVec4))); + BufferCopyInfo(0, 0, data.size() * sizeof(PackedVec))); copyRecorder->ResourceBarrier(Barrier::Transition(inputBuffer.Get(), BufferState::copyDst, BufferState::shaderReadOnly)); copyRecorder->EndPass(); } @@ -108,20 +110,20 @@ class ParallelCompute final : public Application { mFence->Wait(); const auto outputBufferInfo = BufferCreateInfo() - .SetSize(data.size() * sizeof(FVec4)) - .SetUsages(BufferUsageBits::storage | BufferUsageBits::copySrc) - .SetInitialState(BufferState::storage); + .SetSize(data.size() * sizeof(PackedVec)) + .SetUsages(BufferUsageBits::rwStorage | BufferUsageBits::copySrc) + .SetInitialState(BufferState::rwStorage); outputBuffer = device->CreateBuffer(outputBufferInfo); const auto outputBufferViewInfo = BufferViewCreateInfo() - .SetType(BufferViewType::storageBinding) - .SetSize(data.size() * sizeof(FVec4)) + .SetType(BufferViewType::rwStorageBinding) + .SetSize(data.size() * sizeof(PackedVec)) .SetOffset(0) - .SetExtendStorage(StorageFormat::float32); + .SetExtendStorage(sizeof(PackedVec)); outputBufferView = outputBuffer->CreateBufferView(outputBufferViewInfo); const auto readbackBufferInfo = BufferCreateInfo() - .SetSize(data.size() * sizeof(FVec4)) + .SetSize(data.size() * sizeof(PackedVec)) .SetUsages(BufferUsageBits::mapRead | BufferUsageBits::copyDst) .SetInitialState(BufferState::copyDst); readbackBuffer = device->CreateBuffer(readbackBufferInfo); @@ -175,11 +177,11 @@ class ParallelCompute final : public Application { // read back to host buffer UniqueRef copyRecorder = recorder->BeginCopyPass(); - copyRecorder->ResourceBarrier(Barrier::Transition(outputBuffer.Get(), BufferState::storage, BufferState::copySrc)); + copyRecorder->ResourceBarrier(Barrier::Transition(outputBuffer.Get(), BufferState::rwStorage, BufferState::copySrc)); copyRecorder->CopyBufferToBuffer( outputBuffer.Get(), readbackBuffer.Get(), - BufferCopyInfo(0, 0, dataNum * sizeof(FVec4))); + BufferCopyInfo(0, 0, dataNum * sizeof(PackedVec))); copyRecorder->EndPass(); recorder->End(); @@ -190,7 +192,12 @@ class ParallelCompute final : public Application { fence->Wait(); } - const int dataNum = 16; + struct PackedVec { + FVec2 v1; + FVec4 v2; + }; + + const int dataNum = 32; Gpu* gpu = nullptr; UniqueRef device; From dfecd5b61030975c7099509d00a4feb1b8279076 Mon Sep 17 00:00:00 2001 From: Junkang <754089122@qq.com> Date: Thu, 11 Jul 2024 10:02:34 +0800 Subject: [PATCH 2/2] fix: spir-v requires different alignment rules with dx for storage buffer --- Sample/RHI-ParallelCompute/Compute.hlsl | 3 ++- Sample/RHI-ParallelCompute/ParallelCompute.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Sample/RHI-ParallelCompute/Compute.hlsl b/Sample/RHI-ParallelCompute/Compute.hlsl index 1ad5de7d2..a4960ab96 100644 --- a/Sample/RHI-ParallelCompute/Compute.hlsl +++ b/Sample/RHI-ParallelCompute/Compute.hlsl @@ -1,7 +1,8 @@ #include +// spir-v treat vec2 and vec4 as built-in type? struct Data { - float2 v1; + float4 v1; float4 v2; }; diff --git a/Sample/RHI-ParallelCompute/ParallelCompute.cpp b/Sample/RHI-ParallelCompute/ParallelCompute.cpp index 26f052b6b..fcc99315a 100644 --- a/Sample/RHI-ParallelCompute/ParallelCompute.cpp +++ b/Sample/RHI-ParallelCompute/ParallelCompute.cpp @@ -37,8 +37,10 @@ class ParallelCompute final : public Application { for(int i = 0; i < dataNum; ++i) { - std::cout << "(" << mappedData[i].v1.x << ", " << mappedData[i].v1.y << ", " << mappedData[i].v2.x << ", " << mappedData[i].v2.y << ", " - << mappedData[i].v2.z << ", " << mappedData[i].v2.w << ")" << '\n'; + std::cout << "(" + << mappedData[i].v1.x << ", " << mappedData[i].v1.y << ", " << mappedData[i].v1.z << ", " << mappedData[i].v1.w << ", " + << mappedData[i].v2.x << ", " << mappedData[i].v2.y << ", " << mappedData[i].v2.z << ", " << mappedData[i].v2.w + << ")" << '\n'; } readbackBuffer->UnMap(); @@ -60,7 +62,7 @@ class ParallelCompute final : public Application { std::vector data(dataNum); for(int i = 0; i < dataNum; ++i) { - data[i].v1 = FVec2(i + 1); + data[i].v1 = FVec4(i - 1); data[i].v2 = FVec4(i + 1); } @@ -193,7 +195,7 @@ class ParallelCompute final : public Application { } struct PackedVec { - FVec2 v1; + FVec4 v1; FVec4 v2; };