diff --git a/include/API/Device.h b/include/API/Device.h index f0b85829b..1c008c919 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -98,6 +98,10 @@ struct TraditionalRasterPipelineCreateDesc { std::optional DS; std::optional GS; ShaderContainer PS; + // View-instancing fan-out (1 == no view instancing). When > 1, backends + // that implement view instancing should route view N to render-target + // array slice N. + uint32_t ViewInstanceCount = 1; void setShader(Stages Stage, ShaderContainer &&SC) { switch (Stage) { @@ -253,7 +257,8 @@ createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); // Creates a depth/stencil texture matching the dimensions of a render target. llvm::Expected> -createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height); +createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height, + uint32_t ArraySize = 1); llvm::Expected> createBufferWithData(Device &Dev, std::string Name, diff --git a/include/API/Enums.h b/include/API/Enums.h index d050b40fa..a77f76305 100644 --- a/include/API/Enums.h +++ b/include/API/Enums.h @@ -16,6 +16,7 @@ enum class ResourceKind { StructuredBuffer, ByteAddressBuffer, Texture2D, + Texture2DArray, RWBuffer, RWStructuredBuffer, RWByteAddressBuffer, diff --git a/include/API/FormatConversion.h b/include/API/FormatConversion.h index 816705389..5e4b4d3a2 100644 --- a/include/API/FormatConversion.h +++ b/include/API/FormatConversion.h @@ -21,6 +21,8 @@ #include "llvm/Support/Error.h" +#include + namespace offloadtest { // Bridge for code that still describes textures as DataFormat + Channels (e.g. 
@@ -139,20 +141,28 @@ validateTextureDescMatchesCPUBuffer(const TextureCreateDesc &Desc, "TextureCreateDesc mip levels %u does not match CPUBuffer mip " "levels %d.", Desc.MipLevels, Buf.OutputProps.MipLevels); + if (Desc.ArraySize != + static_cast(std::max(1, Buf.OutputProps.ArraySize))) + return llvm::createStringError( + std::errc::invalid_argument, + "TextureCreateDesc array size %u does not match CPUBuffer array " + "size %d.", + Desc.ArraySize, Buf.OutputProps.ArraySize); const uint32_t TexelSize = getFormatSizeInBytes(Desc.Fmt); if (Buf.Stride > 0 && static_cast(Buf.Stride) != TexelSize) return llvm::createStringError( std::errc::invalid_argument, "CPUBuffer stride %d does not match texture format element size %u.", Buf.Stride, TexelSize); - const uint64_t ExpectedSize = - static_cast(Desc.Width) * Desc.Height * TexelSize; + const uint64_t ExpectedSize = static_cast(Desc.Width) * + Desc.Height * Desc.ArraySize * TexelSize; if (static_cast(Buf.size()) != ExpectedSize) return llvm::createStringError( std::errc::invalid_argument, "CPUBuffer size %u does not match expected size %llu " - "(width %u * height %u * element size %u).", - Buf.size(), ExpectedSize, Desc.Width, Desc.Height, TexelSize); + "(width %u * height %u * array size %u * element size %u).", + Buf.size(), ExpectedSize, Desc.Width, Desc.Height, Desc.ArraySize, + TexelSize); return llvm::Error::success(); } diff --git a/include/API/Texture.h b/include/API/Texture.h index 26b9b030f..b1a032f23 100644 --- a/include/API/Texture.h +++ b/include/API/Texture.h @@ -71,6 +71,10 @@ struct TextureCreateDesc { uint32_t Width; uint32_t Height; uint32_t MipLevels; + // Texture array slice count (1 == plain Texture2D). Render targets used + // with view instancing require this to match the pipeline's + // ViewInstanceCount. + uint32_t ArraySize = 1; // Clear value for render target or depth/stencil textures. 
// How and when this is applied depends on the backend: // - DX uses it as an optimized clear hint at resource creation time diff --git a/include/Support/Pipeline.h b/include/Support/Pipeline.h index 9cf0e5f77..e2897f26b 100644 --- a/include/Support/Pipeline.h +++ b/include/Support/Pipeline.h @@ -81,6 +81,7 @@ static inline DescriptorKind getDescriptorKind(ResourceKind RK) { case ResourceKind::StructuredBuffer: case ResourceKind::ByteAddressBuffer: case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::AccelerationStructure: return DescriptorKind::SRV; @@ -145,6 +146,7 @@ struct OutputProperties { int Width; int Depth; int MipLevels = 1; + int ArraySize = 1; }; static inline uint32_t getFormatSize(DataFormat Format) { @@ -246,6 +248,7 @@ struct Resource { case ResourceKind::Buffer: case ResourceKind::RWBuffer: case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: case ResourceKind::Sampler: case ResourceKind::SampledTexture2D: @@ -273,6 +276,7 @@ struct Resource { case ResourceKind::RWByteAddressBuffer: case ResourceKind::ConstantBuffer: case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: case ResourceKind::SampledTexture2D: case ResourceKind::AccelerationStructure: @@ -294,6 +298,7 @@ struct Resource { case ResourceKind::AccelerationStructure: return false; case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: case ResourceKind::SampledTexture2D: return true; @@ -356,6 +361,7 @@ struct Resource { case ResourceKind::StructuredBuffer: case ResourceKind::ByteAddressBuffer: case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::ConstantBuffer: case ResourceKind::Sampler: case ResourceKind::SampledTexture2D: @@ -541,6 +547,12 @@ struct Pipeline { DispatchParametersSet DispatchParameters; AccelerationStructureDescs AccelStructs; + // Number of view instances to render in a 
single draw (D3D12 view + // instancing). Default 1 = no view instancing. When > 1 the render target + // must be a Texture2DArray whose ArraySize is at least this value, and the + // backend routes view N to array slice N of the render/depth target. + uint32_t ViewInstanceCount = 1; + uint32_t getVertexCount() const { if (DispatchParameters.VertexCount) return *DispatchParameters.VertexCount; @@ -829,6 +841,7 @@ template <> struct ScalarEnumerationTraits { ENUM_CASE(StructuredBuffer); ENUM_CASE(ByteAddressBuffer); ENUM_CASE(Texture2D); + ENUM_CASE(Texture2DArray); ENUM_CASE(RWBuffer); ENUM_CASE(RWStructuredBuffer); ENUM_CASE(RWByteAddressBuffer); diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index edfaccb7b..47742f1e4 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -193,7 +193,7 @@ getDXPrimitiveTopology(PrimitiveTopology Topology, llvm_unreachable("All PrimitiveTopology cases handled"); } -static uint64_t getAlignedTextureBufferSize(const CPUBuffer &B) { +static uint64_t getAlignedSliceSize(const CPUBuffer &B) { const uint64_t AlignedPitch = getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()); const uint64_t LastRowSize = @@ -201,6 +201,19 @@ static uint64_t getAlignedTextureBufferSize(const CPUBuffer &B) { return uint64_t(B.OutputProps.Height - 1) * AlignedPitch + LastRowSize; } +static uint64_t getAlignedTextureBufferSize(const CPUBuffer &B) { + const uint32_t ArraySize = std::max(1, B.OutputProps.ArraySize); + const uint64_t SliceSize = getAlignedSliceSize(B); + if (ArraySize == 1) + return SliceSize; + // Each slice has to start at D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT + // (512-byte) boundary in the destination readback buffer for + // CopyTextureRegion's placed footprint to be legal. + const uint64_t SlicePitch = + llvm::alignTo(SliceSize, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + return SlicePitch * (ArraySize - 1) + SliceSize; +} + static uint32_t getUAVBufferSize(const Resource &R) { return R.HasCounter ? 
llvm::alignTo(R.size(), D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT) + @@ -226,6 +239,7 @@ static D3D12_RESOURCE_DIMENSION getDXDimension(ResourceKind RK) { case ResourceKind::AccelerationStructure: return D3D12_RESOURCE_DIMENSION_BUFFER; case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: return D3D12_RESOURCE_DIMENSION_TEXTURE2D; case ResourceKind::Sampler: @@ -246,11 +260,24 @@ getResourceDescription(const Resource &R) { "Multiple mip levels are not yet supported " "for DirectX textures."); + if (B.OutputProps.ArraySize < 1) + return llvm::createStringError(std::errc::invalid_argument, + "OutputProps.ArraySize must be >= 1."); + + if (B.OutputProps.ArraySize > 1 && R.Kind != ResourceKind::Texture2DArray) + return llvm::createStringError( + std::errc::not_supported, + "OutputProps.ArraySize > 1 is only supported for Texture2DArray."); + const DXGI_FORMAT Format = R.isTexture() ? getDXFormat(B.Format, B.Channels) : DXGI_FORMAT_UNKNOWN; const uint32_t Width = R.isTexture() ? B.OutputProps.Width : getUAVBufferSize(R); const uint32_t Height = R.isTexture() ? B.OutputProps.Height : 1; + const uint16_t DepthOrArraySize = + R.Kind == ResourceKind::Texture2DArray + ? static_cast(B.OutputProps.ArraySize) + : 1; D3D12_TEXTURE_LAYOUT Layout; if (R.isTexture()) @@ -265,8 +292,9 @@ getResourceDescription(const Resource &R) { const D3D12_RESOURCE_FLAGS Flags = R.isReadWrite() ? 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; - const D3D12_RESOURCE_DESC ResDesc = {Dimension, 0, Width, Height, 1, 1, - Format, {1, 0}, Layout, Flags}; + const D3D12_RESOURCE_DESC ResDesc = {Dimension, 0, Width, Height, + DepthOrArraySize, 1, Format, {1, 0}, + Layout, Flags}; return ResDesc; } @@ -296,6 +324,11 @@ static D3D12_SHADER_RESOURCE_VIEW_DESC getSRVDescription(const Resource &R) { Desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; Desc.Texture2D = D3D12_TEX2D_SRV{0, 1, 0, 0}; break; + case ResourceKind::Texture2DArray: + Desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + Desc.Texture2DArray = D3D12_TEX2D_ARRAY_SRV{ + 0, 1, 0, static_cast(R.BufferPtr->OutputProps.ArraySize), 0, 0}; + break; case ResourceKind::RWStructuredBuffer: case ResourceKind::RWBuffer: case ResourceKind::RWByteAddressBuffer: @@ -341,6 +374,7 @@ static D3D12_UNORDERED_ACCESS_VIEW_DESC getUAVDescription(const Resource &R) { case ResourceKind::Buffer: case ResourceKind::ByteAddressBuffer: case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::ConstantBuffer: case ResourceKind::Sampler: llvm_unreachable("Not a UAV type!"); @@ -1224,10 +1258,33 @@ class DXDevice : public offloadtest::Device { PSODesc.SampleDesc.Count = 1; ComPtr PSO; - if (auto Err = HR::toError( - Device->CreateGraphicsPipelineState(&PSODesc, IID_PPV_ARGS(&PSO)), - "Failed to create graphics PSO.")) + if (Desc.ViewInstanceCount > 1) { + // View-instanced pipelines can't be created from the legacy + // D3D12_GRAPHICS_PIPELINE_STATE_DESC; switch to the stream-desc API + // and attach a view-instancing subobject that routes view N to the + // matching render/depth target array slice. 
+ llvm::SmallVector ViewLocations; + ViewLocations.reserve(Desc.ViewInstanceCount); + for (uint32_t I = 0; I < Desc.ViewInstanceCount; ++I) + ViewLocations.push_back({/*ViewportArrayIndex=*/0, + /*RenderTargetArrayIndex=*/I}); + + CD3DX12_PIPELINE_STATE_STREAM2 Stream(PSODesc); + Stream.ViewInstancingDesc = CD3DX12_VIEW_INSTANCING_DESC( + static_cast(Desc.ViewInstanceCount), ViewLocations.data(), + D3D12_VIEW_INSTANCING_FLAG_NONE); + + const D3D12_PIPELINE_STATE_STREAM_DESC StreamDesc = {sizeof(Stream), + &Stream}; + if (auto Err = HR::toError( + Device->CreatePipelineState(&StreamDesc, IID_PPV_ARGS(&PSO)), + "Failed to create view-instanced graphics PSO.")) + return Err; + } else if (auto Err = HR::toError(Device->CreateGraphicsPipelineState( + &PSODesc, IID_PPV_ARGS(&PSO)), + "Failed to create graphics PSO.")) { return Err; + } return std::make_unique( Name, RootSig, PSO, @@ -1364,7 +1421,8 @@ class DXDevice : public offloadtest::Device { TexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; TexDesc.Width = Desc.Width; TexDesc.Height = Desc.Height; - TexDesc.DepthOrArraySize = 1; + TexDesc.DepthOrArraySize = + static_cast(std::max(1u, Desc.ArraySize)); TexDesc.MipLevels = static_cast(Desc.MipLevels); TexDesc.Format = getDXGIFormat(Desc.Fmt); TexDesc.SampleDesc.Count = 1; @@ -1565,15 +1623,20 @@ class DXDevice : public offloadtest::Device { addUploadBeginBarrier(IS, Destination); if (R.isTexture()) { const offloadtest::CPUBuffer &B = *R.BufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - B.OutputProps.Width * B.getElementSize())}; - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(Destination.Get(), 0); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(Source.Get(), Footprint); - - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + const DXGI_FORMAT DXFormat = getDXFormat(B.Format, B.Channels); + const uint32_t 
RowPitch = B.OutputProps.Width * B.getElementSize(); + const uint32_t SliceBytes = RowPitch * B.OutputProps.Height; + const uint32_t NumSlices = + R.Kind == ResourceKind::Texture2DArray ? B.OutputProps.ArraySize : 1; + for (uint32_t Slice = 0; Slice < NumSlices; ++Slice) { + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ + Slice * SliceBytes, + CD3DX12_SUBRESOURCE_FOOTPRINT(DXFormat, B.OutputProps.Width, + B.OutputProps.Height, 1, RowPitch)}; + const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(Destination.Get(), Slice); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(Source.Get(), Footprint); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + } } else IS.CB->CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, R.size()); @@ -2266,8 +2329,32 @@ class DXDevice : public offloadtest::Device { Device->GetCopyableFootprints(&RTDesc, 0u, 1u, 0u, &Placed, &NumRows, &RowSizeInBytes, &TotalBytes); - P.Bindings.RTargetBufferPtr->copyFromTexture(Mapped, - Placed.Footprint.RowPitch); + CPUBuffer &OutBuf = *P.Bindings.RTargetBufferPtr; + const uint32_t ArraySize = std::max(1, OutBuf.OutputProps.ArraySize); + if (ArraySize == 1) { + OutBuf.copyFromTexture(Mapped, Placed.Footprint.RowPitch); + } else { + // Slices were placed in the readback buffer at 512-byte-aligned + // offsets by createGraphicsCommands(); pull them back out into the + // CPUBuffer's tight slice-major layout. 
+ const uint64_t SliceSize = getAlignedSliceSize(OutBuf); + const uint64_t SlicePitch = + llvm::alignTo(SliceSize, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + const uint64_t TightSliceBytes = uint64_t(OutBuf.OutputProps.Width) * + OutBuf.OutputProps.Height * + OutBuf.getElementSize(); + const uint32_t RowBytes = OutBuf.getImageRowBytes(); + uint8_t *Dst = reinterpret_cast(OutBuf.Data[0].get()); + const uint8_t *SrcBase = reinterpret_cast(Mapped); + const uint32_t Height = static_cast(OutBuf.OutputProps.Height); + for (uint32_t Slice = 0; Slice < ArraySize; ++Slice) { + const uint8_t *SliceSrc = SrcBase + Slice * SlicePitch; + uint8_t *SliceDst = Dst + Slice * TightSliceBytes; + for (uint32_t Y = 0; Y < Height; ++Y) + memcpy(SliceDst + size_t(Y) * RowBytes, + SliceSrc + size_t(Y) * Placed.Footprint.RowPitch, RowBytes); + } + } Readback.Buffer->Unmap(0, nullptr); return llvm::Error::success(); } @@ -2304,7 +2391,8 @@ class DXDevice : public offloadtest::Device { llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, - P.Bindings.RTargetBufferPtr->OutputProps.Height); + P.Bindings.RTargetBufferPtr->OutputProps.Height, + std::max(1, P.Bindings.RTargetBufferPtr->OutputProps.ArraySize)); if (!TexOrErr) return TexOrErr.takeError(); IS.DepthStencil = std::move(*TexOrErr); @@ -2368,24 +2456,33 @@ class DXDevice : public offloadtest::Device { Encoder.endEncoding(); // Transition the render target to copy source and copy to the readback - // buffer. + // buffer. For view-instanced (Texture2DArray) render targets, copy each + // slice at its 512-byte-aligned offset; readBack() reads them out + // slice-major. 
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( RT.Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); IS.CB->CmdList->ResourceBarrier(1, &Barrier); const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; - const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ - 0, - CD3DX12_SUBRESOURCE_FOOTPRINT( - getDXFormat(B.Format, B.Channels), B.OutputProps.Width, - B.OutputProps.Height, 1, - getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize()))}; - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(), - Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), 0); - - IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + const uint32_t ArraySize = std::max(1, B.OutputProps.ArraySize); + const uint64_t SliceSize = getAlignedSliceSize(B); + const uint64_t SlicePitch = + ArraySize == 1 + ? SliceSize + : llvm::alignTo(SliceSize, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); + const CD3DX12_SUBRESOURCE_FOOTPRINT FootprintDesc( + getDXFormat(B.Format, B.Channels), B.OutputProps.Width, + B.OutputProps.Height, 1, + getAlignedTexturePitch(B.OutputProps.Width, B.getElementSize())); + for (uint32_t Slice = 0; Slice < ArraySize; ++Slice) { + const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ + /*Offset=*/Slice * SlicePitch, FootprintDesc}; + const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RTReadback.Buffer.Get(), + Footprint); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RT.Resource.Get(), Slice); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + } auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { @@ -2538,6 +2635,7 @@ class DXDevice : public offloadtest::Device { PipelineDesc.Topology = P.Bindings.Topology; PipelineDesc.PatchControlPoints = P.Bindings.PatchControlPoints; PipelineDesc.DSFormat = Format::D32FloatS8Uint; + PipelineDesc.ViewInstanceCount = P.ViewInstanceCount; for (auto &Shader : P.Shaders) { 
ShaderContainer SC = {}; SC.EntryPoint = Shader.Entry; diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index 86875096e..94fa1f2c8 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -84,6 +84,7 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, Desc.Width = Buf.OutputProps.Width; Desc.Height = Buf.OutputProps.Height; Desc.MipLevels = 1; + Desc.ArraySize = std::max(1, Buf.OutputProps.ArraySize); Desc.OptimizedClearValue = ClearColor{}; if (auto Err = validateTextureDescMatchesCPUBuffer(Desc, Buf)) @@ -94,7 +95,8 @@ offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, llvm::Expected> offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, - uint32_t Height) { + uint32_t Height, + uint32_t ArraySize) { TextureCreateDesc Desc = {}; Desc.Location = MemoryLocation::GpuOnly; Desc.Usage = TextureUsage::DepthStencil; @@ -102,6 +104,7 @@ offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, Desc.Width = Width; Desc.Height = Height; Desc.MipLevels = 1; + Desc.ArraySize = std::max(1u, ArraySize); Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0}; return Dev.createTexture("DepthStencil", Desc); diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 7959daa9b..8fd42ba26 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -1092,6 +1092,8 @@ class MTLDevice : public offloadtest::Device { Desc = MTL::TextureDescriptor::texture2DDescriptor(Format, Width, Height, false); break; + case ResourceKind::Texture2DArray: + llvm_unreachable("Texture2DArray is not yet supported in Metal."); case ResourceKind::Sampler: llvm_unreachable("Not implemented yet."); case ResourceKind::SampledTexture2D: diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 282b2b9c2..c7d0f8b46 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -77,6 +77,7 @@ static VkDescriptorType getDescriptorType(const ResourceKind RK) { return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; case 
ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; case ResourceKind::RWTexture2D: @@ -163,6 +164,7 @@ static VkBufferUsageFlagBits getFlagBits(const ResourceKind RK) { case ResourceKind::ConstantBuffer: return VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: case ResourceKind::Sampler: case ResourceKind::SampledTexture2D: @@ -179,6 +181,8 @@ static VkImageViewType getImageViewType(const ResourceKind RK) { case ResourceKind::RWTexture2D: case ResourceKind::SampledTexture2D: return VK_IMAGE_VIEW_TYPE_2D; + case ResourceKind::Texture2DArray: + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; case ResourceKind::Buffer: case ResourceKind::RWBuffer: case ResourceKind::ByteAddressBuffer: @@ -196,6 +200,7 @@ static VkImageViewType getImageViewType(const ResourceKind RK) { static VkImageType getVKImageType(const ResourceKind RK) { switch (RK) { case ResourceKind::Texture2D: + case ResourceKind::Texture2DArray: case ResourceKind::RWTexture2D: case ResourceKind::SampledTexture2D: return VK_IMAGE_TYPE_2D; diff --git a/lib/Support/Pipeline.cpp b/lib/Support/Pipeline.cpp index 3940a550d..fc146a04d 100644 --- a/lib/Support/Pipeline.cpp +++ b/lib/Support/Pipeline.cpp @@ -77,6 +77,8 @@ void MappingTraits::mapping(IO &I, if (auto Err = P.validateDispatchParameters()) I.setError(llvm::toString(std::move(Err))); + I.mapOptional("ViewInstanceCount", P.ViewInstanceCount, 1u); + if (!I.outputting()) { for (auto &D : P.Sets) { for (auto &R : D.Resources) { @@ -429,6 +431,7 @@ void MappingTraits::mapping(IO &I, H = std::max(1u, H / 2); D = std::max(1u, D / 2); } + ExpectedSize *= static_cast(std::max(1, B.OutputProps.ArraySize)); if (B.Size != ExpectedSize) I.setError(Twine("Buffer '") + B.Name + "' size (" + Twine(B.Size) + @@ -546,6 +549,7 @@ void MappingTraits::mapping( I.mapRequired("Width", P.Width); I.mapRequired("Depth", P.Depth); I.mapOptional("MipLevels", 
P.MipLevels, 1); + I.mapOptional("ArraySize", P.ArraySize, 1); } void MappingTraits::mapping( diff --git a/test/Feature/Textures/Texture2DArray.Load.test.yaml b/test/Feature/Textures/Texture2DArray.Load.test.yaml new file mode 100644 index 000000000..06fb90864 --- /dev/null +++ b/test/Feature/Textures/Texture2DArray.Load.test.yaml @@ -0,0 +1,101 @@ +#--- source.hlsl +[[vk::binding(0, 0)]] Texture2DArray Tex : register(t0); +[[vk::binding(1, 0)]] RWBuffer Out : register(u0); + +[numthreads(1, 1, 1)] +void main() { + // Explicit Load(int4) + // Location: (x, y, slice, mip) + // Slice 0: Red, Green, Blue, White + Out[0] = Tex.Load(int4(0, 0, 0, 0)); + Out[1] = Tex.Load(int4(1, 0, 0, 0)); + Out[2] = Tex.Load(int4(0, 1, 0, 0)); + Out[3] = Tex.Load(int4(1, 1, 0, 0)); + + // Slice 1: solid Red + Out[4] = Tex.Load(int4(0, 0, 1, 0)); + Out[5] = Tex.Load(int4(1, 1, 1, 0)); + + // Slice 2: solid Green + Out[6] = Tex.Load(int4(0, 0, 2, 0)); + Out[7] = Tex.Load(int4(1, 1, 2, 0)); + + // Load(int4, int2) - With Offset, slice 0 + // (0,0) + (1,0) = (1,0) -> Green + Out[8] = Tex.Load(int4(0, 0, 0, 0), int2(1, 0)); + // (1,1) + (-1,-1) = (0,0) -> Red + Out[9] = Tex.Load(int4(1, 1, 0, 0), int2(-1, -1)); +} + +//--- pipeline.yaml +--- +Shaders: + - Stage: Compute + Entry: main + +Buffers: + - Name: Tex + Format: Float32 + Channels: 4 + OutputProps: { Width: 2, Height: 2, Depth: 1, ArraySize: 3 } + Data: [ # Slice 0: Red, Green, Blue, White + 1.0, 0.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 0.0, 0.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, + # Slice 1: solid Red + 1.0, 0.0, 0.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + 1.0, 0.0, 0.0, 1.0, + # Slice 2: solid Green + 0.0, 1.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0, + 0.0, 1.0, 0.0, 1.0 ] + + - Name: Out + Format: Float32 + Channels: 4 + FillSize: 160 # 10 * sizeof(float4) + + - Name: Expected + Format: Float32 + Channels: 4 + Data: [ 1.0, 0.0, 0.0, 1.0, # Slice 0 (0,0) Red + 0.0, 1.0, 0.0, 1.0, # Slice 0 (1,0) Green + 0.0, 
0.0, 1.0, 1.0, # Slice 0 (0,1) Blue + 1.0, 1.0, 1.0, 1.0, # Slice 0 (1,1) White + 1.0, 0.0, 0.0, 1.0, # Slice 1 (0,0) Red + 1.0, 0.0, 0.0, 1.0, # Slice 1 (1,1) Red + 0.0, 1.0, 0.0, 1.0, # Slice 2 (0,0) Green + 0.0, 1.0, 0.0, 1.0, # Slice 2 (1,1) Green + 0.0, 1.0, 0.0, 1.0, # Offset (1,0) -> Slice 0 Green + 1.0, 0.0, 0.0, 1.0 ] # Offset (-1,-1) -> Slice 0 Red + +DescriptorSets: + - Resources: + - Name: Tex + Kind: Texture2DArray + DirectXBinding: { Register: 0, Space: 0 } + VulkanBinding: { Binding: 0 } + - Name: Out + Kind: RWBuffer + DirectXBinding: { Register: 0, Space: 0 } + VulkanBinding: { Binding: 1 } + +Results: + - Result: LoadTest + Rule: BufferExact + Actual: Out + Expected: Expected +... +#--- end + + +# Texture2DArray support is currently DirectX-only. +# UNSUPPORTED: Vulkan || Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_0 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/Feature/ViewInstancing/multiview-2views.test b/test/Feature/ViewInstancing/multiview-2views.test new file mode 100644 index 000000000..86d01fdc9 --- /dev/null +++ b/test/Feature/ViewInstancing/multiview-2views.test @@ -0,0 +1,109 @@ +# Verifies D3D12 view instancing routes SV_ViewID-driven outputs into distinct +# render target array slices. +# +# Setup: +# * Pipeline.ViewInstanceCount = 2 (PSO stream ViewInstancingDesc with view +# locations {viewport 0, RT slice 0} and {viewport 0, RT slice 1}). +# * RenderTarget OutputProps.ArraySize = 2 (Texture2DArray RT created with +# DepthOrArraySize = 2). +# * Pixel shader emits a per-view color, so slice s contents are determined +# by SV_ViewID == s. +# +# Pixel shader reads SV_ViewID directly. The PS is built with ps_6_1, the +# minimum shader model that supports SV_ViewID as a stage input. 
+ +#--- vertex.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +PSInput main(float4 position : POSITION) { + PSInput o; + o.position = position; + return o; +} + +#--- pixel.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +// Per-view color is independent of fragment position so a single pixel-center +// comparison is enough to confirm SV_ViewID routed the right invocation to +// the right slice. +float4 main(PSInput input, uint vid : SV_ViewID) : SV_TARGET { + if (vid == 0) + return float4(0.25, 0.5, 0.75, 1.0); + return float4(0.75, 0.5, 0.25, 1.0); +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +ViewInstanceCount: 2 +Buffers: + # Two CCW triangles covering the full 2x2 RT. + - Name: VertexData + Format: Float32 + Stride: 12 + Data: [ -1.0, -1.0, 0.0, + -1.0, 1.0, 0.0, + 1.0, 1.0, 0.0, + 1.0, 1.0, 0.0, + 1.0, -1.0, 0.0, + -1.0, -1.0, 0.0 ] + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 128 # 2x2 RT, ArraySize=2, 16 B/pixel = 128 B total + OutputProps: + Width: 2 + Height: 2 + Depth: 1 + ArraySize: 2 + - Name: Output_Expected + Format: Float32 + Channels: 4 + Data: [ + # Slice 0 (SV_ViewID == 0) + 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.75, 1.0, + 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.75, 1.0, + # Slice 1 (SV_ViewID == 1) + 0.75, 0.5, 0.25, 1.0, 0.75, 0.5, 0.25, 1.0, + 0.75, 0.5, 0.25, 1.0, 0.75, 0.5, 0.25, 1.0, + ] +Bindings: + VertexBuffer: VertexData + VertexAttributes: + - Format: Float32 + Channels: 3 + Offset: 0 + Name: POSITION + RenderTarget: Output +DescriptorSets: [] +Results: + - Result: ViewInstancedRT + Rule: BufferExact + Actual: Output + Expected: Output_Expected +... +#--- end + +# View instancing is a D3D12-only execution-suite feature today; Vulkan and +# Metal silently ignore Pipeline.ViewInstanceCount (RenderEncoder default +# no-op) so we don't try to validate them here. 
+# UNSUPPORTED: Vulkan, Metal + +# Clang's HLSL -> DXIL lowering does not yet implement the graphics-stage +# signature intrinsics (llvm.dx.load.input / llvm.dx.store.output) needed for +# SV_POSITION / SV_TARGET / SV_ViewID plumbing. +# XFAIL: Clang && DirectX + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_1 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_1 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o diff --git a/test/Feature/ViewInstancing/multiview-4views.test b/test/Feature/ViewInstancing/multiview-4views.test new file mode 100644 index 000000000..d3003b52b --- /dev/null +++ b/test/Feature/ViewInstancing/multiview-4views.test @@ -0,0 +1,105 @@ +# Verifies D3D12 view instancing routes four SV_ViewID-driven outputs into +# distinct render target array slices. + +#--- vertex.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +PSInput main(float4 position : POSITION) { + PSInput o; + o.position = position; + return o; +} + +#--- pixel.hlsl +struct PSInput { + float4 position : SV_POSITION; +}; + +float4 main(PSInput input, uint vid : SV_ViewID) : SV_TARGET { + if (vid == 0) + return float4(0.25, 0.5, 0.75, 1.0); + if (vid == 1) + return float4(0.75, 0.5, 0.25, 1.0); + if (vid == 2) + return float4(0.125, 0.25, 0.5, 1.0); + return float4(0.5, 0.25, 0.125, 1.0); +} + +#--- pipeline.yaml +--- +Shaders: + - Stage: Vertex + Entry: main + - Stage: Pixel + Entry: main +ViewInstanceCount: 4 +Buffers: + # Two CCW triangles covering the full 2x2 RT. 
+ - Name: VertexData + Format: Float32 + Stride: 12 + Data: [ -1.0, -1.0, 0.0, + -1.0, 1.0, 0.0, + 1.0, 1.0, 0.0, + 1.0, 1.0, 0.0, + 1.0, -1.0, 0.0, + -1.0, -1.0, 0.0 ] + - Name: Output + Format: Float32 + Channels: 4 + FillSize: 256 # 2x2 RT, ArraySize=4, 16 B/pixel = 256 B total + OutputProps: + Width: 2 + Height: 2 + Depth: 1 + ArraySize: 4 + - Name: Output_Expected + Format: Float32 + Channels: 4 + Data: [ + # Slice 0 (SV_ViewID == 0) + 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.75, 1.0, + 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.75, 1.0, + # Slice 1 (SV_ViewID == 1) + 0.75, 0.5, 0.25, 1.0, 0.75, 0.5, 0.25, 1.0, + 0.75, 0.5, 0.25, 1.0, 0.75, 0.5, 0.25, 1.0, + # Slice 2 (SV_ViewID == 2) + 0.125, 0.25, 0.5, 1.0, 0.125, 0.25, 0.5, 1.0, + 0.125, 0.25, 0.5, 1.0, 0.125, 0.25, 0.5, 1.0, + # Slice 3 (SV_ViewID == 3) + 0.5, 0.25, 0.125, 1.0, 0.5, 0.25, 0.125, 1.0, + 0.5, 0.25, 0.125, 1.0, 0.5, 0.25, 0.125, 1.0, + ] +Bindings: + VertexBuffer: VertexData + VertexAttributes: + - Format: Float32 + Channels: 3 + Offset: 0 + Name: POSITION + RenderTarget: Output +DescriptorSets: [] +Results: + - Result: ViewInstancedRT + Rule: BufferExact + Actual: Output + Expected: Output_Expected +... +#--- end + +# View instancing is a D3D12-only execution-suite feature today; Vulkan and +# Metal silently ignore Pipeline.ViewInstanceCount (RenderEncoder default +# no-op) so we don't try to validate them here. +# UNSUPPORTED: Vulkan, Metal + +# Clang's HLSL -> DXIL lowering does not yet implement the graphics-stage +# signature intrinsics (llvm.dx.load.input / llvm.dx.store.output) needed for +# SV_POSITION / SV_TARGET / SV_ViewID plumbing. +# XFAIL: Clang && DirectX + +# RUN: split-file %s %t +# RUN: %dxc_target -T vs_6_1 -Fo %t-vertex.o %t/vertex.hlsl +# RUN: %dxc_target -T ps_6_1 -Fo %t-pixel.o %t/pixel.hlsl +# RUN: %offloader %t/pipeline.yaml %t-vertex.o %t-pixel.o