Add support for large sampler arrays on Vulkan (#6489)

* Add support for large sampler arrays on Vulkan

* Shader cache version bump

* Format whitespace

* Move DescriptorSetManager to PipelineLayoutCacheEntry to allow different pool sizes per layout

* Handle array textures with different types on the same buffer

* Somewhat better caching system

* Avoid useless buffer data modification checks

* Move redundant bindings update checking to the backend

* Fix an issue where texture arrays would get the same bindings across stages on Vulkan

* Backport some fixes from part 2

* Fix typo

* PR feedback

* Format whitespace

* Add some missing XML docs
This commit is contained in:
gdkchan 2024-04-07 18:25:55 -03:00 committed by GitHub
parent 808803d97a
commit 3e6e0e4afa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
83 changed files with 3263 additions and 955 deletions

View file

@ -15,8 +15,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
// - The handle is a constant buffer value.
// - The handle is the result of a bitwise OR logical operation.
// - Both sources of the OR operation come from a constant buffer.
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
LinkedListNode<INode> nextNode;
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = nextNode)
{
nextNode = node.Next;
if (node.Value is not TextureOperation texOp)
{
continue;
@ -27,185 +31,207 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue;
}
if (texOp.Inst == Instruction.TextureSample || texOp.Inst.IsTextureQuery())
if (!TryConvertBindless(block, resourceManager, gpuAccessor, texOp))
{
Operand bindlessHandle = texOp.GetSource(0);
// If we can't do bindless elimination, remove the texture operation.
// Set any destination variables to zero.
// In some cases the compiler uses a shuffle operation to get the handle,
// for some textureGrad implementations. In those cases, we can skip the shuffle.
if (bindlessHandle.AsgOp is Operation shuffleOp && shuffleOp.Inst == Instruction.Shuffle)
for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
{
bindlessHandle = shuffleOp.GetSource(0);
block.Operations.AddBefore(node, new Operation(Instruction.Copy, texOp.GetDest(destIndex), OperandHelper.Const(0)));
}
bindlessHandle = Utils.FindLastOperation(bindlessHandle, block);
Utils.DeleteNode(node, texOp);
}
}
}
// Some instructions do not encode an accurate sampler type:
// - Most instructions uses the same type for 1D and Buffer.
// - Query instructions may not have any type.
// For those cases, we need to try getting the type from current GPU state,
// as long bindless elimination is successful and we know where the texture descriptor is located.
bool rewriteSamplerType =
texOp.Type == SamplerType.TextureBuffer ||
texOp.Inst == Instruction.TextureQuerySamples ||
texOp.Inst == Instruction.TextureQuerySize;
private static bool TryConvertBindless(BasicBlock block, ResourceManager resourceManager, IGpuAccessor gpuAccessor, TextureOperation texOp)
{
if (texOp.Inst == Instruction.TextureSample || texOp.Inst.IsTextureQuery())
{
Operand bindlessHandle = texOp.GetSource(0);
if (bindlessHandle.Type == OperandType.ConstantBuffer)
// In some cases the compiler uses a shuffle operation to get the handle,
// for some textureGrad implementations. In those cases, we can skip the shuffle.
if (bindlessHandle.AsgOp is Operation shuffleOp && shuffleOp.Inst == Instruction.Shuffle)
{
bindlessHandle = shuffleOp.GetSource(0);
}
bindlessHandle = Utils.FindLastOperation(bindlessHandle, block);
// Some instructions do not encode an accurate sampler type:
// - Most instructions use the same type for 1D and Buffer.
// - Query instructions may not have any type.
// For those cases, we need to try getting the type from the current GPU state,
// as long as bindless elimination is successful and we know where the texture descriptor is located.
bool rewriteSamplerType =
texOp.Type == SamplerType.TextureBuffer ||
texOp.Inst == Instruction.TextureQuerySamples ||
texOp.Inst == Instruction.TextureQuerySize;
if (bindlessHandle.Type == OperandType.ConstantBuffer)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
bindlessHandle.GetCbufOffset(),
bindlessHandle.GetCbufSlot(),
rewriteSamplerType,
isImage: false);
return true;
}
if (!TryGetOperation(bindlessHandle.AsgOp, out Operation handleCombineOp))
{
return false;
}
if (handleCombineOp.Inst != Instruction.BitwiseOr)
{
return false;
}
Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
// For cases where we have a constant, ensure that the constant is always
// the second operand.
// Since this is a commutative operation, both are fine,
// and having a "canonical" representation simplifies some checks below.
if (src0.Type == OperandType.Constant && src1.Type != OperandType.Constant)
{
(src0, src1) = (src1, src0);
}
TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
// Try to match the following patterns:
// Masked pattern:
// - samplerHandle = samplerHandle & 0xFFF00000;
// - textureHandle = textureHandle & 0xFFFFF;
// - combinedHandle = samplerHandle | textureHandle;
// Where samplerHandle and textureHandle comes from a constant buffer.
// Shifted pattern:
// - samplerHandle = samplerId << 20;
// - combinedHandle = samplerHandle | textureHandle;
// Where samplerId and textureHandle comes from a constant buffer.
// Constant pattern:
// - combinedHandle = samplerHandleConstant | textureHandle;
// Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
if (src0.AsgOp is Operation src0AsgOp)
{
if (src1.AsgOp is Operation src1AsgOp &&
src0AsgOp.Inst == Instruction.BitwiseAnd &&
src1AsgOp.Inst == Instruction.BitwiseAnd)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
bindlessHandle.GetCbufOffset(),
bindlessHandle.GetCbufSlot(),
rewriteSamplerType,
isImage: false);
src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
continue;
}
if (!TryGetOperation(bindlessHandle.AsgOp, out Operation handleCombineOp))
{
continue;
}
if (handleCombineOp.Inst != Instruction.BitwiseOr)
{
continue;
}
Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
// For cases where we have a constant, ensure that the constant is always
// the second operand.
// Since this is a commutative operation, both are fine,
// and having a "canonical" representation simplifies some checks below.
if (src0.Type == OperandType.Constant && src1.Type != OperandType.Constant)
{
(src0, src1) = (src1, src0);
}
TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
// Try to match the following patterns:
// Masked pattern:
// - samplerHandle = samplerHandle & 0xFFF00000;
// - textureHandle = textureHandle & 0xFFFFF;
// - combinedHandle = samplerHandle | textureHandle;
// Where samplerHandle and textureHandle comes from a constant buffer.
// Shifted pattern:
// - samplerHandle = samplerId << 20;
// - combinedHandle = samplerHandle | textureHandle;
// Where samplerId and textureHandle comes from a constant buffer.
// Constant pattern:
// - combinedHandle = samplerHandleConstant | textureHandle;
// Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
if (src0.AsgOp is Operation src0AsgOp)
{
if (src1.AsgOp is Operation src1AsgOp &&
src0AsgOp.Inst == Instruction.BitwiseAnd &&
src1AsgOp.Inst == Instruction.BitwiseAnd)
// The OR operation is commutative, so we can also try to swap the operands to get a match.
if (src0 == null || src1 == null)
{
src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
// The OR operation is commutative, so we can also try to swap the operands to get a match.
if (src0 == null || src1 == null)
{
src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
}
if (src0 == null || src1 == null)
{
continue;
}
src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
}
else if (src0AsgOp.Inst == Instruction.ShiftLeft)
{
Operand shift = src0AsgOp.GetSource(1);
if (shift.Type == OperandType.Constant && shift.Value == 20)
{
src0 = src1;
src1 = src0AsgOp.GetSource(0);
handleType = TextureHandleType.SeparateSamplerId;
}
if (src0 == null || src1 == null)
{
return false;
}
}
else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
else if (src0AsgOp.Inst == Instruction.ShiftLeft)
{
Operand shift = src1AsgOp.GetSource(1);
Operand shift = src0AsgOp.GetSource(1);
if (shift.Type == OperandType.Constant && shift.Value == 20)
{
src1 = src1AsgOp.GetSource(0);
src0 = src1;
src1 = src0AsgOp.GetSource(0);
handleType = TextureHandleType.SeparateSamplerId;
}
}
else if (src1.Type == OperandType.Constant && (src1.Value & 0xfffff) == 0)
{
handleType = TextureHandleType.SeparateConstantSamplerHandle;
}
}
else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
{
Operand shift = src1AsgOp.GetSource(1);
if (src0.Type != OperandType.ConstantBuffer)
if (shift.Type == OperandType.Constant && shift.Value == 20)
{
continue;
}
if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
rewriteSamplerType,
isImage: false);
}
else if (src1.Type == OperandType.ConstantBuffer)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
rewriteSamplerType,
isImage: false);
src1 = src1AsgOp.GetSource(0);
handleType = TextureHandleType.SeparateSamplerId;
}
}
else if (texOp.Inst == Instruction.ImageLoad ||
texOp.Inst == Instruction.ImageStore ||
texOp.Inst == Instruction.ImageAtomic)
else if (src1.Type == OperandType.Constant && (src1.Value & 0xfffff) == 0)
{
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
handleType = TextureHandleType.SeparateConstantSamplerHandle;
}
if (src0.Type == OperandType.ConstantBuffer)
{
int cbufOffset = src0.GetCbufOffset();
int cbufSlot = src0.GetCbufSlot();
if (src0.Type != OperandType.ConstantBuffer)
{
return false;
}
if (texOp.Format == TextureFormat.Unknown)
{
if (texOp.Inst == Instruction.ImageAtomic)
{
texOp.Format = ShaderProperties.GetTextureFormatAtomic(gpuAccessor, cbufOffset, cbufSlot);
}
else
{
texOp.Format = ShaderProperties.GetTextureFormat(gpuAccessor, cbufOffset, cbufSlot);
}
}
if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
rewriteSamplerType,
isImage: false);
bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
return true;
}
else if (src1.Type == OperandType.ConstantBuffer)
{
SetHandle(
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
rewriteSamplerType,
isImage: false);
SetHandle(resourceManager, gpuAccessor, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
}
return true;
}
}
else if (texOp.Inst.IsImage())
{
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
if (src0.Type == OperandType.ConstantBuffer)
{
int cbufOffset = src0.GetCbufOffset();
int cbufSlot = src0.GetCbufSlot();
if (texOp.Format == TextureFormat.Unknown)
{
if (texOp.Inst == Instruction.ImageAtomic)
{
texOp.Format = ShaderProperties.GetTextureFormatAtomic(gpuAccessor, cbufOffset, cbufSlot);
}
else
{
texOp.Format = ShaderProperties.GetTextureFormat(gpuAccessor, cbufOffset, cbufSlot);
}
}
bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
SetHandle(resourceManager, gpuAccessor, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
return true;
}
}
return false;
}
private static bool TryGetOperation(INode asgOp, out Operation outOperation)

View file

@ -0,0 +1,236 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
/// <summary>
/// Optimization pass that rewrites bindless texture operations, whose handle is loaded from a
/// constant buffer, into accesses to an array of textures selected by an index computed in the shader.
/// </summary>
static class BindlessToArray
{
    private const int NvnTextureBufferIndex = 2;
    private const int HardcodedArrayLengthOgl = 4;

    // 1 and 0 elements are not considered arrays anymore.
    private const int MinimumArrayLength = 2;

    /// <summary>
    /// Runs the OpenGL variant of the pass on a single basic block.
    /// </summary>
    /// <remarks>
    /// A bindless texture access is turned into an array access as long as:
    /// - The handle is loaded using a LDC instruction.
    /// - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
    /// - The load has a constant offset.
    /// The base offset of the array of handles on the constant buffer is the constant offset.
    /// The array length is always <see cref="HardcodedArrayLengthOgl"/>.
    /// </remarks>
    /// <param name="block">Block whose operations are inspected and rewritten in place</param>
    /// <param name="resourceManager">Resource manager used to resolve buffer slots and create the array binding</param>
    public static void RunPassOgl(BasicBlock block, ResourceManager resourceManager)
    {
        LinkedListNode<INode> current = block.Operations.First;

        while (current != null)
        {
            if (TryGetBindlessHandleProducer(current.Value, out TextureOperation texOp, out Operation handleLoad))
            {
                ConvertToArrayOgl(block, current, texOp, handleLoad, resourceManager);
            }

            // New operations are only ever inserted before the current node, so Next is unaffected.
            current = current.Next;
        }
    }

    /// <summary>
    /// Runs the pass on a single basic block, querying the array length from the GPU accessor.
    /// </summary>
    /// <remarks>
    /// A bindless texture access is turned into an array access as long as:
    /// - The handle is loaded using a LDC instruction, optionally combined with a second
    ///   handle read directly from a constant buffer through a bitwise OR.
    /// - The load accesses field index 0 of a known constant buffer.
    /// - Both the vector index and the element index of the load are local variables.
    /// </remarks>
    /// <param name="block">Block whose operations are inspected and rewritten in place</param>
    /// <param name="resourceManager">Resource manager used to resolve buffer slots and create the array binding</param>
    /// <param name="gpuAccessor">GPU accessor used to query the texture array length for a constant buffer slot</param>
    public static void RunPass(BasicBlock block, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
    {
        LinkedListNode<INode> current = block.Operations.First;

        while (current != null)
        {
            if (TryGetBindlessHandleProducer(current.Value, out TextureOperation texOp, out Operation handleOp))
            {
                ConvertToArray(block, current, texOp, handleOp, resourceManager, gpuAccessor);
            }

            // New operations are only ever inserted before the current node, so Next is unaffected.
            current = current.Next;
        }
    }

    /// <summary>
    /// Tries to rewrite one bindless texture operation using the OpenGL pattern
    /// (load from the handles buffer addressed by "(dynamic + constant) >> x >> y").
    /// Leaves the operation untouched when the pattern does not match.
    /// </summary>
    private static void ConvertToArrayOgl(
        BasicBlock block,
        LinkedListNode<INode> node,
        TextureOperation texOp,
        Operation handleLoad,
        ResourceManager resourceManager)
    {
        if (!IsConstantBufferLoad(handleLoad))
        {
            return;
        }

        bool fromHandlesBuffer =
            TryGetLoadedBufferSlot(resourceManager, handleLoad, out int bufferSlot) &&
            bufferSlot == NvnTextureBufferIndex;

        if (!fromHandlesBuffer)
        {
            return;
        }

        // We expect field index 0 to be accessed.
        if (!IsZeroConstant(handleLoad.GetSource(1)))
        {
            return;
        }

        // FIXME: This is missing some checks, for example, a check to ensure that the shift value is 2.
        // Might be not worth fixing since if that doesn't kick in, the result will be no texture
        // to access anyway which is also wrong.
        // Plus this whole transform is fundamentally flawed as-is since we have no way to know the array size.
        // Eventually, this should be entirely removed in favor of an implementation that supports true bindless
        // texture access.
        if (!TryGetProducer(handleLoad.GetSource(2), Instruction.ShiftRightU32, out Operation outerShift) ||
            !TryGetProducer(outerShift.GetSource(0), Instruction.ShiftRightU32, out Operation innerShift) ||
            !TryGetProducer(innerShift.GetSource(0), Instruction.Add, out Operation addressAdd))
        {
            return;
        }

        Operand constantOffset = addressAdd.GetSource(1);

        if (constantOffset.Type != OperandType.Constant)
        {
            return;
        }

        TurnIntoArray(resourceManager, texOp, NvnTextureBufferIndex, constantOffset.Value / 4, HardcodedArrayLengthOgl);

        Operand arrayIndex = Local();
        Operand dynamicOffset = addressAdd.GetSource(0);

        // The array index is the non-constant part of the address shifted right by 3.
        block.Operations.AddBefore(node, new Operation(Instruction.ShiftRightU32, arrayIndex, dynamicOffset, Const(3)));

        texOp.SetSource(0, arrayIndex);
    }

    /// <summary>
    /// Tries to rewrite one bindless texture operation into an array access whose length comes
    /// from the GPU accessor. Leaves the operation untouched when the pattern does not match.
    /// </summary>
    private static void ConvertToArray(
        BasicBlock block,
        LinkedListNode<INode> node,
        TextureOperation texOp,
        Operation handleOp,
        ResourceManager resourceManager,
        IGpuAccessor gpuAccessor)
    {
        bool hasSecondaryHandle = TrySplitCombinedHandle(ref handleOp, out int secondarySlot, out int secondaryOffset);

        if (!IsConstantBufferLoad(handleOp))
        {
            return;
        }

        if (!TryGetLoadedBufferSlot(resourceManager, handleOp, out int primarySlot))
        {
            return;
        }

        // We expect field index 0 to be accessed.
        if (!IsZeroConstant(handleOp.GetSource(1)))
        {
            return;
        }

        Operand loadVecIndex = handleOp.GetSource(2);
        Operand loadElemIndex = handleOp.GetSource(3);

        bool hasDynamicIndex =
            loadVecIndex.Type == OperandType.LocalVariable &&
            loadElemIndex.Type == OperandType.LocalVariable;

        if (!hasDynamicIndex)
        {
            return;
        }

        int cbufSlot = primarySlot;
        int handleIndex = 0;

        if (hasSecondaryHandle)
        {
            cbufSlot = TextureHandle.PackSlots(primarySlot, secondarySlot);
            handleIndex = TextureHandle.PackOffsets(0, secondaryOffset, TextureHandleType.SeparateSamplerHandle);
        }

        int queriedLength = gpuAccessor.QueryTextureArrayLengthFromBuffer(primarySlot);
        int length = Math.Max(MinimumArrayLength, queriedLength);

        TurnIntoArray(resourceManager, texOp, cbufSlot, handleIndex, length);

        Operand scaledVecIndex = Local();
        Operand scaledElemIndex = Local();
        Operand unclampedIndex = Local();
        Operand clampedIndex = Local();

        LinkedList<INode> operations = block.Operations;

        // index = min((vecIndex << 1) + (elemIndex >> 1), length - 1)
        // NOTE(review): presumably each vector of the buffer holds two handles of two elements each - confirm.
        operations.AddBefore(node, new Operation(Instruction.ShiftLeft, scaledVecIndex, loadVecIndex, Const(1)));
        operations.AddBefore(node, new Operation(Instruction.ShiftRightU32, scaledElemIndex, loadElemIndex, Const(1)));
        operations.AddBefore(node, new Operation(Instruction.Add, unclampedIndex, scaledVecIndex, scaledElemIndex));
        operations.AddBefore(node, new Operation(Instruction.MinimumU32, clampedIndex, unclampedIndex, Const(length - 1)));

        texOp.SetSource(0, clampedIndex);
    }

    /// <summary>
    /// Checks whether a node is a bindless texture operation whose handle (source 0)
    /// is produced by an <see cref="Operation"/>.
    /// </summary>
    private static bool TryGetBindlessHandleProducer(INode node, out TextureOperation texOp, out Operation handleProducer)
    {
        handleProducer = null;
        texOp = node as TextureOperation;

        if (texOp is null || (texOp.Flags & TextureFlags.Bindless) == 0)
        {
            return false;
        }

        handleProducer = texOp.GetSource(0).AsgOp as Operation;

        return handleProducer is not null;
    }

    /// <summary>
    /// If the handle is the bitwise OR of a constant buffer operand and the result of another operation,
    /// replaces <paramref name="handleOp"/> with that other operation and returns the slot and offset
    /// of the constant buffer operand.
    /// </summary>
    private static bool TrySplitCombinedHandle(ref Operation handleOp, out int secondarySlot, out int secondaryOffset)
    {
        secondarySlot = 0;
        secondaryOffset = 0;

        if (handleOp.Inst != Instruction.BitwiseOr)
        {
            return false;
        }

        Operand lhs = handleOp.GetSource(0);
        Operand rhs = handleOp.GetSource(1);
        Operand cbufOperand;

        if (lhs.Type == OperandType.ConstantBuffer && rhs.AsgOp is Operation rhsProducer)
        {
            cbufOperand = lhs;
            handleOp = rhsProducer;
        }
        else if (lhs.AsgOp is Operation lhsProducer && rhs.Type == OperandType.ConstantBuffer)
        {
            cbufOperand = rhs;
            handleOp = lhsProducer;
        }
        else
        {
            return false;
        }

        secondarySlot = cbufOperand.GetCbufSlot();
        secondaryOffset = cbufOperand.GetCbufOffset();

        return true;
    }

    /// <summary>
    /// Checks whether an operation is a load from constant buffer storage with exactly 4 sources.
    /// </summary>
    private static bool IsConstantBufferLoad(Operation operation)
    {
        return operation.Inst == Instruction.Load &&
               operation.StorageKind == StorageKind.ConstantBuffer &&
               operation.SourcesCount == 4;
    }

    /// <summary>
    /// Resolves the constant buffer slot addressed by source 0 of a load, which must be a constant.
    /// </summary>
    private static bool TryGetLoadedBufferSlot(ResourceManager resourceManager, Operation load, out int slot)
    {
        Operand bufferId = load.GetSource(0);

        if (bufferId.Type != OperandType.Constant)
        {
            slot = default;
            return false;
        }

        return resourceManager.TryGetConstantBufferSlot(bufferId.Value, out slot);
    }

    /// <summary>
    /// Checks whether an operand is a constant with value 0.
    /// </summary>
    private static bool IsZeroConstant(Operand operand)
    {
        return operand.Type == OperandType.Constant && operand.Value == 0;
    }

    /// <summary>
    /// Checks whether an operand is assigned by an operation with the given instruction.
    /// </summary>
    private static bool TryGetProducer(Operand operand, Instruction inst, out Operation producer)
    {
        producer = operand.AsgOp as Operation;

        return producer is not null && producer.Inst == inst;
    }

    /// <summary>
    /// Requests a binding for an array of textures or images and points the texture operation at it.
    /// The bindless flag is cleared from the flags used for the binding request.
    /// </summary>
    private static void TurnIntoArray(ResourceManager resourceManager, TextureOperation texOp, int cbufSlot, int handleIndex, int length)
    {
        TextureFlags boundFlags = texOp.Flags & ~TextureFlags.Bindless;

        int arrayBinding = resourceManager.GetTextureOrImageBinding(
            texOp.Inst,
            texOp.Type,
            texOp.Format,
            boundFlags,
            cbufSlot,
            handleIndex,
            length);

        texOp.TurnIntoArray(arrayBinding);
    }
}
}

View file

@ -1,118 +0,0 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
/// <summary>
/// Optimization pass that rewrites bindless texture operations, whose handle is loaded from the
/// constant buffer with the handles, into indexed texture accesses.
/// </summary>
static class BindlessToIndexed
{
    private const int NvnTextureBufferIndex = 2;

    /// <summary>
    /// Runs the pass on a single basic block.
    /// </summary>
    /// <remarks>
    /// A bindless texture access is turned into an indexed access as long as:
    /// - The handle is loaded using a LDC instruction.
    /// - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
    /// - The load has a constant offset.
    /// The base offset of the array of handles on the constant buffer is the constant offset.
    /// </remarks>
    /// <param name="block">Block whose operations are inspected and rewritten in place</param>
    /// <param name="resourceManager">Resource manager used to resolve buffer slots and create the binding</param>
    public static void RunPass(BasicBlock block, ResourceManager resourceManager)
    {
        LinkedListNode<INode> cursor = block.Operations.First;

        while (cursor != null)
        {
            if (cursor.Value is TextureOperation texOp &&
                (texOp.Flags & TextureFlags.Bindless) != 0 &&
                TryFindHandleAddress(resourceManager, texOp, out Operation addressAdd, out Operand baseOffset))
            {
                TurnIntoIndexed(resourceManager, texOp, baseOffset.Value / 4);

                Operand textureIndex = Local();
                Operand dynamicOffset = addressAdd.GetSource(0);

                // The index is the non-constant part of the address shifted right by 3.
                Operation shiftBy3 = new(Instruction.ShiftRightU32, textureIndex, dynamicOffset, Const(3));

                // Inserting before the current node does not affect the traversal below.
                block.Operations.AddBefore(cursor, shiftBy3);

                texOp.SetSource(0, textureIndex);
            }

            cursor = cursor.Next;
        }
    }

    /// <summary>
    /// Matches the handle of a texture operation against the pattern
    /// "load(handles buffer, field 0, ((dynamic + constant) >> x) >> y)".
    /// </summary>
    /// <param name="resourceManager">Resource manager used to resolve the constant buffer slot</param>
    /// <param name="texOp">Texture operation whose handle (source 0) is inspected</param>
    /// <param name="addressAdd">On success, the add operation that computes the handle address</param>
    /// <param name="baseOffset">On success, the constant second source of the add operation</param>
    /// <returns>True if the whole pattern matched, false otherwise</returns>
    private static bool TryFindHandleAddress(
        ResourceManager resourceManager,
        TextureOperation texOp,
        out Operation addressAdd,
        out Operand baseOffset)
    {
        addressAdd = null;
        baseOffset = null;

        if (texOp.GetSource(0).AsgOp is not Operation handleLoad)
        {
            return false;
        }

        bool isConstantBufferLoad =
            handleLoad.Inst == Instruction.Load &&
            handleLoad.StorageKind == StorageKind.ConstantBuffer &&
            handleLoad.SourcesCount == 4;

        if (!isConstantBufferLoad)
        {
            return false;
        }

        Operand bufferId = handleLoad.GetSource(0);

        bool fromHandlesBuffer =
            bufferId.Type == OperandType.Constant &&
            resourceManager.TryGetConstantBufferSlot(bufferId.Value, out int bufferSlot) &&
            bufferSlot == NvnTextureBufferIndex;

        if (!fromHandlesBuffer)
        {
            return false;
        }

        Operand fieldIndex = handleLoad.GetSource(1);

        // We expect field index 0 to be accessed.
        if (fieldIndex.Type != OperandType.Constant || fieldIndex.Value != 0)
        {
            return false;
        }

        // FIXME: This is missing some checks, for example, a check to ensure that the shift value is 2.
        // Might be not worth fixing since if that doesn't kick in, the result will be no texture
        // to access anyway which is also wrong.
        // Plus this whole transform is fundamentally flawed as-is since we have no way to know the array size.
        // Eventually, this should be entirely removed in favor of an implementation that supports true bindless
        // texture access.
        if (handleLoad.GetSource(2).AsgOp is not Operation { Inst: Instruction.ShiftRightU32 } outerShift)
        {
            return false;
        }

        if (outerShift.GetSource(0).AsgOp is not Operation { Inst: Instruction.ShiftRightU32 } innerShift)
        {
            return false;
        }

        if (innerShift.GetSource(0).AsgOp is not Operation { Inst: Instruction.Add } add)
        {
            return false;
        }

        Operand constantOperand = add.GetSource(1);

        if (constantOperand.Type != OperandType.Constant)
        {
            return false;
        }

        addressAdd = add;
        baseOffset = constantOperand;

        return true;
    }

    /// <summary>
    /// Requests a binding for an indexed sampler and points the texture operation at it.
    /// The bindless flag is cleared from the flags used for the binding request.
    /// </summary>
    private static void TurnIntoIndexed(ResourceManager resourceManager, TextureOperation texOp, int handle)
    {
        SamplerType indexedType = texOp.Type | SamplerType.Indexed;
        TextureFlags boundFlags = texOp.Flags & ~TextureFlags.Bindless;

        int indexedBinding = resourceManager.GetTextureOrImageBinding(
            texOp.Inst,
            indexedType,
            texOp.Format,
            boundFlags,
            NvnTextureBufferIndex,
            handle);

        texOp.TurnIntoIndexed(indexedBinding);
    }
}
}

View file

@ -20,7 +20,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
// Those passes are looking for specific patterns and only needs to run once.
for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++)
{
BindlessToIndexed.RunPass(context.Blocks[blkIndex], context.ResourceManager);
if (context.TargetApi == TargetApi.OpenGL)
{
BindlessToArray.RunPassOgl(context.Blocks[blkIndex], context.ResourceManager);
}
else
{
BindlessToArray.RunPass(context.Blocks[blkIndex], context.ResourceManager, context.GpuAccessor);
}
BindlessElimination.RunPass(context.Blocks[blkIndex], context.ResourceManager, context.GpuAccessor);
// FragmentCoord only exists on fragment shaders, so we don't need to check other stages.

View file

@ -14,9 +14,6 @@ namespace Ryujinx.Graphics.Shader.Translation
private const int DefaultLocalMemorySize = 128;
private const int DefaultSharedMemorySize = 4096;
// TODO: Non-hardcoded array size.
public const int SamplerArraySize = 4;
private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" };
private readonly IGpuAccessor _gpuAccessor;
@ -32,7 +29,7 @@ namespace Ryujinx.Graphics.Shader.Translation
private readonly HashSet<int> _usedConstantBufferBindings;
private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
private readonly record struct TextureInfo(int CbufSlot, int Handle, int ArrayLength, SamplerType Type, TextureFormat Format);
private struct TextureMeta
{
@ -152,7 +149,7 @@ namespace Ryujinx.Graphics.Shader.Translation
int binding = _cbSlotToBindingMap[slot];
if (binding < 0)
{
binding = _gpuAccessor.QueryBindingConstantBuffer(slot);
binding = _gpuAccessor.CreateConstantBufferBinding(slot);
_cbSlotToBindingMap[slot] = binding;
string slotNumber = slot.ToString(CultureInfo.InvariantCulture);
AddNewConstantBuffer(binding, $"{_stagePrefix}_c{slotNumber}");
@ -173,7 +170,7 @@ namespace Ryujinx.Graphics.Shader.Translation
if (binding < 0)
{
binding = _gpuAccessor.QueryBindingStorageBuffer(slot);
binding = _gpuAccessor.CreateStorageBufferBinding(slot);
_sbSlotToBindingMap[slot] = binding;
string slotNumber = slot.ToString(CultureInfo.InvariantCulture);
AddNewStorageBuffer(binding, $"{_stagePrefix}_s{slotNumber}");
@ -227,11 +224,12 @@ namespace Ryujinx.Graphics.Shader.Translation
TextureFormat format,
TextureFlags flags,
int cbufSlot,
int handle)
int handle,
int arrayLength = 1)
{
inst &= Instruction.Mask;
bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
bool isImage = inst.IsImage();
bool isWrite = inst.IsImageStore();
bool accurateType = !inst.IsTextureQuery();
bool intCoords = isImage || flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureQuerySize;
bool coherent = flags.HasFlag(TextureFlags.Coherent);
@ -241,7 +239,7 @@ namespace Ryujinx.Graphics.Shader.Translation
format = TextureFormat.Unknown;
}
int binding = GetTextureOrImageBinding(cbufSlot, handle, type, format, isImage, intCoords, isWrite, accurateType, coherent);
int binding = GetTextureOrImageBinding(cbufSlot, handle, arrayLength, type, format, isImage, intCoords, isWrite, accurateType, coherent);
_gpuAccessor.RegisterTexture(handle, cbufSlot);
@ -251,6 +249,7 @@ namespace Ryujinx.Graphics.Shader.Translation
private int GetTextureOrImageBinding(
int cbufSlot,
int handle,
int arrayLength,
SamplerType type,
TextureFormat format,
bool isImage,
@ -260,7 +259,6 @@ namespace Ryujinx.Graphics.Shader.Translation
bool coherent)
{
var dimensions = type.GetDimensions();
var isIndexed = type.HasFlag(SamplerType.Indexed);
var dict = isImage ? _usedImages : _usedTextures;
var usageFlags = TextureUsageFlags.None;
@ -269,7 +267,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
usageFlags |= TextureUsageFlags.NeedsScaleValue;
var canScale = _stage.SupportsRenderScale() && !isIndexed && !write && dimensions == 2;
var canScale = _stage.SupportsRenderScale() && arrayLength == 1 && !write && dimensions == 2;
if (!canScale)
{
@ -289,76 +287,75 @@ namespace Ryujinx.Graphics.Shader.Translation
usageFlags |= TextureUsageFlags.ImageCoherent;
}
int arraySize = isIndexed ? SamplerArraySize : 1;
int firstBinding = -1;
for (int layer = 0; layer < arraySize; layer++)
// For array textures, we also want to use type as key,
// since we may have texture handles stored in the same buffer, but for textures with different types.
var keyType = arrayLength > 1 ? type : SamplerType.None;
var info = new TextureInfo(cbufSlot, handle, arrayLength, keyType, format);
var meta = new TextureMeta()
{
var info = new TextureInfo(cbufSlot, handle + layer * 2, isIndexed, format);
var meta = new TextureMeta()
{
AccurateType = accurateType,
Type = type,
UsageFlags = usageFlags,
};
AccurateType = accurateType,
Type = type,
UsageFlags = usageFlags,
};
int binding;
int binding;
if (dict.TryGetValue(info, out var existingMeta))
{
dict[info] = MergeTextureMeta(meta, existingMeta);
binding = existingMeta.Binding;
}
else
{
bool isBuffer = (type & SamplerType.Mask) == SamplerType.TextureBuffer;
if (dict.TryGetValue(info, out var existingMeta))
{
dict[info] = MergeTextureMeta(meta, existingMeta);
binding = existingMeta.Binding;
}
else
{
bool isBuffer = (type & SamplerType.Mask) == SamplerType.TextureBuffer;
binding = isImage
? _gpuAccessor.QueryBindingImage(dict.Count, isBuffer)
: _gpuAccessor.QueryBindingTexture(dict.Count, isBuffer);
binding = isImage
? _gpuAccessor.CreateImageBinding(arrayLength, isBuffer)
: _gpuAccessor.CreateTextureBinding(arrayLength, isBuffer);
meta.Binding = binding;
meta.Binding = binding;
dict.Add(info, meta);
}
string nameSuffix;
if (isImage)
{
nameSuffix = cbufSlot < 0
? $"i_tcb_{handle:X}_{format.ToGlslFormat()}"
: $"i_cb{cbufSlot}_{handle:X}_{format.ToGlslFormat()}";
}
else
{
nameSuffix = cbufSlot < 0 ? $"t_tcb_{handle:X}" : $"t_cb{cbufSlot}_{handle:X}";
}
var definition = new TextureDefinition(
isImage ? 3 : 2,
binding,
$"{_stagePrefix}_{nameSuffix}",
meta.Type,
info.Format,
meta.UsageFlags);
if (isImage)
{
Properties.AddOrUpdateImage(definition);
}
else
{
Properties.AddOrUpdateTexture(definition);
}
if (layer == 0)
{
firstBinding = binding;
}
dict.Add(info, meta);
}
return firstBinding;
string nameSuffix;
string prefix = isImage ? "i" : "t";
if (arrayLength != 1 && type != SamplerType.None)
{
prefix += type.ToShortSamplerType();
}
if (isImage)
{
nameSuffix = cbufSlot < 0
? $"{prefix}_tcb_{handle:X}_{format.ToGlslFormat()}"
: $"{prefix}_cb{cbufSlot}_{handle:X}_{format.ToGlslFormat()}";
}
else
{
nameSuffix = cbufSlot < 0 ? $"{prefix}_tcb_{handle:X}" : $"{prefix}_cb{cbufSlot}_{handle:X}";
}
var definition = new TextureDefinition(
isImage ? 3 : 2,
binding,
arrayLength,
$"{_stagePrefix}_{nameSuffix}",
meta.Type,
info.Format,
meta.UsageFlags);
if (isImage)
{
Properties.AddOrUpdateImage(definition);
}
else
{
Properties.AddOrUpdateTexture(definition);
}
return binding;
}
private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
@ -399,8 +396,7 @@ namespace Ryujinx.Graphics.Shader.Translation
selectedMeta.UsageFlags |= TextureUsageFlags.NeedsScaleValue;
var dimensions = type.GetDimensions();
var isIndexed = type.HasFlag(SamplerType.Indexed);
var canScale = _stage.SupportsRenderScale() && !isIndexed && dimensions == 2;
var canScale = _stage.SupportsRenderScale() && selectedInfo.ArrayLength == 1 && dimensions == 2;
if (!canScale)
{
@ -468,34 +464,61 @@ namespace Ryujinx.Graphics.Shader.Translation
return descriptors;
}
public TextureDescriptor[] GetTextureDescriptors()
public TextureDescriptor[] GetTextureDescriptors(bool includeArrays = true)
{
return GetDescriptors(_usedTextures, _usedTextures.Count);
return GetDescriptors(_usedTextures, includeArrays);
}
public TextureDescriptor[] GetImageDescriptors()
public TextureDescriptor[] GetImageDescriptors(bool includeArrays = true)
{
return GetDescriptors(_usedImages, _usedImages.Count);
return GetDescriptors(_usedImages, includeArrays);
}
private static TextureDescriptor[] GetDescriptors(IReadOnlyDictionary<TextureInfo, TextureMeta> usedResources, int count)
private static TextureDescriptor[] GetDescriptors(IReadOnlyDictionary<TextureInfo, TextureMeta> usedResources, bool includeArrays)
{
TextureDescriptor[] descriptors = new TextureDescriptor[count];
List<TextureDescriptor> descriptors = new();
int descriptorIndex = 0;
bool hasAnyArray = false;
foreach ((TextureInfo info, TextureMeta meta) in usedResources)
{
descriptors[descriptorIndex++] = new TextureDescriptor(
if (info.ArrayLength > 1)
{
hasAnyArray = true;
continue;
}
descriptors.Add(new TextureDescriptor(
meta.Binding,
meta.Type,
info.Format,
info.CbufSlot,
info.Handle,
meta.UsageFlags);
info.ArrayLength,
meta.UsageFlags));
}
return descriptors;
if (hasAnyArray && includeArrays)
{
foreach ((TextureInfo info, TextureMeta meta) in usedResources)
{
if (info.ArrayLength <= 1)
{
continue;
}
descriptors.Add(new TextureDescriptor(
meta.Binding,
meta.Type,
info.Format,
info.CbufSlot,
info.Handle,
info.ArrayLength,
meta.UsageFlags));
}
}
return descriptors.ToArray();
}
public bool TryGetCbufSlotAndHandleForTexture(int binding, out int cbufSlot, out int handle)
@ -531,6 +554,19 @@ namespace Ryujinx.Graphics.Shader.Translation
return FindDescriptorIndex(GetImageDescriptors(), binding);
}
public bool IsArrayOfTexturesOrImages(int binding, bool isImage)
{
foreach ((TextureInfo info, TextureMeta meta) in isImage ? _usedImages : _usedTextures)
{
if (meta.Binding == binding)
{
return info.ArrayLength != 1;
}
}
return false;
}
private void AddNewConstantBuffer(int binding, string name)
{
StructureType type = new(new[]

View file

@ -9,6 +9,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public readonly ShaderDefinitions Definitions;
public readonly ResourceManager ResourceManager;
public readonly IGpuAccessor GpuAccessor;
public readonly TargetApi TargetApi;
public readonly TargetLanguage TargetLanguage;
public readonly ShaderStage Stage;
public readonly ref FeatureFlags UsedFeatures;
@ -19,6 +20,7 @@ namespace Ryujinx.Graphics.Shader.Translation
ShaderDefinitions definitions,
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
TargetApi targetApi,
TargetLanguage targetLanguage,
ShaderStage stage,
ref FeatureFlags usedFeatures)
@ -28,6 +30,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Definitions = definitions;
ResourceManager = resourceManager;
GpuAccessor = gpuAccessor;
TargetApi = targetApi;
TargetLanguage = targetLanguage;
Stage = stage;
UsedFeatures = ref usedFeatures;

View file

@ -23,7 +23,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
node = InsertCoordNormalization(context.Hfm, node, context.ResourceManager, context.GpuAccessor, context.Stage);
node = InsertCoordGatherBias(node, context.ResourceManager, context.GpuAccessor);
node = InsertConstOffsets(node, context.GpuAccessor, context.Stage);
node = InsertConstOffsets(node, context.ResourceManager, context.GpuAccessor, context.Stage);
if (texOp.Type == SamplerType.TextureBuffer && !context.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat())
{
@ -45,13 +45,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless || isIndexed ? 1 : 0;
bool isImage = IsImageInstructionWithScale(texOp.Inst);
bool isIndexed = resourceManager.IsArrayOfTexturesOrImages(texOp.Binding, isImage);
if ((texOp.Inst == Instruction.TextureSample || isImage) &&
(intCoords || isImage) &&
@ -62,9 +58,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale);
int samplerIndex = isImage
? resourceManager.GetTextureDescriptors().Length + resourceManager.FindImageDescriptorIndex(texOp.Binding)
? resourceManager.GetTextureDescriptors(includeArrays: false).Length + resourceManager.FindImageDescriptorIndex(texOp.Binding)
: resourceManager.FindTextureDescriptorIndex(texOp.Binding);
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless ? 1 : 0;
for (int index = 0; index < coordsCount; index++)
{
Operand scaledCoord = Local();
@ -97,7 +96,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
TextureOperation texOp = (TextureOperation)node.Value;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
bool isIndexed = resourceManager.IsArrayOfTexturesOrImages(texOp.Binding, isImage: false);
if (texOp.Inst == Instruction.TextureQuerySize &&
texOp.Index < 2 &&
@ -152,8 +151,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
TextureOperation texOp = (TextureOperation)node.Value;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isIndexed = resourceManager.IsArrayOfTexturesOrImages(texOp.Binding, isImage: false);
if (isBindless || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle))
if (isBindless || isIndexed || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle))
{
return node;
}
@ -167,10 +167,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
return node;
}
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless || isIndexed ? 1 : 0;
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
@ -178,16 +175,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
Operand coordSize = Local();
Operand[] texSizeSources;
if (isBindless || isIndexed)
{
texSizeSources = new Operand[] { texOp.GetSource(0), Const(0) };
}
else
{
texSizeSources = new Operand[] { Const(0) };
}
Operand[] texSizeSources = new Operand[] { Const(0) };
LinkedListNode<INode> textureSizeNode = node.List.AddBefore(node, new TextureOperation(
Instruction.TextureQuerySize,
@ -201,13 +189,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
resourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
Operand source = texOp.GetSource(coordsIndex + index);
Operand source = texOp.GetSource(index);
Operand coordNormalized = Local();
node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, GenerateI2f(node, coordSize)));
texOp.SetSource(coordsIndex + index, coordNormalized);
texOp.SetSource(index, coordNormalized);
InsertTextureSizeUnscale(hfm, textureSizeNode, resourceManager, stage);
}
@ -234,7 +222,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
return node;
}
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
bool isIndexed = resourceManager.IsArrayOfTexturesOrImages(texOp.Binding, isImage: false);
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless || isIndexed ? 1 : 0;
@ -287,7 +275,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
return node;
}
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, IGpuAccessor gpuAccessor, ShaderStage stage)
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor, ShaderStage stage)
{
// Non-constant texture offsets are not allowed (according to the spec),
// however some GPUs do support that.
@ -321,7 +309,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
@ -342,6 +329,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
offsetsCount = 0;
}
bool isIndexed = resourceManager.IsArrayOfTexturesOrImages(texOp.Binding, isImage: false);
Operand[] offsets = new Operand[offsetsCount];
Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];

View file

@ -294,6 +294,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Definitions,
resourceManager,
GpuAccessor,
Options.TargetApi,
Options.TargetLanguage,
Definitions.Stage,
ref usedFeatures);
@ -412,7 +413,7 @@ namespace Ryujinx.Graphics.Shader.Translation
if (Stage == ShaderStage.Vertex)
{
int ibBinding = resourceManager.Reservations.IndexBufferTextureBinding;
TextureDefinition indexBuffer = new(2, ibBinding, "ib_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
TextureDefinition indexBuffer = new(2, ibBinding, 1, "ib_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
resourceManager.Properties.AddOrUpdateTexture(indexBuffer);
int inputMap = _program.AttributeUsage.UsedInputAttributes;
@ -421,7 +422,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
int location = BitOperations.TrailingZeroCount(inputMap);
int binding = resourceManager.Reservations.GetVertexBufferTextureBinding(location);
TextureDefinition vaBuffer = new(2, binding, $"vb_data{location}", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
TextureDefinition vaBuffer = new(2, binding, 1, $"vb_data{location}", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
resourceManager.Properties.AddOrUpdateTexture(vaBuffer);
inputMap &= ~(1 << location);
@ -430,7 +431,7 @@ namespace Ryujinx.Graphics.Shader.Translation
else if (Stage == ShaderStage.Geometry)
{
int trbBinding = resourceManager.Reservations.TopologyRemapBufferTextureBinding;
TextureDefinition remapBuffer = new(2, trbBinding, "trb_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
TextureDefinition remapBuffer = new(2, trbBinding, 1, "trb_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None);
resourceManager.Properties.AddOrUpdateTexture(remapBuffer);
int geometryVbOutputSbBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding;