Implement shader storage buffer operations using new Load/Store instructions (#4993)

* Implement storage buffer operations using new Load/Store instruction

* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and log and comments

* Remove now unused code

* Catch more complex cases of global memory usage

* Shader cache version bump

* Extend global access elimination to work with more shared memory cases

* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers

* Tweak preferencing to catch more cases

* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)

* Fix storage buffer bindings

* Simplify some code

* Shader cache version bump

* Fix typo

* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
gdkchan 2023-06-03 20:12:18 -03:00 committed by GitHub
parent 81c9052847
commit 21c9ac6240
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 1468 additions and 1259 deletions

View file

@ -57,6 +57,56 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);
}
public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicAdd, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value)
{
return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value);
}
public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicMaxS32, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicMaxU32, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicMinS32, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicMinU32, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicOr, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicSwap, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value);
}
public static Operand Ballot(this EmitterContext context, Operand a)
{
return context.Add(Instruction.Ballot, Local(), a);
@ -554,6 +604,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(fpType | Instruction.IsNan, Local(), a);
}
public static Operand Load(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1)
{
return context.Add(Instruction.Load, storageKind, Local(), e0, e1);
}
public static Operand Load(this EmitterContext context, StorageKind storageKind, int binding)
{
return context.Add(Instruction.Load, storageKind, Local(), Const(binding));
@ -606,11 +661,6 @@ namespace Ryujinx.Graphics.Shader.Translation
: context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex);
}
public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.LoadGlobal, Local(), a, b);
}
public static Operand LoadLocal(this EmitterContext context, Operand a)
{
return context.Add(Instruction.LoadLocal, Local(), a);
@ -655,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.Translation
public static void Return(this EmitterContext context)
{
context.PrepareForReturn();
context.Add(Instruction.Return);
}
@ -699,6 +748,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
}
public static Operand Store(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.Store, storageKind, null, e0, e1, value);
}
public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
{
return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value);
}
public static Operand Store(
this EmitterContext context,
StorageKind storageKind,
@ -738,21 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation
: context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
}
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal, null, a, b, c);
}
public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal16, null, a, b, c);
}
public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.StoreGlobal8, null, a, b, c);
}
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.StoreLocal, null, a, b);

View file

@ -1,54 +0,0 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
namespace Ryujinx.Graphics.Shader.Translation
{
static class GlobalMemory
{
private const int StorageDescsBaseOffset = 0x44; // In words.
public const int StorageDescSize = 4; // In words.
public const int StorageMaxCount = 16;
public const int StorageDescsSize = StorageDescSize * StorageMaxCount;
public const int UbeBaseOffset = 0x98; // In words.
public const int UbeMaxCount = 9;
public const int UbeDescsSize = StorageDescSize * UbeMaxCount;
public const int UbeFirstCbuf = 8;
public const int DriverReservedCb = 0;
public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
{
return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) ||
inst == Instruction.LoadGlobal ||
inst == Instruction.StoreGlobal ||
inst == Instruction.StoreGlobal16 ||
inst == Instruction.StoreGlobal8;
}
public static int GetStorageCbOffset(ShaderStage stage, int slot)
{
return GetStorageBaseCbOffset(stage) + slot * StorageDescSize;
}
public static int GetStorageBaseCbOffset(ShaderStage stage)
{
return stage switch
{
ShaderStage.Compute => StorageDescsBaseOffset + 2 * StorageDescsSize,
ShaderStage.Vertex => StorageDescsBaseOffset,
ShaderStage.TessellationControl => StorageDescsBaseOffset + 1 * StorageDescsSize,
ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize,
ShaderStage.Geometry => StorageDescsBaseOffset + 3 * StorageDescsSize,
ShaderStage.Fragment => StorageDescsBaseOffset + 4 * StorageDescsSize,
_ => 0
};
}
public static int GetConstantUbeOffset(int slot)
{
return UbeBaseOffset + slot * StorageDescSize;
}
}
}

View file

@ -19,6 +19,14 @@ namespace Ryujinx.Graphics.Shader.Translation
_stage = stage;
}
public int AddFunction(Function function)
{
int functionId = _functionList.Count;
_functionList.Add(function);
return functionId;
}
public int GetOrCreateFunctionId(HelperFunctionName functionName)
{
if (_functionIds.TryGetValue(functionName, out int functionId))
@ -27,8 +35,7 @@ namespace Ryujinx.Graphics.Shader.Translation
}
Function function = GenerateFunction(functionName);
functionId = _functionList.Count;
_functionList.Add(function);
functionId = AddFunction(function);
_functionIds.Add(functionName, functionId);
return functionId;

View file

@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class Optimizer
{
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
{
RunOptimizationPasses(blocks, config);
int sbUseMask = 0;
int ubeUseMask = 0;
GlobalToStorage.RunPass(hfm, blocks, config);
// Those passes are looking for specific patterns and only needs to run once.
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
BindlessToIndexed.RunPass(blocks[blkIndex], config);
BindlessElimination.RunPass(blocks[blkIndex], config);
@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
// Run optimizations one last time to remove any code that is now optimizable after above passes.
RunOptimizationPasses(blocks, config);
}

View file

@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
switch (operation.Inst)
{
case Instruction.Add:
case Instruction.BitwiseExclusiveOr:
TryEliminateBinaryOpCommutative(operation, 0);
break;
@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
TryEliminateBitwiseAnd(operation);
break;
case Instruction.BitwiseExclusiveOr:
if (!TryEliminateXorSwap(operation))
{
TryEliminateBinaryOpCommutative(operation, 0);
}
break;
case Instruction.BitwiseOr:
TryEliminateBitwiseOr(operation);
break;
@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
private static void TryEliminateBitwiseAnd(Operation operation)
{
// Try to recognize and optimize those 3 patterns (in order):
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
Operand x = operation.GetSource(0);
Operand y = operation.GetSource(1);
@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
private static bool TryEliminateXorSwap(Operation xCopyOp)
{
// Try to recognize XOR swap pattern:
// x = x ^ y
// y = x ^ y
// x = x ^ y
// Or, in SSA:
// x2 = x ^ y
// y2 = x2 ^ y
// x3 = x2 ^ y2
// Transform it into something more sane:
// temp = y
// y = x
// x = temp
// Note that because XOR is commutative, there are actually
// multiple possible combinations of this pattern, for
// simplicity this only catches one of them.
Operand x = xCopyOp.GetSource(0);
Operand y = xCopyOp.GetSource(1);
if (x.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr ||
y.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr)
{
return false;
}
if (tCopyOp == yCopyOp)
{
return false;
}
if (yCopyOp.GetSource(0) != x ||
yCopyOp.GetSource(1) != tCopyOp.GetSource(1) ||
x.UseOps.Count != 2)
{
return false;
}
x = tCopyOp.GetSource(0);
y = tCopyOp.GetSource(1);
tCopyOp.TurnIntoCopy(y); // Temp = Y
yCopyOp.TurnIntoCopy(x); // Y = X
xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp
return true;
}
private static void TryEliminateBitwiseOr(Operation operation)
{
// Try to recognize and optimize those 3 patterns (in order):
// x | 0x00000000 == x, 0x00000000 | y == y,
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
// x | 0x00000000 == x, 0x00000000 | y == y,
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
Operand x = operation.GetSource(0);
Operand y = operation.GetSource(1);

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return source;
}
public static void DeleteNode(LinkedListNode<INode> node, Operation operation)
{
node.List.Remove(node);
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
operation.SetSource(srcIndex, null);
}
operation.Dest = null;
}
}
}

View file

@ -14,6 +14,11 @@ namespace Ryujinx.Graphics.Shader.Translation
private readonly string _stagePrefix;
private readonly int[] _cbSlotToBindingMap;
private readonly int[] _sbSlotToBindingMap;
private uint _sbSlotWritten;
private readonly Dictionary<int, int> _sbSlots;
private readonly Dictionary<int, int> _sbSlotsReverse;
private readonly HashSet<int> _usedConstantBufferBindings;
@ -26,7 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation
_stagePrefix = GetShaderStagePrefix(stage);
_cbSlotToBindingMap = new int[18];
_sbSlotToBindingMap = new int[16];
_cbSlotToBindingMap.AsSpan().Fill(-1);
_sbSlotToBindingMap.AsSpan().Fill(-1);
_sbSlots = new Dictionary<int, int>();
_sbSlotsReverse = new Dictionary<int, int>();
_usedConstantBufferBindings = new HashSet<int>();
@ -47,6 +57,52 @@ namespace Ryujinx.Graphics.Shader.Translation
return binding;
}
public bool TryGetStorageBufferBinding(int sbCbSlot, int sbCbOffset, bool write, out int binding)
{
if (!TryGetSbSlot((byte)sbCbSlot, (ushort)sbCbOffset, out int slot))
{
binding = 0;
return false;
}
binding = _sbSlotToBindingMap[slot];
if (binding < 0)
{
binding = _gpuAccessor.QueryBindingStorageBuffer(slot);
_sbSlotToBindingMap[slot] = binding;
string slotNumber = slot.ToString(CultureInfo.InvariantCulture);
AddNewStorageBuffer(binding, $"{_stagePrefix}_s{slotNumber}");
}
if (write)
{
_sbSlotWritten |= 1u << slot;
}
return true;
}
private bool TryGetSbSlot(byte sbCbSlot, ushort sbCbOffset, out int slot)
{
int key = PackSbCbInfo(sbCbSlot, sbCbOffset);
if (!_sbSlots.TryGetValue(key, out slot))
{
slot = _sbSlots.Count;
if (slot >= _sbSlotToBindingMap.Length)
{
return false;
}
_sbSlots.Add(key, slot);
_sbSlotsReverse.Add(slot, key);
}
return true;
}
public bool TryGetConstantBufferSlot(int binding, out int slot)
{
for (slot = 0; slot < _cbSlotToBindingMap.Length; slot++)
@ -90,6 +146,34 @@ namespace Ryujinx.Graphics.Shader.Translation
return descriptors;
}
public BufferDescriptor[] GetStorageBufferDescriptors()
{
var descriptors = new BufferDescriptor[_sbSlots.Count];
int descriptorIndex = 0;
foreach ((int key, int slot) in _sbSlots)
{
int binding = _sbSlotToBindingMap[slot];
if (binding >= 0)
{
(int sbCbSlot, int sbCbOffset) = UnpackSbCbInfo(key);
descriptors[descriptorIndex++] = new BufferDescriptor(binding, slot, sbCbSlot, sbCbOffset)
{
Flags = (_sbSlotWritten & (1u << slot)) != 0 ? BufferUsageFlags.Write : BufferUsageFlags.None
};
}
}
if (descriptors.Length != descriptorIndex)
{
Array.Resize(ref descriptors, descriptorIndex);
}
return descriptors;
}
private void AddNewConstantBuffer(int binding, string name)
{
StructureType type = new StructureType(new[]
@ -100,6 +184,16 @@ namespace Ryujinx.Graphics.Shader.Translation
_properties.AddConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type));
}
private void AddNewStorageBuffer(int binding, string name)
{
StructureType type = new StructureType(new[]
{
new StructureField(AggregateType.Array | AggregateType.U32, "data", 0)
});
_properties.AddStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type));
}
public static string GetShaderStagePrefix(ShaderStage stage)
{
uint index = (uint)stage;
@ -111,5 +205,15 @@ namespace Ryujinx.Graphics.Shader.Translation
return _stagePrefixes[index];
}
private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
{
return sbCbOffset | ((int)sbCbSlot << 16);
}
private static (int, int) UnpackSbCbInfo(int key)
{
return ((byte)(key >> 16), (ushort)key);
}
}
}

View file

@ -2,10 +2,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
namespace Ryujinx.Graphics.Shader.Translation
{
@ -23,11 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation
{
BasicBlock block = blocks[blkIndex];
for (LinkedListNode<INode> node = block.Operations.First; node != null;)
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
{
if (node.Value is not Operation operation)
{
node = node.Next;
continue;
}
@ -56,8 +53,6 @@ namespace Ryujinx.Graphics.Shader.Translation
InsertVectorComponentSelect(node, config);
}
LinkedListNode<INode> nextNode = node.Next;
if (operation is TextureOperation texOp)
{
node = InsertTexelFetchScale(hfm, node, config);
@ -74,15 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation
node = InsertSnormNormalization(node, config);
}
}
nextNode = node.Next;
}
else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
{
nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode;
}
node = nextNode;
}
}
}
@ -184,196 +171,6 @@ namespace Ryujinx.Graphics.Shader.Translation
operation.TurnIntoCopy(result);
}
private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
{
Operation operation = (Operation)node.Value;
bool isAtomic = operation.Inst.IsAtomic();
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
Operation storageOp = null;
Operand PrependOperation(Instruction inst, params Operand[] sources)
{
Operand local = Local();
node.List.AddBefore(node, new Operation(inst, local, sources));
return local;
}
Operand PrependStorageOperation(Instruction inst, StorageKind storageKind, params Operand[] sources)
{
Operand local = Local();
node.List.AddBefore(node, new Operation(inst, storageKind, local, sources));
return local;
}
Operand PrependExistingOperation(Operation operation)
{
Operand local = Local();
operation.Dest = local;
node.List.AddBefore(node, operation);
return local;
}
Operand addrLow = operation.GetSource(0);
Operand addrHigh = operation.GetSource(1);
Operand sbBaseAddrLow = Const(0);
Operand sbSlot = Const(0);
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
{
baseAddrLow = Cbuf(DriverReservedCb, cbOffset);
Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1);
Operand size = Cbuf(DriverReservedCb, cbOffset + 2);
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
}
int sbUseMask = config.AccessibleStorageBuffersMask;
while (sbUseMask != 0)
{
int slot = BitOperations.TrailingZeroCount(sbUseMask);
sbUseMask &= ~(1 << slot);
int cbOffset = GetStorageCbOffset(config.Stage, slot);
slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset);
config.SetUsedStorageBuffer(slot, isWrite);
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
}
if (config.AccessibleStorageBuffersMask != 0)
{
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = sbSlot;
if (isStg16Or8)
{
sources[1] = byteOffset;
}
else
{
sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
}
for (int index = 2; index < operation.SourcesCount; index++)
{
sources[index] = operation.GetSource(index);
}
if (isAtomic)
{
storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
}
else if (operation.Inst == Instruction.LoadGlobal)
{
storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
}
else
{
Instruction storeInst = operation.Inst switch
{
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
_ => Instruction.StoreStorage
};
storageOp = new Operation(storeInst, null, sources);
}
}
else if (operation.Dest != null)
{
storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
}
if (operation.Inst == Instruction.LoadGlobal)
{
int cbeUseMask = config.AccessibleConstantBuffersMask;
while (cbeUseMask != 0)
{
int slot = BitOperations.TrailingZeroCount(cbeUseMask);
int cbSlot = UbeFirstCbuf + slot;
cbeUseMask &= ~(1 << slot);
Operand previousResult = PrependExistingOperation(storageOp);
int cbOffset = GetConstantUbeOffset(slot);
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
Operand vecIndex = PrependOperation(Instruction.ShiftRightU32, cbIndex, Const(2));
Operand elemIndex = PrependOperation(Instruction.BitwiseAnd, cbIndex, Const(3));
Operand[] sourcesCb = new Operand[4];
sourcesCb[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
sourcesCb[1] = Const(0);
sourcesCb[2] = vecIndex;
sourcesCb[3] = elemIndex;
Operand ldcResult = PrependStorageOperation(Instruction.Load, StorageKind.ConstantBuffer, sourcesCb);
storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
}
}
for (int index = 0; index < operation.SourcesCount; index++)
{
operation.SetSource(index, null);
}
LinkedListNode<INode> oldNode = node;
LinkedList<INode> oldNodeList = oldNode.List;
if (storageOp != null)
{
node = node.List.AddBefore(node, storageOp);
}
else
{
node = null;
}
oldNodeList.Remove(oldNode);
return node;
}
private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
{
TextureOperation texOp = (TextureOperation)node.Value;

View file

@ -110,12 +110,6 @@ namespace Ryujinx.Graphics.Shader.Translation
public UInt128 NextInputAttributesComponents { get; private set; }
public UInt128 ThisInputAttributesComponents { get; private set; }
public int AccessibleStorageBuffersMask { get; private set; }
public int AccessibleConstantBuffersMask { get; private set; }
private int _usedStorageBuffers;
private int _usedStorageBuffersWrite;
private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
private struct TextureMeta
@ -127,18 +121,9 @@ namespace Ryujinx.Graphics.Shader.Translation
private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
private readonly Dictionary<int, int> _sbSlots;
private readonly Dictionary<int, int> _sbSlotsReverse;
private BufferDescriptor[] _cachedStorageBufferDescriptors;
private TextureDescriptor[] _cachedTextureDescriptors;
private TextureDescriptor[] _cachedImageDescriptors;
private int _firstStorageBufferBinding;
public int FirstStorageBufferBinding => _firstStorageBufferBinding;
public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options)
{
Stage = stage;
@ -147,18 +132,12 @@ namespace Ryujinx.Graphics.Shader.Translation
_transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();
AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;
UsedInputAttributesPerPatch = new HashSet<int>();
UsedOutputAttributesPerPatch = new HashSet<int>();
_usedTextures = new Dictionary<TextureInfo, TextureMeta>();
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
_sbSlots = new Dictionary<int, int>();
_sbSlotsReverse = new Dictionary<int, int>();
ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties());
}
@ -173,11 +152,6 @@ namespace Ryujinx.Graphics.Shader.Translation
OutputTopology = outputTopology;
MaxOutputVertices = maxOutputVertices;
TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
if (Stage != ShaderStage.Compute)
{
AccessibleConstantBuffersMask = 0;
}
}
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options)
@ -433,8 +407,6 @@ namespace Ryujinx.Graphics.Shader.Translation
UsedInputAttributes |= other.UsedInputAttributes;
UsedOutputAttributes |= other.UsedOutputAttributes;
_usedStorageBuffers |= other._usedStorageBuffers;
_usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
foreach (var kv in other._usedTextures)
{
@ -634,23 +606,6 @@ namespace Ryujinx.Graphics.Shader.Translation
UsedFeatures |= flags;
}
public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
{
AccessibleStorageBuffersMask = sbMask;
AccessibleConstantBuffersMask = ubeMask;
}
public void SetUsedStorageBuffer(int slot, bool write)
{
int mask = 1 << slot;
_usedStorageBuffers |= mask;
if (write)
{
_usedStorageBuffersWrite |= mask;
}
}
public void SetUsedTexture(
Instruction inst,
SamplerType type,
@ -756,76 +711,6 @@ namespace Ryujinx.Graphics.Shader.Translation
return meta;
}
public BufferDescriptor[] GetStorageBufferDescriptors()
{
if (_cachedStorageBufferDescriptors != null)
{
return _cachedStorageBufferDescriptors;
}
return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors(
_usedStorageBuffers,
_usedStorageBuffersWrite,
true,
out _firstStorageBufferBinding,
GpuAccessor.QueryBindingStorageBuffer);
}
private BufferDescriptor[] GetStorageBufferDescriptors(
int usedMask,
int writtenMask,
bool isArray,
out int firstBinding,
Func<int, int> getBindingCallback)
{
firstBinding = 0;
bool hasFirstBinding = false;
var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];
int lastSlot = -1;
for (int i = 0; i < descriptors.Length; i++)
{
int slot = BitOperations.TrailingZeroCount(usedMask);
if (isArray)
{
// The next array entries also consumes bindings, even if they are unused.
for (int j = lastSlot + 1; j < slot; j++)
{
int binding = getBindingCallback(j);
if (!hasFirstBinding)
{
firstBinding = binding;
hasFirstBinding = true;
}
}
}
lastSlot = slot;
(int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot);
descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset);
if (!hasFirstBinding)
{
firstBinding = descriptors[i].Binding;
hasFirstBinding = true;
}
if ((writtenMask & (1 << slot)) != 0)
{
descriptors[i].SetFlag(BufferUsageFlags.Write);
}
usedMask &= ~(1 << slot);
}
return descriptors;
}
public TextureDescriptor[] GetTextureDescriptors()
{
return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
@ -922,45 +807,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return FindDescriptorIndex(GetImageDescriptors(), texOp);
}
public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset)
{
int key = PackSbCbInfo(sbCbSlot, sbCbOffset);
if (!_sbSlots.TryGetValue(key, out int slot))
{
slot = _sbSlots.Count;
_sbSlots.Add(key, slot);
_sbSlotsReverse.Add(slot, key);
}
return slot;
}
public (int, int) GetSbCbInfo(int slot)
{
if (_sbSlotsReverse.TryGetValue(slot, out int key))
{
return UnpackSbCbInfo(key);
}
throw new ArgumentException($"Invalid slot {slot}.", nameof(slot));
}
private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
{
return sbCbOffset | ((int)sbCbSlot << 16);
}
private static (int, int) UnpackSbCbInfo(int key)
{
return ((byte)(key >> 16), (ushort)key);
}
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
{
return new ShaderProgramInfo(
ResourceManager.GetConstantBufferDescriptors(),
GetStorageBufferDescriptors(),
ResourceManager.GetStorageBufferDescriptors(),
GetTextureDescriptors(),
GetImageDescriptors(),
identification,

View file

@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.Translation
continue;
}
if (IsResourceWrite(operation.Inst))
if (IsResourceWrite(operation.Inst, operation.StorageKind))
{
return false;
}
@ -154,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return totalVerticesCount + verticesCount == 3 && writesLayer;
}
private static bool IsResourceWrite(Instruction inst)
private static bool IsResourceWrite(Instruction inst, StorageKind storageKind)
{
switch (inst)
{
@ -170,13 +170,11 @@ namespace Ryujinx.Graphics.Shader.Translation
case Instruction.AtomicXor:
case Instruction.ImageAtomic:
case Instruction.ImageStore:
case Instruction.StoreGlobal:
case Instruction.StoreGlobal16:
case Instruction.StoreGlobal8:
case Instruction.StoreStorage:
case Instruction.StoreStorage16:
case Instruction.StoreStorage8:
return true;
case Instruction.Store:
return storageKind == StorageKind.StorageBuffer ||
storageKind == StorageKind.SharedMemory ||
storageKind == StorageKind.LocalMemory;
}
return false;

View file

@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Ssa.Rename(cfg.Blocks);
Optimizer.RunPass(cfg.Blocks, config);
Optimizer.RunPass(hfm, cfg.Blocks, config);
Rewriter.RunPass(hfm, cfg.Blocks, config);
}