Implement shader storage buffer operations using new Load/Store instructions (#4993)
* Implement storage buffer operations using new Load/Store instruction
* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and add log and comments
* Remove now unused code
* Catch more complex cases of global memory usage
* Shader cache version bump
* Extend global access elimination to work with more shared memory cases
* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers
* Tweak preferencing to catch more cases
* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)
* Fix storage buffer bindings
* Simplify some code
* Shader cache version bump
* Fix typo
* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
parent
81c9052847
commit
21c9ac6240
42 changed files with 1468 additions and 1259 deletions
|
@ -57,6 +57,56 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicAdd"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicAdd, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicAnd"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicCompareAndSwap"/> operation with the given storage kind.
/// Sources are, in order: the constant binding, <paramref name="e0"/>, <paramref name="e1"/>,
/// <paramref name="compare"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value)
    => context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicMaxS32"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicMaxS32, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicMaxU32"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicMaxU32, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicMinS32"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicMinS32, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicMinU32"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicMinU32, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicOr"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicOr, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicSwap"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicSwap, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.AtomicXor"/> operation with the given storage kind.
/// The binding is encoded as a constant operand placed ahead of <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>; the destination is a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value);
|
||||
|
||||
public static Operand Ballot(this EmitterContext context, Operand a)
|
||||
{
|
||||
return context.Add(Instruction.Ballot, Local(), a);
|
||||
|
@ -554,6 +604,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return context.Add(fpType | Instruction.IsNan, Local(), a);
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.Load"/> operation with the given storage kind, using
/// <paramref name="e0"/> and <paramref name="e1"/> as its only sources and a new local as destination.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand Load(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1)
    => context.Add(Instruction.Load, storageKind, Local(), e0, e1);
|
||||
|
||||
public static Operand Load(this EmitterContext context, StorageKind storageKind, int binding)
|
||||
{
|
||||
return context.Add(Instruction.Load, storageKind, Local(), Const(binding));
|
||||
|
@ -606,11 +661,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
: context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex);
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.LoadGlobal"/> operation whose sources are
/// <paramref name="a"/> and <paramref name="b"/>, writing to a new local.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
    => context.Add(Instruction.LoadGlobal, Local(), a, b);
|
||||
|
||||
public static Operand LoadLocal(this EmitterContext context, Operand a)
|
||||
{
|
||||
return context.Add(Instruction.LoadLocal, Local(), a);
|
||||
|
@ -655,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
/// <summary>
/// Calls <c>context.PrepareForReturn()</c> and then appends an
/// <see cref="Instruction.Return"/> operation to the context.
/// </summary>
public static void Return(this EmitterContext context)
|
||||
{
|
||||
context.PrepareForReturn();
|
||||
context.Add(Instruction.Return);
|
||||
}
|
||||
|
||||
|
@ -699,6 +748,16 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.Store"/> operation with the given storage kind.
/// No destination operand is supplied (null); the sources are <paramref name="e0"/>,
/// <paramref name="e1"/> and <paramref name="value"/>, in that order.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand Store(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.Store, storageKind, null, e0, e1, value);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.Store"/> operation with the given storage kind.
/// No destination operand is supplied (null); the binding is encoded as a constant operand
/// placed ahead of <paramref name="e0"/>, <paramref name="e1"/> and <paramref name="value"/>.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value)
    => context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value);
|
||||
|
||||
public static Operand Store(
|
||||
this EmitterContext context,
|
||||
StorageKind storageKind,
|
||||
|
@ -738,21 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
: context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.StoreGlobal"/> operation with no destination (null)
/// and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal, null, a, b, c);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.StoreGlobal16"/> operation with no destination (null)
/// and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal16, null, a, b, c);
|
||||
|
||||
/// <summary>
/// Appends an <see cref="Instruction.StoreGlobal8"/> operation with no destination (null)
/// and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.
/// </summary>
/// <returns>The operand returned by <c>context.Add</c>.</returns>
public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
    => context.Add(Instruction.StoreGlobal8, null, a, b, c);
|
||||
|
||||
public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
|
||||
{
|
||||
return context.Add(Instruction.StoreLocal, null, a, b);
|
||||
|
|
|
@ -1,54 +0,0 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Constants and helpers describing where storage buffer and "Ube" descriptors sit
/// inside the driver reserved constant buffer, plus a predicate that identifies
/// operations touching global memory. Offsets and sizes are expressed in words.
/// </summary>
static class GlobalMemory
{
    // Offset (in words) of the first per-stage storage descriptor table.
    private const int StorageDescsBaseOffset = 0x44;

    // Size (in words) of a single descriptor.
    public const int StorageDescSize = 4;

    // Number of storage descriptors per table.
    public const int StorageMaxCount = 16;

    // Size (in words) of one whole storage descriptor table.
    public const int StorageDescsSize = StorageDescSize * StorageMaxCount;

    // Offset (in words) of the first "Ube" descriptor.
    public const int UbeBaseOffset = 0x98;

    public const int UbeMaxCount = 9;

    public const int UbeDescsSize = StorageDescSize * UbeMaxCount;

    public const int UbeFirstCbuf = 8;

    // Constant buffer slot that holds the descriptors above.
    public const int DriverReservedCb = 0;

    /// <summary>
    /// Tells whether an operation accesses global memory: either an atomic whose storage
    /// kind is <see cref="StorageKind.GlobalMemory"/>, or one of the dedicated global
    /// load/store instructions.
    /// </summary>
    public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
    {
        if (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory)
        {
            return true;
        }

        switch (inst)
        {
            case Instruction.LoadGlobal:
            case Instruction.StoreGlobal:
            case Instruction.StoreGlobal16:
            case Instruction.StoreGlobal8:
                return true;

            default:
                return false;
        }
    }

    /// <summary>
    /// Word offset of the storage descriptor for <paramref name="slot"/> in the table of <paramref name="stage"/>.
    /// </summary>
    public static int GetStorageCbOffset(ShaderStage stage, int slot)
        => GetStorageBaseCbOffset(stage) + slot * StorageDescSize;

    /// <summary>
    /// Word offset of the storage descriptor table used by <paramref name="stage"/>;
    /// 0 for any stage not listed below.
    /// </summary>
    public static int GetStorageBaseCbOffset(ShaderStage stage)
    {
        int tableIndex;

        switch (stage)
        {
            case ShaderStage.Vertex:
                tableIndex = 0;
                break;

            case ShaderStage.TessellationControl:
                tableIndex = 1;
                break;

            // Compute and tessellation evaluation resolve to the same table index.
            case ShaderStage.Compute:
            case ShaderStage.TessellationEvaluation:
                tableIndex = 2;
                break;

            case ShaderStage.Geometry:
                tableIndex = 3;
                break;

            case ShaderStage.Fragment:
                tableIndex = 4;
                break;

            default:
                return 0;
        }

        return StorageDescsBaseOffset + tableIndex * StorageDescsSize;
    }

    /// <summary>
    /// Word offset of the "Ube" descriptor for <paramref name="slot"/>.
    /// </summary>
    public static int GetConstantUbeOffset(int slot)
        => UbeBaseOffset + slot * StorageDescSize;
}
|
||||
}
|
|
@ -19,6 +19,14 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_stage = stage;
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends <paramref name="function"/> to the function list.
/// </summary>
/// <returns>The index at which the function was stored.</returns>
public int AddFunction(Function function)
{
    _functionList.Add(function);

    // The new entry is the last one, so its id is the count minus one.
    return _functionList.Count - 1;
}
|
||||
|
||||
public int GetOrCreateFunctionId(HelperFunctionName functionName)
|
||||
{
|
||||
if (_functionIds.TryGetValue(functionName, out int functionId))
|
||||
|
@ -27,8 +35,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
|
||||
Function function = GenerateFunction(functionName);
|
||||
functionId = _functionList.Count;
|
||||
_functionList.Add(function);
|
||||
functionId = AddFunction(function);
|
||||
_functionIds.Add(functionName, functionId);
|
||||
|
||||
return functionId;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
static class Optimizer
|
||||
{
|
||||
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
|
||||
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
|
||||
{
|
||||
RunOptimizationPasses(blocks, config);
|
||||
|
||||
int sbUseMask = 0;
|
||||
int ubeUseMask = 0;
|
||||
GlobalToStorage.RunPass(hfm, blocks, config);
|
||||
|
||||
// These passes look for specific patterns and only need to run once.
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
{
|
||||
GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
|
||||
BindlessToIndexed.RunPass(blocks[blkIndex], config);
|
||||
BindlessElimination.RunPass(blocks[blkIndex], config);
|
||||
|
||||
|
@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
|
||||
|
||||
// Run optimizations one last time to remove any code that is now optimizable after above passes.
|
||||
RunOptimizationPasses(blocks, config);
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
switch (operation.Inst)
|
||||
{
|
||||
case Instruction.Add:
|
||||
case Instruction.BitwiseExclusiveOr:
|
||||
TryEliminateBinaryOpCommutative(operation, 0);
|
||||
break;
|
||||
|
||||
|
@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
TryEliminateBitwiseAnd(operation);
|
||||
break;
|
||||
|
||||
case Instruction.BitwiseExclusiveOr:
|
||||
if (!TryEliminateXorSwap(operation))
|
||||
{
|
||||
TryEliminateBinaryOpCommutative(operation, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case Instruction.BitwiseOr:
|
||||
TryEliminateBitwiseOr(operation);
|
||||
break;
|
||||
|
@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
private static void TryEliminateBitwiseAnd(Operation operation)
|
||||
{
|
||||
// Try to recognize and optimize those 3 patterns (in order):
|
||||
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
|
||||
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
|
||||
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
|
||||
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
|
||||
|
||||
Operand x = operation.GetSource(0);
|
||||
Operand y = operation.GetSource(1);
|
||||
|
||||
|
@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Detects one form of the XOR swap idiom ending at <paramref name="xCopyOp"/> and, when found,
/// rewrites the three XOR operations into plain copies.
/// </summary>
/// <remarks>
/// The sequence being matched, in SSA form, is:
/// <code>
/// x2 = x  ^ y
/// y2 = x2 ^ y
/// x3 = x2 ^ y2   (xCopyOp)
/// </code>
/// which is turned into <c>x2 = y; y2 = x; x3 = x2</c>.
/// XOR is commutative, so other operand orderings of the same idiom exist; only this one is handled.
/// </remarks>
/// <returns>True if the pattern was matched and rewritten, false if nothing was changed.</returns>
private static bool TryEliminateXorSwap(Operation xCopyOp)
{
    Operand firstSrc = xCopyOp.GetSource(0);   // x2 in the pattern above
    Operand secondSrc = xCopyOp.GetSource(1);  // y2 in the pattern above

    // Both inputs must themselves be produced by XOR operations.
    if (firstSrc.AsgOp is not Operation tempOp || tempOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    if (secondSrc.AsgOp is not Operation swapOp || swapOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    // Reject when both inputs come from the same operation, when the middle XOR does not
    // combine x2 with the original y, or when x2 is used by anything other than two operations.
    if (tempOp == swapOp ||
        swapOp.GetSource(0) != firstSrc ||
        swapOp.GetSource(1) != tempOp.GetSource(1) ||
        firstSrc.UseOps.Count != 2)
    {
        return false;
    }

    // Capture the original operands before any operation is rewritten.
    Operand originalX = tempOp.GetSource(0);
    Operand originalY = tempOp.GetSource(1);

    tempOp.TurnIntoCopy(originalY);       // temp = y
    swapOp.TurnIntoCopy(originalX);       // y = x
    xCopyOp.TurnIntoCopy(tempOp.Dest);    // x = temp

    return true;
}
|
||||
|
||||
private static void TryEliminateBitwiseOr(Operation operation)
|
||||
{
|
||||
// Try to recognize and optimize those 3 patterns (in order):
|
||||
// x | 0x00000000 == x, 0x00000000 | y == y,
|
||||
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
|
||||
// x | 0x00000000 == x, 0x00000000 | y == y,
|
||||
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
|
||||
|
||||
Operand x = operation.GetSource(0);
|
||||
Operand y = operation.GetSource(1);
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
|
@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
return source;
|
||||
}
|
||||
|
||||
/// <summary>
/// Removes <paramref name="node"/> from the list that owns it, then clears every source
/// and the destination of <paramref name="operation"/> by setting them to null.
/// </summary>
public static void DeleteNode(LinkedListNode<INode> node, Operation operation)
{
    LinkedList<INode> owner = node.List;

    owner.Remove(node);

    int index = 0;

    while (index < operation.SourcesCount)
    {
        operation.SetSource(index, null);
        index++;
    }

    operation.Dest = null;
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
private readonly string _stagePrefix;
|
||||
|
||||
private readonly int[] _cbSlotToBindingMap;
|
||||
private readonly int[] _sbSlotToBindingMap;
|
||||
private uint _sbSlotWritten;
|
||||
|
||||
private readonly Dictionary<int, int> _sbSlots;
|
||||
private readonly Dictionary<int, int> _sbSlotsReverse;
|
||||
|
||||
private readonly HashSet<int> _usedConstantBufferBindings;
|
||||
|
||||
|
@ -26,7 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_stagePrefix = GetShaderStagePrefix(stage);
|
||||
|
||||
_cbSlotToBindingMap = new int[18];
|
||||
_sbSlotToBindingMap = new int[16];
|
||||
_cbSlotToBindingMap.AsSpan().Fill(-1);
|
||||
_sbSlotToBindingMap.AsSpan().Fill(-1);
|
||||
|
||||
_sbSlots = new Dictionary<int, int>();
|
||||
_sbSlotsReverse = new Dictionary<int, int>();
|
||||
|
||||
_usedConstantBufferBindings = new HashSet<int>();
|
||||
|
||||
|
@ -47,6 +57,52 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return binding;
|
||||
}
|
||||
|
||||
/// <summary>
/// Resolves the binding for the storage buffer identified by a constant buffer slot and offset.
/// </summary>
/// <param name="sbCbSlot">Constant buffer slot; narrowed to a byte before lookup.</param>
/// <param name="sbCbOffset">Constant buffer offset; narrowed to a ushort before lookup.</param>
/// <param name="write">When true, the resolved slot is recorded as written.</param>
/// <param name="binding">The resolved binding, or 0 when no slot could be obtained.</param>
/// <returns>False when no slot could be obtained for the pair, true otherwise.</returns>
public bool TryGetStorageBufferBinding(int sbCbSlot, int sbCbOffset, bool write, out int binding)
{
    bool hasSlot = TryGetSbSlot((byte)sbCbSlot, (ushort)sbCbOffset, out int sbSlot);

    if (hasSlot)
    {
        binding = _sbSlotToBindingMap[sbSlot];

        // A negative entry means the binding has not been assigned yet: query it,
        // remember it and declare the buffer.
        if (binding < 0)
        {
            binding = _gpuAccessor.QueryBindingStorageBuffer(sbSlot);
            _sbSlotToBindingMap[sbSlot] = binding;

            string bufferName = $"{_stagePrefix}_s{sbSlot.ToString(CultureInfo.InvariantCulture)}";

            AddNewStorageBuffer(binding, bufferName);
        }

        if (write)
        {
            _sbSlotWritten |= 1u << sbSlot;
        }

        return true;
    }

    binding = 0;

    return false;
}
|
||||
|
||||
/// <summary>
/// Looks up the slot associated with a (constant buffer slot, offset) pair, allocating the
/// next free slot when the pair has not been seen before.
/// </summary>
/// <param name="slot">
/// The existing or newly allocated slot. When the method returns false this holds the
/// slot index that would have been allocated.
/// </param>
/// <returns>False when a new slot is required but the binding map has no room for it.</returns>
private bool TryGetSbSlot(byte sbCbSlot, ushort sbCbOffset, out int slot)
{
    int packedKey = PackSbCbInfo(sbCbSlot, sbCbOffset);

    if (_sbSlots.TryGetValue(packedKey, out slot))
    {
        return true;
    }

    // New pair: the next slot index is the number of slots handed out so far.
    slot = _sbSlots.Count;

    bool hasRoom = slot < _sbSlotToBindingMap.Length;

    if (hasRoom)
    {
        _sbSlots.Add(packedKey, slot);
        _sbSlotsReverse.Add(slot, packedKey);
    }

    return hasRoom;
}
|
||||
|
||||
public bool TryGetConstantBufferSlot(int binding, out int slot)
|
||||
{
|
||||
for (slot = 0; slot < _cbSlotToBindingMap.Length; slot++)
|
||||
|
@ -90,6 +146,34 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return descriptors;
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds one descriptor for every allocated storage buffer slot that has a non-negative binding.
/// A descriptor gets <see cref="BufferUsageFlags.Write"/> when its slot was recorded as written,
/// and <see cref="BufferUsageFlags.None"/> otherwise.
/// </summary>
/// <returns>An array sized to the number of descriptors produced.</returns>
public BufferDescriptor[] GetStorageBufferDescriptors()
{
    var result = new BufferDescriptor[_sbSlots.Count];
    int count = 0;

    foreach (KeyValuePair<int, int> entry in _sbSlots)
    {
        int slot = entry.Value;
        int binding = _sbSlotToBindingMap[slot];

        // Slots without an assigned binding produce no descriptor.
        if (binding < 0)
        {
            continue;
        }

        (int sbCbSlot, int sbCbOffset) = UnpackSbCbInfo(entry.Key);

        bool wasWritten = (_sbSlotWritten & (1u << slot)) != 0;

        result[count] = new BufferDescriptor(binding, slot, sbCbSlot, sbCbOffset)
        {
            Flags = wasWritten ? BufferUsageFlags.Write : BufferUsageFlags.None
        };

        count++;
    }

    // Trim the array when some slots were skipped.
    if (count != result.Length)
    {
        Array.Resize(ref result, count);
    }

    return result;
}
|
||||
|
||||
private void AddNewConstantBuffer(int binding, string name)
|
||||
{
|
||||
StructureType type = new StructureType(new[]
|
||||
|
@ -100,6 +184,16 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_properties.AddConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type));
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers a storage buffer definition for <paramref name="binding"/> under <paramref name="name"/>.
/// The buffer is declared with the Std430 layout and a structure holding a single
/// U32 array field called "data".
/// </summary>
private void AddNewStorageBuffer(int binding, string name)
{
    var dataField = new StructureField(AggregateType.Array | AggregateType.U32, "data", 0);
    StructureType layoutType = new StructureType(new[] { dataField });

    var definition = new BufferDefinition(BufferLayout.Std430, 1, binding, name, layoutType);

    _properties.AddStorageBuffer(binding, definition);
}
|
||||
|
||||
public static string GetShaderStagePrefix(ShaderStage stage)
|
||||
{
|
||||
uint index = (uint)stage;
|
||||
|
@ -111,5 +205,15 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
return _stagePrefixes[index];
|
||||
}
|
||||
|
||||
/// <summary>
/// Combines a constant buffer slot and offset into one integer key:
/// the slot is shifted into bit 16 and above, and OR-ed with the offset.
/// </summary>
private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
{
    int slotBits = sbCbSlot << 16;

    return slotBits | sbCbOffset;
}
|
||||
|
||||
/// <summary>
/// Splits a packed key back into its parts.
/// </summary>
/// <returns>
/// A tuple whose first item is bits 16..23 of the key (as a byte) and whose second item
/// is the low 16 bits of the key (as a ushort).
/// </returns>
private static (int, int) UnpackSbCbInfo(int key)
{
    byte slotPart = (byte)(key >> 16);
    ushort offsetPart = (ushort)key;

    return (slotPart, offsetPart);
}
|
||||
}
|
||||
}
|
|
@ -2,10 +2,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
|||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
|
@ -23,11 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
BasicBlock block = blocks[blkIndex];
|
||||
|
||||
for (LinkedListNode<INode> node = block.Operations.First; node != null;)
|
||||
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
|
||||
{
|
||||
if (node.Value is not Operation operation)
|
||||
{
|
||||
node = node.Next;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -56,8 +53,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
InsertVectorComponentSelect(node, config);
|
||||
}
|
||||
|
||||
LinkedListNode<INode> nextNode = node.Next;
|
||||
|
||||
if (operation is TextureOperation texOp)
|
||||
{
|
||||
node = InsertTexelFetchScale(hfm, node, config);
|
||||
|
@ -74,15 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
node = InsertSnormNormalization(node, config);
|
||||
}
|
||||
}
|
||||
|
||||
nextNode = node.Next;
|
||||
}
|
||||
else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
|
||||
{
|
||||
nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode;
|
||||
}
|
||||
|
||||
node = nextNode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -184,196 +171,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
operation.TurnIntoCopy(result);
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces the global memory operation held by <paramref name="node"/> with an equivalent
/// storage buffer operation, preceded by the address arithmetic needed to pick the buffer
/// and compute the offset into it. For global loads, a chain of constant buffer loads and
/// selects is additionally emitted, one per bit set in the accessible constant buffers mask.
/// </summary>
/// <param name="node">List node whose value is the global memory <see cref="Operation"/> to rewrite.</param>
/// <param name="config">Shader configuration supplying the accessible buffer masks, slots and bindings.</param>
/// <returns>
/// The node holding the replacement operation, or null when no replacement was created.
/// The original node is removed from its list in both cases.
/// </returns>
private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
|
||||
{
|
||||
Operation operation = (Operation)node.Value;
|
||||
|
||||
bool isAtomic = operation.Inst.IsAtomic();
|
||||
bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
|
||||
// Atomics count as writes, alongside every global store variant.
bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
|
||||
|
||||
Operation storageOp = null;
|
||||
|
||||
// Inserts a new operation right before the node being rewritten and returns its result local.
Operand PrependOperation(Instruction inst, params Operand[] sources)
|
||||
{
|
||||
Operand local = Local();
|
||||
|
||||
node.List.AddBefore(node, new Operation(inst, local, sources));
|
||||
|
||||
return local;
|
||||
}
|
||||
|
||||
// Same as PrependOperation, for operations that carry a storage kind.
Operand PrependStorageOperation(Instruction inst, StorageKind storageKind, params Operand[] sources)
|
||||
{
|
||||
Operand local = Local();
|
||||
|
||||
node.List.AddBefore(node, new Operation(inst, storageKind, local, sources));
|
||||
|
||||
return local;
|
||||
}
|
||||
|
||||
// Redirects an already built operation to a fresh local and inserts it before the node.
Operand PrependExistingOperation(Operation operation)
|
||||
{
|
||||
Operand local = Local();
|
||||
|
||||
operation.Dest = local;
|
||||
node.List.AddBefore(node, operation);
|
||||
|
||||
return local;
|
||||
}
|
||||
|
||||
// The first two sources hold the low and high halves of the accessed address.
Operand addrLow = operation.GetSource(0);
|
||||
Operand addrHigh = operation.GetSource(1);
|
||||
|
||||
// Defaults used when no range check below matches.
Operand sbBaseAddrLow = Const(0);
|
||||
Operand sbSlot = Const(0);
|
||||
|
||||
// Negated alignment; AND-ing with it clears the low bits
// (assumes the alignment is a power of two — TODO confirm).
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
|
||||
|
||||
// Emits code testing whether the accessed address lies inside the buffer described by the
// three consecutive words at cbOffset of the driver reserved constant buffer
// (base address low, base address high, size).
Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
|
||||
{
|
||||
baseAddrLow = Cbuf(DriverReservedCb, cbOffset);
|
||||
Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1);
|
||||
Operand size = Cbuf(DriverReservedCb, cbOffset + 2);
|
||||
|
||||
Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
|
||||
Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
|
||||
|
||||
Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
|
||||
|
||||
// NOTE(review): adding the comparison result presumably relies on "true" being -1
// in this IR, so that a borrow decrements the high half — confirm.
Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
|
||||
|
||||
Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
|
||||
|
||||
return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
|
||||
}
|
||||
|
||||
int sbUseMask = config.AccessibleStorageBuffersMask;
|
||||
|
||||
// Visit every bit set in the accessible storage buffers mask, lowest bit first.
while (sbUseMask != 0)
|
||||
{
|
||||
int slot = BitOperations.TrailingZeroCount(sbUseMask);
|
||||
|
||||
sbUseMask &= ~(1 << slot);
|
||||
|
||||
int cbOffset = GetStorageCbOffset(config.Stage, slot);
|
||||
// From here on "slot" is the slot returned by the config for this descriptor offset.
slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset);
|
||||
|
||||
config.SetUsedStorageBuffer(slot, isWrite);
|
||||
|
||||
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
|
||||
|
||||
// Keep the previous base/slot unless the address falls inside this buffer.
sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
|
||||
sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
|
||||
}
|
||||
|
||||
if (config.AccessibleStorageBuffersMask != 0)
|
||||
{
|
||||
Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
|
||||
Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
|
||||
|
||||
Operand[] sources = new Operand[operation.SourcesCount];
|
||||
|
||||
sources[0] = sbSlot;
|
||||
|
||||
// 8 and 16 bit stores receive the byte offset; everything else receives it shifted right by 2.
if (isStg16Or8)
|
||||
{
|
||||
sources[1] = byteOffset;
|
||||
}
|
||||
else
|
||||
{
|
||||
sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
|
||||
}
|
||||
|
||||
// Remaining sources are forwarded unchanged.
for (int index = 2; index < operation.SourcesCount; index++)
|
||||
{
|
||||
sources[index] = operation.GetSource(index);
|
||||
}
|
||||
|
||||
if (isAtomic)
|
||||
{
|
||||
storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
|
||||
}
|
||||
else if (operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
|
||||
}
|
||||
else
|
||||
{
|
||||
Instruction storeInst = operation.Inst switch
|
||||
{
|
||||
Instruction.StoreGlobal16 => Instruction.StoreStorage16,
|
||||
Instruction.StoreGlobal8 => Instruction.StoreStorage8,
|
||||
_ => Instruction.StoreStorage
|
||||
};
|
||||
|
||||
storageOp = new Operation(storeInst, null, sources);
|
||||
}
|
||||
}
|
||||
// No accessible storage buffer: an operation with a destination becomes a copy of constant 0.
else if (operation.Dest != null)
|
||||
{
|
||||
storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
|
||||
}
|
||||
|
||||
if (operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
int cbeUseMask = config.AccessibleConstantBuffersMask;
|
||||
|
||||
// For every bit set in the accessible constant buffers mask, emit the pending result,
// then build a select between a constant buffer load and that previous result.
while (cbeUseMask != 0)
|
||||
{
|
||||
int slot = BitOperations.TrailingZeroCount(cbeUseMask);
|
||||
int cbSlot = UbeFirstCbuf + slot;
|
||||
|
||||
cbeUseMask &= ~(1 << slot);
|
||||
|
||||
Operand previousResult = PrependExistingOperation(storageOp);
|
||||
|
||||
int cbOffset = GetConstantUbeOffset(slot);
|
||||
|
||||
Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
|
||||
|
||||
Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
|
||||
Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
|
||||
|
||||
// cbIndex is the byte offset shifted right by 2; it is then split into an index
// shifted right by 2 again (vecIndex) and its low 2 bits (elemIndex).
Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
|
||||
Operand vecIndex = PrependOperation(Instruction.ShiftRightU32, cbIndex, Const(2));
|
||||
Operand elemIndex = PrependOperation(Instruction.BitwiseAnd, cbIndex, Const(3));
|
||||
|
||||
Operand[] sourcesCb = new Operand[4];
|
||||
|
||||
sourcesCb[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot));
|
||||
sourcesCb[1] = Const(0);
|
||||
sourcesCb[2] = vecIndex;
|
||||
sourcesCb[3] = elemIndex;
|
||||
|
||||
Operand ldcResult = PrependStorageOperation(Instruction.Load, StorageKind.ConstantBuffer, sourcesCb);
|
||||
|
||||
// The select becomes the new pending result and writes to the original destination.
storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
|
||||
}
|
||||
}
|
||||
|
||||
// Clear every source of the original operation before it is removed.
for (int index = 0; index < operation.SourcesCount; index++)
|
||||
{
|
||||
operation.SetSource(index, null);
|
||||
}
|
||||
|
||||
LinkedListNode<INode> oldNode = node;
|
||||
LinkedList<INode> oldNodeList = oldNode.List;
|
||||
|
||||
// Insert the final replacement (if any) right before the old node, then drop the old node.
if (storageOp != null)
|
||||
{
|
||||
node = node.List.AddBefore(node, storageOp);
|
||||
}
|
||||
else
|
||||
{
|
||||
node = null;
|
||||
}
|
||||
|
||||
oldNodeList.Remove(oldNode);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
|
||||
{
|
||||
TextureOperation texOp = (TextureOperation)node.Value;
|
||||
|
|
|
@ -110,12 +110,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
public UInt128 NextInputAttributesComponents { get; private set; }
|
||||
public UInt128 ThisInputAttributesComponents { get; private set; }
|
||||
|
||||
public int AccessibleStorageBuffersMask { get; private set; }
|
||||
public int AccessibleConstantBuffersMask { get; private set; }
|
||||
|
||||
private int _usedStorageBuffers;
|
||||
private int _usedStorageBuffersWrite;
|
||||
|
||||
private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
|
||||
|
||||
private struct TextureMeta
|
||||
|
@ -127,18 +121,9 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
|
||||
private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
|
||||
|
||||
private readonly Dictionary<int, int> _sbSlots;
|
||||
private readonly Dictionary<int, int> _sbSlotsReverse;
|
||||
|
||||
private BufferDescriptor[] _cachedStorageBufferDescriptors;
|
||||
private TextureDescriptor[] _cachedTextureDescriptors;
|
||||
private TextureDescriptor[] _cachedImageDescriptors;
|
||||
|
||||
private int _firstStorageBufferBinding;
|
||||
|
||||
public int FirstStorageBufferBinding => _firstStorageBufferBinding;
|
||||
|
||||
public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options)
|
||||
{
|
||||
Stage = stage;
|
||||
|
@ -147,18 +132,12 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
_transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();
|
||||
|
||||
AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
|
||||
AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;
|
||||
|
||||
UsedInputAttributesPerPatch = new HashSet<int>();
|
||||
UsedOutputAttributesPerPatch = new HashSet<int>();
|
||||
|
||||
_usedTextures = new Dictionary<TextureInfo, TextureMeta>();
|
||||
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
|
||||
|
||||
_sbSlots = new Dictionary<int, int>();
|
||||
_sbSlotsReverse = new Dictionary<int, int>();
|
||||
|
||||
ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties());
|
||||
}
|
||||
|
||||
|
@ -173,11 +152,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
OutputTopology = outputTopology;
|
||||
MaxOutputVertices = maxOutputVertices;
|
||||
TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
|
||||
|
||||
if (Stage != ShaderStage.Compute)
|
||||
{
|
||||
AccessibleConstantBuffersMask = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options)
|
||||
|
@ -433,8 +407,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
UsedInputAttributes |= other.UsedInputAttributes;
|
||||
UsedOutputAttributes |= other.UsedOutputAttributes;
|
||||
_usedStorageBuffers |= other._usedStorageBuffers;
|
||||
_usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
|
||||
|
||||
foreach (var kv in other._usedTextures)
|
||||
{
|
||||
|
@ -634,23 +606,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
UsedFeatures |= flags;
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces both accessible buffer masks: <paramref name="sbMask"/> is stored as the storage
/// buffers mask and <paramref name="ubeMask"/> as the constant buffers mask.
/// </summary>
public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
{
    (AccessibleStorageBuffersMask, AccessibleConstantBuffersMask) = (sbMask, ubeMask);
}
|
||||
|
||||
/// <summary>
/// Sets the bit for <paramref name="slot"/> in the used storage buffers mask and,
/// when <paramref name="write"/> is true, in the written storage buffers mask as well.
/// </summary>
public void SetUsedStorageBuffer(int slot, bool write)
{
    int slotBit = 1 << slot;

    _usedStorageBuffers |= slotBit;

    // OR-ing with 0 leaves the written mask untouched for read-only uses.
    _usedStorageBuffersWrite |= write ? slotBit : 0;
}
|
||||
|
||||
public void SetUsedTexture(
|
||||
Instruction inst,
|
||||
SamplerType type,
|
||||
|
@ -756,76 +711,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return meta;
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the storage buffer descriptors, building and caching them on the first call.
/// </summary>
/// <remarks>
/// When the descriptors are built, <c>_firstStorageBufferBinding</c> is also updated
/// through the <c>out</c> argument of the builder overload.
/// </remarks>
/// <returns>The cached array of storage buffer descriptors</returns>
public BufferDescriptor[] GetStorageBufferDescriptors()
{
    return _cachedStorageBufferDescriptors ??= GetStorageBufferDescriptors(
        _usedStorageBuffers,
        _usedStorageBuffersWrite,
        true,
        out _firstStorageBufferBinding,
        GpuAccessor.QueryBindingStorageBuffer);
}
|
||||
|
||||
/// <summary>
/// Builds one buffer descriptor per bit set on <paramref name="usedMask"/>, from the lowest bit to the highest.
/// </summary>
/// <param name="usedMask">Bit mask of the slots to create descriptors for</param>
/// <param name="writtenMask">Bit mask of the slots that get the <c>Write</c> usage flag</param>
/// <param name="isArray">When true, the binding callback is also invoked for the unused slots that precede each used slot</param>
/// <param name="firstBinding">First binding returned by the callback, or 0 if the callback was never invoked</param>
/// <param name="getBindingCallback">Callback that maps a slot to its binding</param>
/// <returns>The descriptors, ordered by ascending slot</returns>
private BufferDescriptor[] GetStorageBufferDescriptors(
    int usedMask,
    int writtenMask,
    bool isArray,
    out int firstBinding,
    Func<int, int> getBindingCallback)
{
    firstBinding = 0;

    bool firstBindingKnown = false;
    var result = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];

    int remaining = usedMask;
    int previousSlot = -1;
    int index = 0;

    // One iteration per set bit, so this runs exactly result.Length times.
    while (remaining != 0)
    {
        int slot = BitOperations.TrailingZeroCount(remaining);
        int slotBit = 1 << slot;

        if (isArray)
        {
            // Unused array entries between two used slots still consume a binding.
            for (int gap = previousSlot + 1; gap < slot; gap++)
            {
                int gapBinding = getBindingCallback(gap);

                if (!firstBindingKnown)
                {
                    firstBinding = gapBinding;
                    firstBindingKnown = true;
                }
            }
        }

        previousSlot = slot;

        (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot);

        var descriptor = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset);

        if (!firstBindingKnown)
        {
            firstBinding = descriptor.Binding;
            firstBindingKnown = true;
        }

        if ((writtenMask & slotBit) != 0)
        {
            descriptor.SetFlag(BufferUsageFlags.Write);
        }

        result[index++] = descriptor;

        remaining &= ~slotBit;
    }

    return result;
}
|
||||
|
||||
public TextureDescriptor[] GetTextureDescriptors()
|
||||
{
|
||||
return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
|
||||
|
@ -922,45 +807,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return FindDescriptorIndex(GetImageDescriptors(), texOp);
|
||||
}
|
||||
|
||||
/// <summary>
/// Gets the slot associated with a constant buffer slot and offset pair, allocating a new one on first use.
/// </summary>
/// <param name="sbCbSlot">Constant buffer slot component of the lookup key</param>
/// <param name="sbCbOffset">Constant buffer offset component of the lookup key</param>
/// <returns>The existing slot for the pair, or a newly assigned one equal to the number of slots registered so far</returns>
public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset)
{
    int packedInfo = PackSbCbInfo(sbCbSlot, sbCbOffset);

    if (_sbSlots.TryGetValue(packedInfo, out int knownSlot))
    {
        return knownSlot;
    }

    // New pair: register it in both the forward and the reverse map.
    int newSlot = _sbSlots.Count;

    _sbSlots.Add(packedInfo, newSlot);
    _sbSlotsReverse.Add(newSlot, packedInfo);

    return newSlot;
}
|
||||
|
||||
/// <summary>
/// Gets the constant buffer slot and offset pair that was registered for a slot.
/// </summary>
/// <param name="slot">Slot to look up on the reverse map</param>
/// <returns>The unpacked (slot, offset) pair</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="slot"/> is not present on the reverse map</exception>
public (int, int) GetSbCbInfo(int slot)
{
    if (!_sbSlotsReverse.TryGetValue(slot, out int packedInfo))
    {
        throw new ArgumentException($"Invalid slot {slot}.", nameof(slot));
    }

    return UnpackSbCbInfo(packedInfo);
}
|
||||
|
||||
/// <summary>
/// Packs a constant buffer slot and offset into a single integer key.
/// </summary>
/// <param name="sbCbSlot">Value placed from bit 16 upwards</param>
/// <param name="sbCbOffset">Value OR-ed into the key without shifting</param>
/// <returns>The combined key</returns>
private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset)
{
    int slotBits = sbCbSlot << 16;

    return slotBits | sbCbOffset;
}
|
||||
|
||||
/// <summary>
/// Splits a packed key back into its two components.
/// </summary>
/// <param name="key">Packed key</param>
/// <returns>Bits 16-23 of the key as the first item, and bits 0-15 as the second item</returns>
private static (int, int) UnpackSbCbInfo(int key)
{
    // The narrowing casts deliberately discard any bits outside each field.
    byte cbSlot = (byte)(key >> 16);
    ushort cbOffset = (ushort)key;

    return (cbSlot, cbOffset);
}
|
||||
|
||||
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
|
||||
{
|
||||
return new ShaderProgramInfo(
|
||||
ResourceManager.GetConstantBufferDescriptors(),
|
||||
GetStorageBufferDescriptors(),
|
||||
ResourceManager.GetStorageBufferDescriptors(),
|
||||
GetTextureDescriptors(),
|
||||
GetImageDescriptors(),
|
||||
identification,
|
||||
|
|
|
@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
continue;
|
||||
}
|
||||
|
||||
if (IsResourceWrite(operation.Inst))
|
||||
if (IsResourceWrite(operation.Inst, operation.StorageKind))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -154,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return totalVerticesCount + verticesCount == 3 && writesLayer;
|
||||
}
|
||||
|
||||
private static bool IsResourceWrite(Instruction inst)
|
||||
private static bool IsResourceWrite(Instruction inst, StorageKind storageKind)
|
||||
{
|
||||
switch (inst)
|
||||
{
|
||||
|
@ -170,13 +170,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
case Instruction.AtomicXor:
|
||||
case Instruction.ImageAtomic:
|
||||
case Instruction.ImageStore:
|
||||
case Instruction.StoreGlobal:
|
||||
case Instruction.StoreGlobal16:
|
||||
case Instruction.StoreGlobal8:
|
||||
case Instruction.StoreStorage:
|
||||
case Instruction.StoreStorage16:
|
||||
case Instruction.StoreStorage8:
|
||||
return true;
|
||||
case Instruction.Store:
|
||||
return storageKind == StorageKind.StorageBuffer ||
|
||||
storageKind == StorageKind.SharedMemory ||
|
||||
storageKind == StorageKind.LocalMemory;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
Ssa.Rename(cfg.Blocks);
|
||||
|
||||
Optimizer.RunPass(cfg.Blocks, config);
|
||||
Optimizer.RunPass(hfm, cfg.Blocks, config);
|
||||
Rewriter.RunPass(hfm, cfg.Blocks, config);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue