ryujinx/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
gdkchan 21c9ac6240
Implement shader storage buffer operations using new Load/Store instructions (#4993)
* Implement storage buffer operations using new Load/Store instruction

* Extend GenerateMultiTargetStorageOp to also match access with constant offset, and log and comments

* Remove now unused code

* Catch more complex cases of global memory usage

* Shader cache version bump

* Extend global access elimination to work with more shared memory cases

* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers

* Tweak preferencing to catch more cases

* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)

* Fix storage buffer bindings

* Simplify some code

* Shader cache version bump

* Fix typo

* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
2023-06-03 20:12:18 -03:00

183 lines
7.3 KiB
C#

using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation
{
static class ShaderIdentifier
{
public static ShaderIdentification Identify(IReadOnlyList<Function> functions, ShaderConfig config)
{
if (config.Stage == ShaderStage.Geometry &&
config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
!config.GpuAccessor.QueryHostSupportsGeometryShader() &&
IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
{
config.SetGeometryShaderLayerInputAttribute(layerInputAttr);
return ShaderIdentification.GeometryLayerPassthrough;
}
return ShaderIdentification.None;
}
private static bool IsLayerPassthroughGeometryShader(IReadOnlyList<Function> functions, out int layerInputAttr)
{
bool writesLayer = false;
layerInputAttr = 0;
if (functions.Count != 1)
{
return false;
}
int verticesCount = 0;
int totalVerticesCount = 0;
foreach (BasicBlock block in functions[0].Blocks)
{
// We are not expecting loops or any complex control flow here, so fail in those cases.
if (block.Branch != null && block.Branch.Index <= block.Index)
{
return false;
}
foreach (INode node in block.Operations)
{
if (!(node is Operation operation))
{
continue;
}
if (IsResourceWrite(operation.Inst, operation.StorageKind))
{
return false;
}
if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
{
Operand src = operation.GetSource(operation.SourcesCount - 1);
Operation srcAttributeAsgOp = null;
if (src.Type == OperandType.LocalVariable &&
src.AsgOp is Operation asgOp &&
asgOp.Inst == Instruction.Load &&
asgOp.StorageKind.IsInputOrOutput())
{
if (asgOp.StorageKind != StorageKind.Input)
{
return false;
}
srcAttributeAsgOp = asgOp;
}
if (srcAttributeAsgOp != null)
{
IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value;
IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value;
if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
{
if (srcAttributeAsgOp.SourcesCount != 4)
{
return false;
}
writesLayer = true;
layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value;;
}
else
{
if (dstAttribute != srcAttribute)
{
return false;
}
int inputsCount = operation.SourcesCount - 2;
if (dstAttribute == IoVariable.UserDefined)
{
if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value)
{
return false;
}
inputsCount--;
}
for (int i = 0; i < inputsCount; i++)
{
int dstIndex = operation.SourcesCount - 2 - i;
int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i;
if ((dstIndex | srcIndex) < 0)
{
return false;
}
if (operation.GetSource(dstIndex).Type != OperandType.Constant ||
srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant ||
operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value)
{
return false;
}
}
}
}
else if (src.Type == OperandType.Constant)
{
int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value;
float expectedValue = dstComponent == 3 ? 1f : 0f;
if (src.AsFloat() != expectedValue)
{
return false;
}
}
else
{
return false;
}
}
else if (operation.Inst == Instruction.EmitVertex)
{
verticesCount++;
}
else if (operation.Inst == Instruction.EndPrimitive)
{
totalVerticesCount += verticesCount;
verticesCount = 0;
}
}
}
return totalVerticesCount + verticesCount == 3 && writesLayer;
}
private static bool IsResourceWrite(Instruction inst, StorageKind storageKind)
{
switch (inst)
{
case Instruction.AtomicAdd:
case Instruction.AtomicAnd:
case Instruction.AtomicCompareAndSwap:
case Instruction.AtomicMaxS32:
case Instruction.AtomicMaxU32:
case Instruction.AtomicMinS32:
case Instruction.AtomicMinU32:
case Instruction.AtomicOr:
case Instruction.AtomicSwap:
case Instruction.AtomicXor:
case Instruction.ImageAtomic:
case Instruction.ImageStore:
return true;
case Instruction.Store:
return storageKind == StorageKind.StorageBuffer ||
storageKind == StorageKind.SharedMemory ||
storageKind == StorageKind.LocalMemory;
}
return false;
}
}
}