Delete ShaderConfig and organize shader resources/definitions better (#5509)
* Move some properties out of ShaderConfig * Stop using ShaderConfig on backends * Replace ShaderConfig usages on Translator and passes * Move remaining properties out of ShaderConfig and delete ShaderConfig * Remove ResourceManager property from TranslatorContext * Move Rewriter passes to separate transform pass files * Fix TransformPasses.RunPass on cases where a node is removed * Move remaining ClipDistancePrimitivesWritten and UsedFeatures updates to decode stage * Reduce excessive parameter passing a bit by using structs more * Remove binding parameter from ShaderProperties methods since it is redundant * Replace decoder instruction checks with switch statement * Put GLSL on the same plan as SPIR-V for input/output declaration * Stop mutating TranslatorContext state when Translate is called * Pass most of the graphics state using a struct instead of individual query methods * Auto-format * Auto-format * Add backend logging interface * Auto-format * Remove unnecessary use of interpolated strings * Remove more modifications of AttributeUsage after decode * PR feedback * gl_Layer is not supported on compute
This commit is contained in:
parent
8edfb2bc7b
commit
b423197619
68 changed files with 2653 additions and 2407 deletions
168
src/Ryujinx.Graphics.Shader/Translation/AttributeUsage.cs
Normal file
168
src/Ryujinx.Graphics.Shader/Translation/AttributeUsage.cs
Normal file
|
@ -0,0 +1,168 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Records which user attribute locations a shader stage reads and writes, and merges that
/// record with the input usage of the following stage.
/// </summary>
/// <remarks>
/// Regular attribute usage is stored as 32-bit masks (bit N set means location N is used).
/// Per-patch attribute usage is stored as sets of indices.
/// </remarks>
class AttributeUsage
{
    /// <summary>Value of the <c>nextUsesFixedFunctionAttributes</c> argument of the last <see cref="MergeFromtNextStage"/> call.</summary>
    public bool NextUsesFixedFuncAttributes { get; private set; }

    /// <summary>Bit mask of input attribute locations flagged as used on this stage.</summary>
    public int UsedInputAttributes { get; private set; }

    /// <summary>Bit mask of output attribute locations flagged as used on this stage.</summary>
    public int UsedOutputAttributes { get; private set; }

    /// <summary>Indices of the per-patch input attributes flagged as used on this stage.</summary>
    public HashSet<int> UsedInputAttributesPerPatch { get; }

    /// <summary>Indices of the per-patch output attributes flagged as used on this stage.</summary>
    public HashSet<int> UsedOutputAttributesPerPatch { get; }

    /// <summary>
    /// The next stage's <see cref="UsedInputAttributesPerPatch"/> set (same instance, not a copy).
    /// Stays null until <see cref="MergeFromtNextStage"/> is called.
    /// </summary>
    public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }

    /// <summary>
    /// Bit mask of next stage inputs that this stage does not write itself.
    /// Only assigned when merging with <c>gpPassthrough</c> set.
    /// </summary>
    public int PassthroughAttributes { get; private set; }

    // Input mask of the next stage, as received by the last merge.
    private int _nextUsedInputAttributes;

    // Inputs flagged individually through SetInputUserAttribute.
    // SetAllInputUserAttributes does not touch this mask.
    private int _thisUsedInputAttributes;

    // Per-patch attribute index -> assigned location. Shared with the next stage after a merge.
    private Dictionary<int, int> _perPatchAttributeLocations;

    private readonly IGpuAccessor _gpuAccessor;

    /// <summary>The next stage's <see cref="ThisInputAttributesComponents"/> at the time of the last merge.</summary>
    public UInt128 NextInputAttributesComponents { get; private set; }

    /// <summary>Bit mask of used input components, with 4 consecutive bits per attribute location.</summary>
    public UInt128 ThisInputAttributesComponents { get; private set; }

    /// <summary>
    /// Creates an empty usage record.
    /// </summary>
    /// <param name="gpuAccessor">Accessor used for logging when per-patch locations run out</param>
    public AttributeUsage(IGpuAccessor gpuAccessor)
    {
        _gpuAccessor = gpuAccessor;

        UsedInputAttributesPerPatch = new();
        UsedOutputAttributesPerPatch = new();
    }

    /// <summary>
    /// Flags one component of an input attribute as used.
    /// </summary>
    /// <param name="index">Attribute location</param>
    /// <param name="component">Component index inside the attribute (bit position is index * 4 + component)</param>
    public void SetInputUserAttribute(int index, int component)
    {
        int mask = 1 << index;

        UsedInputAttributes |= mask;
        _thisUsedInputAttributes |= mask;
        ThisInputAttributesComponents |= UInt128.One << (index * 4 + component);
    }

    /// <summary>
    /// Flags a per-patch input attribute as used.
    /// </summary>
    /// <param name="index">Per-patch attribute index</param>
    public void SetInputUserAttributePerPatch(int index)
    {
        UsedInputAttributesPerPatch.Add(index);
    }

    /// <summary>
    /// Flags an output attribute as used.
    /// </summary>
    /// <param name="index">Attribute location</param>
    public void SetOutputUserAttribute(int index)
    {
        UsedOutputAttributes |= 1 << index;
    }

    /// <summary>
    /// Flags a per-patch output attribute as used.
    /// </summary>
    /// <param name="index">Per-patch attribute index</param>
    public void SetOutputUserAttributePerPatch(int index)
    {
        UsedOutputAttributesPerPatch.Add(index);
    }

    /// <summary>
    /// Merges the input usage of the next stage into this stage, and assigns locations
    /// for per-patch outputs if this stage has any.
    /// </summary>
    /// <param name="gpPassthrough">Forwarded to the output merge; selects passthrough mask computation instead of output widening</param>
    /// <param name="nextUsesFixedFunctionAttributes">Stored on <see cref="NextUsesFixedFuncAttributes"/></param>
    /// <param name="nextStage">Usage record of the next stage; its per-patch location map is overwritten when a map is built</param>
    public void MergeFromtNextStage(bool gpPassthrough, bool nextUsesFixedFunctionAttributes, AttributeUsage nextStage)
    {
        NextInputAttributesComponents = nextStage.ThisInputAttributesComponents;
        NextUsedInputAttributesPerPatch = nextStage.UsedInputAttributesPerPatch;
        NextUsesFixedFuncAttributes = nextUsesFixedFunctionAttributes;
        MergeOutputUserAttributes(gpPassthrough, nextStage.UsedInputAttributes, nextStage.UsedInputAttributesPerPatch);

        if (UsedOutputAttributesPerPatch.Count != 0)
        {
            // Regular and per-patch input/output locations can't overlap,
            // so we must assign our own locations using unused regular input/output locations.

            Dictionary<int, int> locationsMap = new();

            int freeMask = ~UsedOutputAttributes;

            foreach (int attr in UsedOutputAttributesPerPatch)
            {
                int location = BitOperations.TrailingZeroCount(freeMask);
                if (location == 32)
                {
                    // All 32 locations are taken; the remaining per-patch attributes stay unmapped.
                    _gpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}.");
                    break;
                }

                locationsMap.Add(attr, location);
                freeMask &= ~(1 << location);
            }

            // Both stages must agree on the locations, so use the same "map" for both.
            _perPatchAttributeLocations = locationsMap;
            nextStage._perPatchAttributeLocations = locationsMap;
        }
    }

    /// <summary>
    /// Records the next stage input mask, then either computes the passthrough mask
    /// or widens this stage's outputs to include everything the next stage reads.
    /// </summary>
    /// <param name="gpPassthrough">When true only <see cref="PassthroughAttributes"/> is updated</param>
    /// <param name="mask">Bit mask of inputs used by the next stage</param>
    /// <param name="perPatch">Per-patch inputs used by the next stage</param>
    private void MergeOutputUserAttributes(bool gpPassthrough, int mask, IEnumerable<int> perPatch)
    {
        _nextUsedInputAttributes = mask;

        if (gpPassthrough)
        {
            PassthroughAttributes = mask & ~UsedOutputAttributes;
        }
        else
        {
            UsedOutputAttributes |= mask;
            UsedOutputAttributesPerPatch.UnionWith(perPatch);
        }
    }

    /// <summary>
    /// Gets the location assigned to a per-patch attribute.
    /// </summary>
    /// <param name="index">Per-patch attribute index</param>
    /// <returns>The mapped location, or <paramref name="index"/> itself when no map exists or the index is not in it</returns>
    public int GetPerPatchAttributeLocation(int index)
    {
        if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location))
        {
            return index;
        }

        return location;
    }

    /// <summary>
    /// Checks if an output attribute should be treated as used.
    /// </summary>
    /// <param name="attr">Attribute offset</param>
    /// <returns>
    /// False only when the next stage uses fixed function attributes, <paramref name="attr"/> is in the
    /// user attribute range, and the next stage does not read that location. True otherwise
    /// </returns>
    public bool IsUsedOutputAttribute(int attr)
    {
        // The check for fixed function attributes on the next stage is conservative,
        // returning false if the output is just not used by the next stage is also valid.
        if (NextUsesFixedFuncAttributes &&
            attr >= AttributeConsts.UserAttributeBase &&
            attr < AttributeConsts.UserAttributeEnd)
        {
            // Each user attribute spans 16 units of offset, so shifting by 4 yields its location.
            int index = (attr - AttributeConsts.UserAttributeBase) >> 4;
            return (_nextUsedInputAttributes & (1 << index)) != 0;
        }

        return true;
    }

    /// <summary>
    /// Finds the Nth (zero based) location that is not in use, counting from location 0 upwards.
    /// </summary>
    /// <param name="isOutput">True to search the next stage input mask, false to search this stage's individually flagged inputs</param>
    /// <param name="index">How many free locations to skip before picking one</param>
    /// <returns>The free location, or -1 if there are not enough free locations</returns>
    public int GetFreeUserAttribute(bool isOutput, int index)
    {
        int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;

        // A mask of -1 means that all 32 locations are taken.
        while (useMask != -1)
        {
            int bit = BitOperations.TrailingZeroCount(~useMask);

            if (index < 1)
            {
                return bit;
            }

            // Consume this free location and keep looking for the next one.
            useMask |= 1 << bit;
            index--;
        }

        // Ran out of locations before skipping "index" free ones. Returning the last location
        // found here would hand out a location that already belongs to a lower index.
        return -1;
    }

    /// <summary>
    /// Flags all input attributes, and all of their components, as used.
    /// </summary>
    public void SetAllInputUserAttributes()
    {
        UsedInputAttributes |= Constants.AllAttributesMask;

        // Sets the low (MaxAttributes * 4) bits, one per attribute component.
        ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4);
    }

    /// <summary>
    /// Flags all output attributes as used.
    /// </summary>
    public void SetAllOutputUserAttributes()
    {
        UsedOutputAttributes |= Constants.AllAttributesMask;
    }
}
|
||||
}
|
|
@ -11,7 +11,8 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
class EmitterContext
|
||||
{
|
||||
public DecodedProgram Program { get; }
|
||||
public ShaderConfig Config { get; }
|
||||
public TranslatorContext TranslatorContext { get; }
|
||||
public ResourceManager ResourceManager { get; }
|
||||
|
||||
public bool IsNonMain { get; }
|
||||
|
||||
|
@ -54,10 +55,15 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_labels = new Dictionary<ulong, BlockLabel>();
|
||||
}
|
||||
|
||||
public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain) : this()
|
||||
public EmitterContext(
|
||||
TranslatorContext translatorContext,
|
||||
ResourceManager resourceManager,
|
||||
DecodedProgram program,
|
||||
bool isNonMain) : this()
|
||||
{
|
||||
TranslatorContext = translatorContext;
|
||||
ResourceManager = resourceManager;
|
||||
Program = program;
|
||||
Config = config;
|
||||
IsNonMain = isNonMain;
|
||||
|
||||
EmitStart();
|
||||
|
@ -65,12 +71,12 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
private void EmitStart()
|
||||
{
|
||||
if (Config.Stage == ShaderStage.Vertex &&
|
||||
Config.Options.TargetApi == TargetApi.Vulkan &&
|
||||
(Config.Options.Flags & TranslationFlags.VertexA) == 0)
|
||||
if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex &&
|
||||
TranslatorContext.Options.TargetApi == TargetApi.Vulkan &&
|
||||
(TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0)
|
||||
{
|
||||
// Vulkan requires the point size to be always written on the shader if the primitive topology is points.
|
||||
this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize()));
|
||||
this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(TranslatorContext.Definitions.PointSize));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -115,49 +121,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_operations.Add(operation);
|
||||
}
|
||||
|
||||
public void FlagAttributeRead(int attribute)
|
||||
{
|
||||
if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId)
|
||||
{
|
||||
Config.SetUsedFeature(FeatureFlags.InstanceId);
|
||||
}
|
||||
else if (Config.Stage == ShaderStage.Fragment)
|
||||
{
|
||||
switch (attribute)
|
||||
{
|
||||
case AttributeConsts.PositionX:
|
||||
case AttributeConsts.PositionY:
|
||||
Config.SetUsedFeature(FeatureFlags.FragCoordXY);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void FlagAttributeWritten(int attribute)
|
||||
{
|
||||
if (Config.Stage == ShaderStage.Vertex)
|
||||
{
|
||||
switch (attribute)
|
||||
{
|
||||
case AttributeConsts.ClipDistance0:
|
||||
case AttributeConsts.ClipDistance1:
|
||||
case AttributeConsts.ClipDistance2:
|
||||
case AttributeConsts.ClipDistance3:
|
||||
case AttributeConsts.ClipDistance4:
|
||||
case AttributeConsts.ClipDistance5:
|
||||
case AttributeConsts.ClipDistance6:
|
||||
case AttributeConsts.ClipDistance7:
|
||||
Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer)
|
||||
{
|
||||
Config.SetUsedFeature(FeatureFlags.RtLayer);
|
||||
}
|
||||
}
|
||||
|
||||
public void MarkLabel(Operand label)
|
||||
{
|
||||
Add(Instruction.MarkLabel, label);
|
||||
|
@ -203,14 +166,14 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
public void PrepareForVertexReturn()
|
||||
{
|
||||
if (!Config.GpuAccessor.QueryHostSupportsTransformFeedback() && Config.GpuAccessor.QueryTransformFeedbackEnabled())
|
||||
if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled())
|
||||
{
|
||||
Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1));
|
||||
|
||||
for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++)
|
||||
{
|
||||
var locations = Config.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
|
||||
var stride = Config.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
|
||||
var locations = TranslatorContext.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
|
||||
var stride = TranslatorContext.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
|
||||
|
||||
Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex));
|
||||
Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex);
|
||||
|
@ -242,7 +205,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
}
|
||||
|
||||
if (Config.GpuAccessor.QueryViewportTransformDisable())
|
||||
if (TranslatorContext.Definitions.ViewportTransformDisable)
|
||||
{
|
||||
Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
|
||||
Operand y = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
|
||||
|
@ -254,7 +217,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(y, yScale, negativeOne));
|
||||
}
|
||||
|
||||
if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl())
|
||||
if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl())
|
||||
{
|
||||
Operand z = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
|
||||
Operand w = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
|
||||
|
@ -263,12 +226,10 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW));
|
||||
}
|
||||
|
||||
if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
|
||||
if (TranslatorContext.Definitions.Stage != ShaderStage.Geometry && TranslatorContext.HasLayerInputAttribute)
|
||||
{
|
||||
Config.SetUsedFeature(FeatureFlags.RtLayer);
|
||||
|
||||
int attrVecIndex = Config.GpLayerInputAttribute >> 2;
|
||||
int attrComponentIndex = Config.GpLayerInputAttribute & 3;
|
||||
int attrVecIndex = TranslatorContext.GpLayerInputAttribute >> 2;
|
||||
int attrComponentIndex = TranslatorContext.GpLayerInputAttribute & 3;
|
||||
|
||||
Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex));
|
||||
|
||||
|
@ -278,7 +239,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
|
||||
{
|
||||
if (Config.GpuAccessor.QueryViewportTransformDisable())
|
||||
if (TranslatorContext.Definitions.ViewportTransformDisable)
|
||||
{
|
||||
oldXLocal = Local();
|
||||
this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
|
||||
|
@ -291,7 +252,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
oldYLocal = null;
|
||||
}
|
||||
|
||||
if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl())
|
||||
if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl())
|
||||
{
|
||||
oldZLocal = Local();
|
||||
this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
|
||||
|
@ -311,13 +272,13 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return true;
|
||||
}
|
||||
|
||||
if (Config.LastInVertexPipeline &&
|
||||
(Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
|
||||
(Config.Options.Flags & TranslationFlags.VertexA) == 0)
|
||||
if (TranslatorContext.Definitions.LastInVertexPipeline &&
|
||||
(TranslatorContext.Definitions.Stage == ShaderStage.Vertex || TranslatorContext.Definitions.Stage == ShaderStage.TessellationEvaluation) &&
|
||||
(TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0)
|
||||
{
|
||||
PrepareForVertexReturn();
|
||||
}
|
||||
else if (Config.Stage == ShaderStage.Geometry)
|
||||
else if (TranslatorContext.Definitions.Stage == ShaderStage.Geometry)
|
||||
{
|
||||
void WritePositionOutput(int primIndex)
|
||||
{
|
||||
|
@ -345,20 +306,19 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(3), w);
|
||||
}
|
||||
|
||||
if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
|
||||
if (TranslatorContext.Definitions.GpPassthrough && !TranslatorContext.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
|
||||
{
|
||||
int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();
|
||||
int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices();
|
||||
|
||||
for (int primIndex = 0; primIndex < inputVertices; primIndex++)
|
||||
{
|
||||
WritePositionOutput(primIndex);
|
||||
|
||||
int passthroughAttributes = Config.PassthroughAttributes;
|
||||
int passthroughAttributes = TranslatorContext.AttributeUsage.PassthroughAttributes;
|
||||
while (passthroughAttributes != 0)
|
||||
{
|
||||
int index = BitOperations.TrailingZeroCount(passthroughAttributes);
|
||||
WriteUserDefinedOutput(index, primIndex);
|
||||
Config.SetOutputUserAttribute(index);
|
||||
passthroughAttributes &= ~(1 << index);
|
||||
}
|
||||
|
||||
|
@ -368,20 +328,20 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
this.EndPrimitive();
|
||||
}
|
||||
}
|
||||
else if (Config.Stage == ShaderStage.Fragment)
|
||||
else if (TranslatorContext.Definitions.Stage == ShaderStage.Fragment)
|
||||
{
|
||||
GenerateAlphaToCoverageDitherDiscard();
|
||||
|
||||
bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();
|
||||
bool supportsBgra = TranslatorContext.GpuAccessor.QueryHostSupportsBgraFormat();
|
||||
|
||||
if (Config.OmapDepth)
|
||||
if (TranslatorContext.Definitions.OmapDepth)
|
||||
{
|
||||
Operand src = Register(Config.GetDepthRegister(), RegisterType.Gpr);
|
||||
Operand src = Register(TranslatorContext.GetDepthRegister(), RegisterType.Gpr);
|
||||
|
||||
this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, src);
|
||||
}
|
||||
|
||||
AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();
|
||||
AlphaTestOp alphaTestOp = TranslatorContext.Definitions.AlphaTestCompare;
|
||||
|
||||
if (alphaTestOp != AlphaTestOp.Always)
|
||||
{
|
||||
|
@ -389,7 +349,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
this.Discard();
|
||||
}
|
||||
else if ((Config.OmapTargets & 8) != 0)
|
||||
else if ((TranslatorContext.Definitions.OmapTargets & 8) != 0)
|
||||
{
|
||||
Instruction comparator = alphaTestOp switch
|
||||
{
|
||||
|
@ -405,7 +365,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");
|
||||
|
||||
Operand alpha = Register(3, RegisterType.Gpr);
|
||||
Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
|
||||
Operand alphaRef = ConstF(TranslatorContext.Definitions.AlphaTestReference);
|
||||
Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
|
||||
Operand alphaPassLabel = Label();
|
||||
|
||||
|
@ -427,7 +387,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
for (int component = 0; component < 4; component++)
|
||||
{
|
||||
bool componentEnabled = (Config.OmapTargets & (1 << (rtIndex * 4 + component))) != 0;
|
||||
bool componentEnabled = (TranslatorContext.Definitions.OmapTargets & (1 << (rtIndex * 4 + component))) != 0;
|
||||
if (!componentEnabled)
|
||||
{
|
||||
continue;
|
||||
|
@ -460,10 +420,9 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
}
|
||||
|
||||
bool targetEnabled = (Config.OmapTargets & (0xf << (rtIndex * 4))) != 0;
|
||||
bool targetEnabled = (TranslatorContext.Definitions.OmapTargets & (0xf << (rtIndex * 4))) != 0;
|
||||
if (targetEnabled)
|
||||
{
|
||||
Config.SetOutputUserAttribute(rtIndex);
|
||||
regIndexBase += 4;
|
||||
}
|
||||
}
|
||||
|
@ -475,7 +434,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
private void GenerateAlphaToCoverageDitherDiscard()
|
||||
{
|
||||
// If the feature is disabled, or alpha is not written, then we're done.
|
||||
if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0)
|
||||
if (!TranslatorContext.Definitions.AlphaToCoverageDitherEnable || (TranslatorContext.Definitions.OmapTargets & 8) == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -12,15 +12,12 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
None = 0,
|
||||
|
||||
// Affected by resolution scaling.
|
||||
IntegerSampling = 1 << 0,
|
||||
FragCoordXY = 1 << 1,
|
||||
|
||||
Bindless = 1 << 2,
|
||||
InstanceId = 1 << 3,
|
||||
DrawParameters = 1 << 4,
|
||||
RtLayer = 1 << 5,
|
||||
IaIndexing = 1 << 7,
|
||||
OaIndexing = 1 << 8,
|
||||
FixedFuncAttr = 1 << 9,
|
||||
LocalMemory = 1 << 10,
|
||||
SharedMemory = 1 << 11,
|
||||
|
|
34
src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
Normal file
34
src/Ryujinx.Graphics.Shader/Translation/HostCapabilities.cs
Normal file
|
@ -0,0 +1,34 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Immutable group of boolean flags. Each field is assigned exactly once, by the constructor,
/// from the argument with the matching name.
/// </summary>
class HostCapabilities
{
    public readonly bool ReducedPrecision;
    public readonly bool SupportsFragmentShaderInterlock;
    public readonly bool SupportsFragmentShaderOrderingIntel;
    public readonly bool SupportsGeometryShaderPassthrough;
    public readonly bool SupportsShaderBallot;
    public readonly bool SupportsShaderBarrierDivergence;
    public readonly bool SupportsTextureShadowLod;
    public readonly bool SupportsViewportMask;

    /// <summary>
    /// Creates a new set of flags.
    /// </summary>
    /// <param name="reducedPrecision">Value for <see cref="ReducedPrecision"/></param>
    /// <param name="supportsFragmentShaderInterlock">Value for <see cref="SupportsFragmentShaderInterlock"/></param>
    /// <param name="supportsFragmentShaderOrderingIntel">Value for <see cref="SupportsFragmentShaderOrderingIntel"/></param>
    /// <param name="supportsGeometryShaderPassthrough">Value for <see cref="SupportsGeometryShaderPassthrough"/></param>
    /// <param name="supportsShaderBallot">Value for <see cref="SupportsShaderBallot"/></param>
    /// <param name="supportsShaderBarrierDivergence">Value for <see cref="SupportsShaderBarrierDivergence"/></param>
    /// <param name="supportsTextureShadowLod">Value for <see cref="SupportsTextureShadowLod"/></param>
    /// <param name="supportsViewportMask">Value for <see cref="SupportsViewportMask"/></param>
    public HostCapabilities(
        bool reducedPrecision,
        bool supportsFragmentShaderInterlock,
        bool supportsFragmentShaderOrderingIntel,
        bool supportsGeometryShaderPassthrough,
        bool supportsShaderBallot,
        bool supportsShaderBarrierDivergence,
        bool supportsTextureShadowLod,
        bool supportsViewportMask)
    {
        (ReducedPrecision, SupportsTextureShadowLod) =
            (reducedPrecision, supportsTextureShadowLod);

        (SupportsFragmentShaderInterlock, SupportsFragmentShaderOrderingIntel) =
            (supportsFragmentShaderInterlock, supportsFragmentShaderOrderingIntel);

        (SupportsGeometryShaderPassthrough, SupportsViewportMask) =
            (supportsGeometryShaderPassthrough, supportsViewportMask);

        (SupportsShaderBallot, SupportsShaderBarrierDivergence) =
            (supportsShaderBallot, supportsShaderBarrierDivergence);
    }
}
|
||||
}
|
|
@ -1,12 +1,13 @@
|
|||
using Ryujinx.Graphics.Shader.Instructions;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
class BindlessElimination
|
||||
{
|
||||
public static void RunPass(BasicBlock block, ShaderConfig config)
|
||||
public static void RunPass(BasicBlock block, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
|
||||
{
|
||||
// We can turn a bindless into regular access by recognizing the pattern
|
||||
// produced by the compiler for separate texture and sampler.
|
||||
|
@ -43,7 +44,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
if (bindlessHandle.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false);
|
||||
SetHandle(
|
||||
resourceManager,
|
||||
gpuAccessor,
|
||||
texOp,
|
||||
bindlessHandle.GetCbufOffset(),
|
||||
bindlessHandle.GetCbufSlot(),
|
||||
rewriteSamplerType,
|
||||
isImage: false);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -140,7 +149,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
|
||||
{
|
||||
SetHandle(
|
||||
config,
|
||||
resourceManager,
|
||||
gpuAccessor,
|
||||
texOp,
|
||||
TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
|
||||
TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
|
||||
|
@ -150,7 +160,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
else if (src1.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
SetHandle(
|
||||
config,
|
||||
resourceManager,
|
||||
gpuAccessor,
|
||||
texOp,
|
||||
TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
|
||||
TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
|
||||
|
@ -173,17 +184,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
if (texOp.Inst == Instruction.ImageAtomic)
|
||||
{
|
||||
texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
|
||||
texOp.Format = ShaderProperties.GetTextureFormatAtomic(gpuAccessor, cbufOffset, cbufSlot);
|
||||
}
|
||||
else
|
||||
{
|
||||
texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
|
||||
texOp.Format = ShaderProperties.GetTextureFormat(gpuAccessor, cbufOffset, cbufSlot);
|
||||
}
|
||||
}
|
||||
|
||||
bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
|
||||
|
||||
SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
|
||||
SetHandle(resourceManager, gpuAccessor, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -220,11 +231,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
return null;
|
||||
}
|
||||
|
||||
private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
|
||||
private static void SetHandle(
|
||||
ResourceManager resourceManager,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TextureOperation texOp,
|
||||
int cbufOffset,
|
||||
int cbufSlot,
|
||||
bool rewriteSamplerType,
|
||||
bool isImage)
|
||||
{
|
||||
if (rewriteSamplerType)
|
||||
{
|
||||
SamplerType newType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
|
||||
SamplerType newType = gpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
|
||||
|
||||
if (texOp.Inst.IsTextureQuery())
|
||||
{
|
||||
|
@ -253,7 +271,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
int binding = config.ResourceManager.GetTextureOrImageBinding(
|
||||
int binding = resourceManager.GetTextureOrImageBinding(
|
||||
texOp.Inst,
|
||||
texOp.Type,
|
||||
texOp.Format,
|
||||
|
|
|
@ -9,7 +9,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
private const int NvnTextureBufferIndex = 2;
|
||||
|
||||
public static void RunPass(BasicBlock block, ShaderConfig config)
|
||||
public static void RunPass(BasicBlock block, ResourceManager resourceManager)
|
||||
{
|
||||
// We can turn a bindless texture access into an indexed access,
|
||||
// as long as the following conditions are true:
|
||||
|
@ -44,7 +44,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
Operand ldcSrc0 = handleAsgOp.GetSource(0);
|
||||
|
||||
if (ldcSrc0.Type != OperandType.Constant ||
|
||||
!config.ResourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) ||
|
||||
!resourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) ||
|
||||
src0CbufSlot != NvnTextureBufferIndex)
|
||||
{
|
||||
continue;
|
||||
|
@ -88,7 +88,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
continue;
|
||||
}
|
||||
|
||||
TurnIntoIndexed(config, texOp, addSrc1.Value / 4);
|
||||
TurnIntoIndexed(resourceManager, texOp, addSrc1.Value / 4);
|
||||
|
||||
Operand index = Local();
|
||||
|
||||
|
@ -102,9 +102,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
|
||||
private static void TurnIntoIndexed(ResourceManager resourceManager, TextureOperation texOp, int handle)
|
||||
{
|
||||
int binding = config.ResourceManager.GetTextureOrImageBinding(
|
||||
int binding = resourceManager.GetTextureOrImageBinding(
|
||||
texOp.Inst,
|
||||
texOp.Type | SamplerType.Indexed,
|
||||
texOp.Format,
|
||||
|
|
|
@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
static class ConstantFolding
|
||||
{
|
||||
public static void RunPass(ShaderConfig config, Operation operation)
|
||||
public static void RunPass(ResourceManager resourceManager, Operation operation)
|
||||
{
|
||||
if (!AreAllSourcesConstant(operation))
|
||||
{
|
||||
|
@ -158,7 +158,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
int binding = operation.GetSource(0).Value;
|
||||
int fieldIndex = operation.GetSource(1).Value;
|
||||
|
||||
if (config.ResourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0)
|
||||
if (resourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0)
|
||||
{
|
||||
int vecIndex = operation.GetSource(2).Value;
|
||||
int elemIndex = operation.GetSource(3).Value;
|
||||
|
|
|
@ -205,7 +205,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
|
||||
public static void RunPass(
|
||||
HelperFunctionManager hfm,
|
||||
BasicBlock[] blocks,
|
||||
ResourceManager resourceManager,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TargetLanguage targetLanguage)
|
||||
{
|
||||
GtsContext gtsContext = new(hfm);
|
||||
|
||||
|
@ -220,14 +225,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
if (IsGlobalMemory(operation.StorageKind))
|
||||
{
|
||||
LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node);
|
||||
LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(
|
||||
gtsContext,
|
||||
resourceManager,
|
||||
gpuAccessor,
|
||||
targetLanguage,
|
||||
block,
|
||||
node);
|
||||
|
||||
if (nextNode == null)
|
||||
{
|
||||
// The returned value being null means that the global memory replacement failed,
|
||||
// so we just make loads read 0 and stores do nothing.
|
||||
|
||||
config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");
|
||||
gpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");
|
||||
|
||||
if (operation.Dest != null)
|
||||
{
|
||||
|
@ -286,7 +297,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage(
|
||||
GtsContext gtsContext,
|
||||
ShaderConfig config,
|
||||
ResourceManager resourceManager,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TargetLanguage targetLanguage,
|
||||
BasicBlock block,
|
||||
LinkedListNode<INode> node)
|
||||
{
|
||||
|
@ -303,7 +316,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
Operand offset = result.Offset;
|
||||
|
||||
bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer();
|
||||
bool storageUnaligned = gpuAccessor.QueryHasUnalignedStorageBuffer();
|
||||
|
||||
if (storageUnaligned)
|
||||
{
|
||||
|
@ -312,7 +325,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
Operand baseAddressMasked = Local();
|
||||
Operand hostOffset = Local();
|
||||
|
||||
int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
|
||||
int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment();
|
||||
|
||||
Operation maskOp = new(Instruction.BitwiseAnd, baseAddressMasked, baseAddress, Const(-alignment));
|
||||
Operation subOp = new(Instruction.Subtract, hostOffset, globalAddress, baseAddressMasked);
|
||||
|
@ -333,13 +346,19 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
offset = newOffset;
|
||||
}
|
||||
|
||||
if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage))
|
||||
if (CanUseInlineStorageOp(operation, targetLanguage))
|
||||
{
|
||||
return GenerateInlineStorageOp(config, node, operation, offset, result);
|
||||
return GenerateInlineStorageOp(resourceManager, node, operation, offset, result);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId))
|
||||
if (!TryGenerateSingleTargetStorageOp(
|
||||
gtsContext,
|
||||
resourceManager,
|
||||
targetLanguage,
|
||||
operation,
|
||||
result,
|
||||
out int functionId))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
@ -354,7 +373,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
// the base address might be stored.
|
||||
// Generate a helper function that will check all possible storage buffers and use the right one.
|
||||
|
||||
if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId))
|
||||
if (!TryGenerateMultiTargetStorageOp(
|
||||
gtsContext,
|
||||
resourceManager,
|
||||
gpuAccessor,
|
||||
targetLanguage,
|
||||
block,
|
||||
operation,
|
||||
out int functionId))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
@ -375,14 +401,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
|
||||
private static LinkedListNode<INode> GenerateInlineStorageOp(
|
||||
ShaderConfig config,
|
||||
ResourceManager resourceManager,
|
||||
LinkedListNode<INode> node,
|
||||
Operation operation,
|
||||
Operand offset,
|
||||
SearchResult result)
|
||||
{
|
||||
bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
|
||||
if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
|
||||
if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
@ -474,7 +500,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
private static bool TryGenerateSingleTargetStorageOp(
|
||||
GtsContext gtsContext,
|
||||
ShaderConfig config,
|
||||
ResourceManager resourceManager,
|
||||
TargetLanguage targetLanguage,
|
||||
Operation operation,
|
||||
SearchResult result,
|
||||
out int functionId)
|
||||
|
@ -514,7 +541,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
|
||||
if (!TryGenerateStorageOp(
|
||||
config,
|
||||
resourceManager,
|
||||
targetLanguage,
|
||||
context,
|
||||
operation.Inst,
|
||||
operation.StorageKind,
|
||||
|
@ -555,7 +583,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
private static bool TryGenerateMultiTargetStorageOp(
|
||||
GtsContext gtsContext,
|
||||
ShaderConfig config,
|
||||
ResourceManager resourceManager,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TargetLanguage targetLanguage,
|
||||
BasicBlock block,
|
||||
Operation operation,
|
||||
out int functionId)
|
||||
|
@ -624,7 +654,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
if (targetCbs.Count == 0)
|
||||
{
|
||||
config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
|
||||
gpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
|
||||
}
|
||||
|
||||
if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId))
|
||||
|
@ -685,13 +715,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
SearchResult result = new(sbCbSlot, sbCbOffset);
|
||||
|
||||
int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
|
||||
int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment();
|
||||
|
||||
Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment));
|
||||
Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked);
|
||||
|
||||
if (!TryGenerateStorageOp(
|
||||
config,
|
||||
resourceManager,
|
||||
targetLanguage,
|
||||
context,
|
||||
operation.Inst,
|
||||
operation.StorageKind,
|
||||
|
@ -781,7 +812,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
|
||||
private static bool TryGenerateStorageOp(
|
||||
ShaderConfig config,
|
||||
ResourceManager resourceManager,
|
||||
TargetLanguage targetLanguage,
|
||||
EmitterContext context,
|
||||
Instruction inst,
|
||||
StorageKind storageKind,
|
||||
|
@ -794,7 +826,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
resultValue = null;
|
||||
bool isStore = inst.IsAtomic() || inst == Instruction.Store;
|
||||
|
||||
if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
|
||||
if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -820,7 +852,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value);
|
||||
break;
|
||||
case Instruction.AtomicMaxS32:
|
||||
if (config.Options.TargetLanguage == TargetLanguage.Spirv)
|
||||
if (targetLanguage == TargetLanguage.Spirv)
|
||||
{
|
||||
resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
||||
}
|
||||
|
@ -836,7 +868,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
||||
break;
|
||||
case Instruction.AtomicMinS32:
|
||||
if (config.Options.TargetLanguage == TargetLanguage.Spirv)
|
||||
if (targetLanguage == TargetLanguage.Spirv)
|
||||
{
|
||||
resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
|
||||
}
|
||||
|
|
|
@ -7,40 +7,40 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
static class Optimizer
|
||||
{
|
||||
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
|
||||
public static void RunPass(TransformContext context)
|
||||
{
|
||||
RunOptimizationPasses(blocks, config);
|
||||
RunOptimizationPasses(context.Blocks, context.ResourceManager);
|
||||
|
||||
// TODO: Some of those are not optimizations and shouldn't be here.
|
||||
|
||||
GlobalToStorage.RunPass(hfm, blocks, config);
|
||||
GlobalToStorage.RunPass(context.Hfm, context.Blocks, context.ResourceManager, context.GpuAccessor, context.TargetLanguage);
|
||||
|
||||
bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64();
|
||||
bool hostSupportsShaderFloat64 = context.GpuAccessor.QueryHostSupportsShaderFloat64();
|
||||
|
||||
// Those passes look for specific patterns and only need to run once.
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++)
|
||||
{
|
||||
BindlessToIndexed.RunPass(blocks[blkIndex], config);
|
||||
BindlessElimination.RunPass(blocks[blkIndex], config);
|
||||
BindlessToIndexed.RunPass(context.Blocks[blkIndex], context.ResourceManager);
|
||||
BindlessElimination.RunPass(context.Blocks[blkIndex], context.ResourceManager, context.GpuAccessor);
|
||||
|
||||
// FragmentCoord only exists on fragment shaders, so we don't need to check other stages.
|
||||
if (config.Stage == ShaderStage.Fragment)
|
||||
if (context.Stage == ShaderStage.Fragment)
|
||||
{
|
||||
EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
|
||||
EliminateMultiplyByFragmentCoordW(context.Blocks[blkIndex]);
|
||||
}
|
||||
|
||||
// If the host does not support double operations, we need to turn them into float operations.
|
||||
if (!hostSupportsShaderFloat64)
|
||||
{
|
||||
DoubleToFloat.RunPass(hfm, blocks[blkIndex]);
|
||||
DoubleToFloat.RunPass(context.Hfm, context.Blocks[blkIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
// Run optimizations one last time to remove any code that is now optimizable after above passes.
|
||||
RunOptimizationPasses(blocks, config);
|
||||
RunOptimizationPasses(context.Blocks, context.ResourceManager);
|
||||
}
|
||||
|
||||
private static void RunOptimizationPasses(BasicBlock[] blocks, ShaderConfig config)
|
||||
private static void RunOptimizationPasses(BasicBlock[] blocks, ResourceManager resourceManager)
|
||||
{
|
||||
bool modified;
|
||||
|
||||
|
@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
continue;
|
||||
}
|
||||
|
||||
ConstantFolding.RunPass(config, operation);
|
||||
ConstantFolding.RunPass(resourceManager, operation);
|
||||
Simplification.RunPass(operation);
|
||||
|
||||
if (DestIsLocalVar(operation))
|
||||
|
|
|
@ -50,10 +50,10 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
public ShaderProperties Properties { get; }
|
||||
|
||||
public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties)
|
||||
public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor)
|
||||
{
|
||||
_gpuAccessor = gpuAccessor;
|
||||
Properties = properties;
|
||||
Properties = new();
|
||||
_stage = stage;
|
||||
_stagePrefix = GetShaderStagePrefix(stage);
|
||||
|
||||
|
@ -62,15 +62,15 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
_cbSlotToBindingMap.AsSpan().Fill(-1);
|
||||
_sbSlotToBindingMap.AsSpan().Fill(-1);
|
||||
|
||||
_sbSlots = new Dictionary<int, int>();
|
||||
_sbSlotsReverse = new Dictionary<int, int>();
|
||||
_sbSlots = new();
|
||||
_sbSlotsReverse = new();
|
||||
|
||||
_usedConstantBufferBindings = new HashSet<int>();
|
||||
_usedConstantBufferBindings = new();
|
||||
|
||||
_usedTextures = new Dictionary<TextureInfo, TextureMeta>();
|
||||
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
|
||||
_usedTextures = new();
|
||||
_usedImages = new();
|
||||
|
||||
properties.AddOrUpdateConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType()));
|
||||
Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, SupportBuffer.Binding, "support_buffer", SupportBuffer.GetStructureType()));
|
||||
|
||||
LocalMemoryId = -1;
|
||||
SharedMemoryId = -1;
|
||||
|
@ -312,11 +312,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
if (isImage)
|
||||
{
|
||||
Properties.AddOrUpdateImage(binding, definition);
|
||||
Properties.AddOrUpdateImage(definition);
|
||||
}
|
||||
else
|
||||
{
|
||||
Properties.AddOrUpdateTexture(binding, definition);
|
||||
Properties.AddOrUpdateTexture(definition);
|
||||
}
|
||||
|
||||
if (layer == 0)
|
||||
|
@ -500,7 +500,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
new StructureField(AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32, "data", Constants.ConstantBufferSize / 16),
|
||||
});
|
||||
|
||||
Properties.AddOrUpdateConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type));
|
||||
Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, binding, name, type));
|
||||
}
|
||||
|
||||
private void AddNewStorageBuffer(int binding, string name)
|
||||
|
@ -510,7 +510,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
new StructureField(AggregateType.Array | AggregateType.U32, "data", 0),
|
||||
});
|
||||
|
||||
Properties.AddOrUpdateStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type));
|
||||
Properties.AddOrUpdateStorageBuffer(new(BufferLayout.Std430, 1, binding, name, type));
|
||||
}
|
||||
|
||||
public static string GetShaderStagePrefix(ShaderStage stage)
|
||||
|
|
|
@ -1,639 +0,0 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
class ShaderConfig
|
||||
{
|
||||
private const int ThreadsPerWarp = 32;
|
||||
|
||||
public ShaderStage Stage { get; }
|
||||
|
||||
public bool GpPassthrough { get; }
|
||||
public bool LastInVertexPipeline { get; private set; }
|
||||
|
||||
public bool HasLayerInputAttribute { get; private set; }
|
||||
public int GpLayerInputAttribute { get; private set; }
|
||||
public int ThreadsPerInputPrimitive { get; }
|
||||
|
||||
public OutputTopology OutputTopology { get; }
|
||||
|
||||
public int MaxOutputVertices { get; }
|
||||
|
||||
public int LocalMemorySize { get; }
|
||||
|
||||
public ImapPixelType[] ImapTypes { get; }
|
||||
|
||||
public int OmapTargets { get; }
|
||||
public bool OmapSampleMask { get; }
|
||||
public bool OmapDepth { get; }
|
||||
|
||||
public IGpuAccessor GpuAccessor { get; }
|
||||
|
||||
public TranslationOptions Options { get; }
|
||||
|
||||
public ShaderProperties Properties => ResourceManager.Properties;
|
||||
|
||||
public ResourceManager ResourceManager { get; set; }
|
||||
|
||||
public bool TransformFeedbackEnabled { get; }
|
||||
|
||||
private TransformFeedbackOutput[] _transformFeedbackOutputs;
|
||||
|
||||
/// <summary>
/// Dictionary key identifying a transform feedback variable by I/O variable, location and component.
/// </summary>
readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
{
    public IoVariable IoVariable { get; }
    public int Location { get; }
    public int Component { get; }

    public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
    {
        IoVariable = ioVariable;
        Location = location;
        Component = component;
    }

    public override bool Equals(object other) => other is TransformFeedbackVariable tfbVar && Equals(tfbVar);

    public bool Equals(TransformFeedbackVariable other)
    {
        if (IoVariable != other.IoVariable)
        {
            return false;
        }

        return Location == other.Location && Component == other.Component;
    }

    // Packs the three fields into a single integer: variable in the low bits, location from bit 8, component from bit 16.
    public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);

    public override string ToString() => $"{IoVariable}.{Location}.{Component}";
}
|
||||
|
||||
private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
|
||||
|
||||
public int Size { get; private set; }
|
||||
|
||||
public byte ClipDistancesWritten { get; private set; }
|
||||
|
||||
public FeatureFlags UsedFeatures { get; private set; }
|
||||
|
||||
public int Cb1DataSize { get; private set; }
|
||||
|
||||
public bool LayerOutputWritten { get; private set; }
|
||||
public int LayerOutputAttribute { get; private set; }
|
||||
|
||||
public bool NextUsesFixedFuncAttributes { get; private set; }
|
||||
public int UsedInputAttributes { get; private set; }
|
||||
public int UsedOutputAttributes { get; private set; }
|
||||
public HashSet<int> UsedInputAttributesPerPatch { get; }
|
||||
public HashSet<int> UsedOutputAttributesPerPatch { get; }
|
||||
public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
|
||||
public int PassthroughAttributes { get; private set; }
|
||||
private int _nextUsedInputAttributes;
|
||||
private int _thisUsedInputAttributes;
|
||||
private Dictionary<int, int> _perPatchAttributeLocations;
|
||||
|
||||
public UInt128 NextInputAttributesComponents { get; private set; }
|
||||
public UInt128 ThisInputAttributesComponents { get; private set; }
|
||||
|
||||
/// <summary>
/// Base constructor: stores the stage, GPU accessor, options and local memory size,
/// creates the resource manager, and declares the "tfe_*" storage buffers when the guest
/// has transform feedback enabled but the host does not support it.
/// </summary>
public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize)
{
    Stage = stage;
    GpuAccessor = gpuAccessor;
    Options = options;
    LocalMemorySize = localMemorySize;

    _transformFeedbackDefinitions = new();

    // Never enabled on compute; otherwise it must be enabled by the guest and supported by the host.
    TransformFeedbackEnabled =
        stage != ShaderStage.Compute &&
        gpuAccessor.QueryTransformFeedbackEnabled() &&
        gpuAccessor.QueryHostSupportsTransformFeedback();

    UsedInputAttributesPerPatch = new();
    UsedOutputAttributesPerPatch = new();

    ShaderProperties properties;

    if (stage == ShaderStage.Fragment)
    {
        // Only fragment shaders carry the origin convention.
        bool originUpperLeft = options.TargetApi == TargetApi.Vulkan || gpuAccessor.QueryYNegateEnabled();
        properties = new ShaderProperties(originUpperLeft);
    }
    else
    {
        properties = new ShaderProperties();
    }

    ResourceManager = new ResourceManager(stage, gpuAccessor, properties);

    if (gpuAccessor.QueryHostSupportsTransformFeedback() || !gpuAccessor.QueryTransformFeedbackEnabled())
    {
        return;
    }

    // Guest uses transform feedback but the host does not support it: declare the info buffer...
    StructureType infoStruct = new(new StructureField[]
    {
        new(AggregateType.Array | AggregateType.U32, "base_offset", 4),
        new(AggregateType.U32, "vertex_count"),
    });

    properties.AddOrUpdateStorageBuffer(
        Constants.TfeInfoBinding,
        new BufferDefinition(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", infoStruct));

    // ...and one data buffer per binding starting at TfeBufferBaseBinding.
    StructureType dataStruct = new(new StructureField[]
    {
        new(AggregateType.Array | AggregateType.U32, "data", 0),
    });

    for (int bufferIndex = 0; bufferIndex < Constants.TfeBuffersCount; bufferIndex++)
    {
        int dataBinding = Constants.TfeBufferBaseBinding + bufferIndex;

        properties.AddOrUpdateStorageBuffer(
            dataBinding,
            new BufferDefinition(BufferLayout.Std430, 1, dataBinding, $"tfe_data{bufferIndex}", dataStruct));
    }
}
|
||||
|
||||
/// <summary>
/// Creates a configuration from an explicit output topology and maximum output vertex count,
/// with no local memory and a single thread per input primitive.
/// </summary>
public ShaderConfig(
    ShaderStage stage,
    OutputTopology outputTopology,
    int maxOutputVertices,
    IGpuAccessor gpuAccessor,
    TranslationOptions options) : this(stage, gpuAccessor, options, localMemorySize: 0)
{
    OutputTopology = outputTopology;
    MaxOutputVertices = maxOutputVertices;
    ThreadsPerInputPrimitive = 1;
}
|
||||
|
||||
/// <summary>
/// Creates a configuration from a shader header, copying the geometry, input map and
/// output map information the header carries.
/// </summary>
public ShaderConfig(
    ShaderHeader header,
    IGpuAccessor gpuAccessor,
    TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header))
{
    // Geometry related state; passthrough is only honored on geometry shaders.
    GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;
    ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
    OutputTopology = header.OutputTopology;
    MaxOutputVertices = header.MaxOutputVertexCount;

    // Input and output maps.
    ImapTypes = header.ImapTypes;
    OmapTargets = header.OmapTargets;
    OmapSampleMask = header.OmapSampleMask;
    OmapDepth = header.OmapDepth;

    // Every stage ordered before fragment starts out flagged as last in the vertex pipeline.
    LastInVertexPipeline = header.Stage < ShaderStage.Fragment;
}
|
||||
|
||||
/// <summary>
/// Computes the local memory size from the header: low size plus high size plus the CRS size divided by <see cref="ThreadsPerWarp"/>.
/// </summary>
private static int GetLocalMemorySize(ShaderHeader header) =>
    header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
|
||||
|
||||
/// <summary>
/// Lazily builds the per-word transform feedback output table and the variable lookup dictionary.
/// Does nothing when the shader has no transform feedback outputs or the table already exists.
/// </summary>
private void EnsureTransformFeedbackInitialized()
{
    if (!HasTransformFeedbackOutputs() || _transformFeedbackOutputs != null)
    {
        return;
    }

    TransformFeedbackOutput[] outputs = new TransformFeedbackOutput[0xc0];

    // One bit per group of four words that has at least one output assigned.
    ulong usedVectors = 0UL;

    for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
    {
        var varyingLocations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
        var bufferStride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);

        for (int i = 0; i < varyingLocations.Length; i++)
        {
            byte wordOffset = varyingLocations[i];

            // Entries outside of the table are ignored.
            if (wordOffset >= 0xc0)
            {
                continue;
            }

            outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, i * 4, bufferStride);
            usedVectors |= 1UL << (wordOffset / 4);
        }
    }

    // Must be published before the loop below: HasPerLocationInputOrOutputComponent reads the table
    // through this same method, which returns early once the field is set.
    _transformFeedbackOutputs = outputs;

    while (usedVectors != 0)
    {
        int vectorIndex = BitOperations.TrailingZeroCount(usedVectors);

        for (int element = 0; element < 4; element++)
        {
            int wordOffset = vectorIndex * 4 + element;

            if (!outputs[wordOffset].Valid)
            {
                continue;
            }

            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, wordOffset * 4, out int location);

            int component = HasPerLocationInputOrOutputComponent(ioVariable, location, element, isOutput: true)
                ? element
                : 0;

            TransformFeedbackVariable key = new(ioVariable, location, component);
            _transformFeedbackDefinitions.TryAdd(key, outputs[wordOffset]);
        }

        // Clear the lowest set bit, which is the one that was just processed.
        usedVectors &= usedVectors - 1;
    }
}
|
||||
|
||||
/// <summary>
/// Returns the per-word transform feedback output table, building it first if needed.
/// The result is whatever the backing field holds after initialization was attempted.
/// </summary>
public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs;
}
|
||||
|
||||
/// <summary>
/// Looks up the transform feedback output registered for the given variable, location and component.
/// </summary>
/// <returns>True if an entry exists for that combination, false otherwise</returns>
public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackVariable key = new(ioVariable, location, component);

    return _transformFeedbackDefinitions.TryGetValue(key, out transformFeedbackOutput);
}
|
||||
|
||||
/// <summary>
/// True when transform feedback is enabled and this stage is either flagged as last in the
/// vertex pipeline or is the fragment stage.
/// </summary>
private bool HasTransformFeedbackOutputs() =>
    TransformFeedbackEnabled && (LastInVertexPipeline || Stage == ShaderStage.Fragment);
|
||||
|
||||
/// <summary>
/// Checks if transform feedback applies to the inputs or the outputs of this stage.
/// Outputs qualify on the stage flagged as last in the vertex pipeline, inputs on the fragment stage.
/// </summary>
public bool HasTransformFeedbackOutputs(bool isOutput)
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
}
|
||||
|
||||
/// <summary>
/// Checks if the variable is addressed per location. User defined variables qualify unless the
/// matching indexing feature (input or output) is in use; fragment output colors always qualify.
/// </summary>
public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
{
    return ioVariable switch
    {
        IoVariable.UserDefined => !UsedFeatures.HasFlag(isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing),
        IoVariable.FragmentOutputColor => true,
        _ => false,
    };
}
|
||||
|
||||
/// <summary>
/// Checks if a user defined variable component is declared on its own, which is the case when
/// transform feedback applies and the component count reported for it is exactly one.
/// </summary>
public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput) =>
    ioVariable == IoVariable.UserDefined &&
    HasTransformFeedbackOutputs(isOutput) &&
    GetTransformFeedbackOutputComponents(location, component) == 1;
|
||||
|
||||
/// <summary>
/// Returns the transform feedback output table entry at the given word offset,
/// building the table first if needed.
/// </summary>
public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs[wordOffset];
}
|
||||
|
||||
/// <summary>
/// Returns the transform feedback output for a user attribute location and component,
/// converting the pair into a word offset relative to the user attribute base.
/// </summary>
public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
{
    int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;

    return GetTransformFeedbackOutput(wordOffset);
}
|
||||
|
||||
/// <summary>
/// Counts how many components of a user attribute location, starting from the first one, are
/// valid transform feedback outputs whose offsets are 4 bytes apart.
/// </summary>
/// <returns>1 if <paramref name="component"/> lies at or beyond that run, otherwise the run length</returns>
public int GetTransformFeedbackOutputComponents(int location, int component)
{
    EnsureTransformFeedbackInitialized();

    int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
    int count = 1;

    while (count < 4)
    {
        ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
        ref var curr = ref _transformFeedbackOutputs[baseIndex + count];

        int expectedOffset = prev.Offset + 4;
        int actualOffset = curr.Offset;

        bool contiguous = prev.Valid && curr.Valid && expectedOffset == actualOffset;

        if (!contiguous)
        {
            break;
        }

        count++;
    }

    // A component that is not part of the run is treated as a single component output.
    return count <= component ? 1 : count;
}
|
||||
|
||||
/// <summary>
/// Returns the type of a fragment output color: a 4-component vector of the element type
/// the GPU accessor reports for that location.
/// </summary>
public AggregateType GetFragmentOutputColorType(int location) =>
    AggregateType.Vector4 | GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();
|
||||
|
||||
/// <summary>
/// Returns the type of a user defined input or output. When the matching indexing feature is in
/// use the result is an array of FP32 4-component vectors; otherwise it is a single 4-component
/// vector whose element type is queried for vertex shader inputs and FP32 for everything else.
/// </summary>
public AggregateType GetUserDefinedType(int location, bool isOutput)
{
    FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

    if (UsedFeatures.HasFlag(indexingFlag))
    {
        return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
    }

    bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;

    AggregateType elementType = isVertexInput
        ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
        : AggregateType.FP32;

    return AggregateType.Vector4 | elementType;
}
|
||||
|
||||
/// <summary>
/// Returns the register holding the fragment depth output: the number of bits set in the
/// output map targets mask, plus one.
/// </summary>
public int GetDepthRegister()
{
    // The depth register is always two registers after the last color output.
    int colorRegisters = BitOperations.PopCount((uint)OmapTargets);

    return colorRegisters + 1;
}
|
||||
|
||||
/// <summary>
/// Reads a word from constant buffer 1 through the GPU accessor, growing <see cref="Cb1DataSize"/>
/// so that it covers the 4 bytes starting at <paramref name="offset"/>.
/// </summary>
public uint ConstantBuffer1Read(int offset)
{
    Cb1DataSize = Math.Max(Cb1DataSize, offset + 4);

    return GpuAccessor.ConstantBuffer1Read(offset);
}
|
||||
|
||||
/// <summary>
/// Returns the format to declare for a texture. Falls back to R8G8B8A8Unorm, with a log
/// message, when the GPU accessor reports an unknown format.
/// </summary>
public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
{
    // When the formatted load extension is supported, no format needs to be specified;
    // the image can be declared without one and the GPU will handle it.
    if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
    {
        return TextureFormat.Unknown;
    }

    TextureFormat queriedFormat = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (queriedFormat != TextureFormat.Unknown)
    {
        return queriedFormat;
    }

    GpuAccessor.Log($"Unknown format for texture {handle}.");

    return TextureFormat.R8G8B8A8Unorm;
}
|
||||
|
||||
/// <summary>
/// Checks if the format is one of the two accepted for atomic image operations (R32Sint or R32Uint).
/// </summary>
private static bool FormatSupportsAtomic(TextureFormat format) =>
    format is TextureFormat.R32Sint or TextureFormat.R32Uint;
|
||||
|
||||
/// <summary>
/// Returns the format to declare for an image used with atomic operations. Falls back to
/// R32Sint, with a log message, when the queried format does not support atomics.
/// </summary>
public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
{
    // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
    // so a type must always be specified here.
    TextureFormat queriedFormat = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (FormatSupportsAtomic(queriedFormat))
    {
        return queriedFormat;
    }

    GpuAccessor.Log($"Unsupported format for texture {handle}: {queriedFormat}.");

    return TextureFormat.R32Sint;
}
|
||||
|
||||
/// <summary>
/// Adds <paramref name="size"/> to the accumulated <see cref="Size"/>.
/// </summary>
public void SizeAdd(int size) => Size += size;
|
||||
|
||||
/// <summary>
/// Merges the clip distances written, used features and used input/output attribute masks
/// of another configuration into this one.
/// </summary>
public void InheritFrom(ShaderConfig other)
{
    // Attribute usage masks.
    UsedInputAttributes |= other.UsedInputAttributes;
    UsedOutputAttributes |= other.UsedOutputAttributes;

    // Feature and clip distance flags.
    UsedFeatures |= other.UsedFeatures;
    ClipDistancesWritten |= other.ClipDistancesWritten;
}
|
||||
|
||||
/// <summary>
/// Records the attribute written as layer output and flags the layer output as written.
/// </summary>
public void SetLayerOutputAttribute(int attr) =>
    (LayerOutputWritten, LayerOutputAttribute) = (true, attr);
|
||||
|
||||
/// <summary>
/// Records the attribute used as geometry shader layer input and flags that one is present.
/// </summary>
public void SetGeometryShaderLayerInputAttribute(int attr) =>
    (HasLayerInputAttribute, GpLayerInputAttribute) = (true, attr);
|
||||
|
||||
/// <summary>
/// Flags this stage as the last one in the vertex pipeline.
/// </summary>
public void SetLastInVertexPipeline() => LastInVertexPipeline = true;
|
||||
|
||||
/// <summary>
/// Sets the bit for <paramref name="index"/> on the used input attributes mask.
/// </summary>
public void SetInputUserAttributeFixedFunc(int index) => UsedInputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Sets the bit for <paramref name="index"/> on the used output attributes mask.
/// </summary>
public void SetOutputUserAttributeFixedFunc(int index) => UsedOutputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Marks a user input attribute as used, both on the attribute masks and on the
/// per-component mask (4 bits per attribute).
/// </summary>
public void SetInputUserAttribute(int index, int component)
{
    int attributeBit = 1 << index;
    int componentBit = index * 4 + component;

    UsedInputAttributes |= attributeBit;
    _thisUsedInputAttributes |= attributeBit;
    ThisInputAttributesComponents |= UInt128.One << componentBit;
}
|
||||
|
||||
/// <summary>
/// Adds <paramref name="index"/> to the set of used per-patch input attributes.
/// </summary>
public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);
|
||||
|
||||
/// <summary>
/// Sets the bit for <paramref name="index"/> on the used output attributes mask.
/// </summary>
public void SetOutputUserAttribute(int index) => UsedOutputAttributes |= 1 << index;
|
||||
|
||||
/// <summary>
/// Adds <paramref name="index"/> to the set of used per-patch output attributes.
/// </summary>
public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
|
||||
|
||||
/// <summary>
/// Merges attribute usage information from the configuration of the next stage into this one.
/// Copies the next stage input usage, merges it into this stage outputs, assigns locations to
/// per-patch outputs, and clears <see cref="LastInVertexPipeline"/> when the next stage
/// is another vertex pipeline stage able to modify outputs.
/// </summary>
/// <param name="config">Configuration of the next stage; its per-patch location map is also updated</param>
public void MergeFromtNextStage(ShaderConfig config)
{
    NextInputAttributesComponents = config.ThisInputAttributesComponents;
    NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
    NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
    MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);

    if (UsedOutputAttributesPerPatch.Count != 0)
    {
        // Regular and per-patch input/output locations can't overlap,
        // so per-patch attributes are assigned to locations that no regular output uses.

        Dictionary<int, int> locationsMap = new();

        int freeMask = ~UsedOutputAttributes;

        foreach (int attr in UsedOutputAttributesPerPatch)
        {
            // TrailingZeroCount returns 32 when no bit is set, meaning all locations are taken.
            int location = BitOperations.TrailingZeroCount(freeMask);
            if (location == 32)
            {
                config.GpuAccessor.Log($"Not enough free locations for patch input/output 0x{attr:X}.");
                break;
            }

            locationsMap.Add(attr, location);
            freeMask &= ~(1 << location);
        }

        // Both stages must agree on the locations, so use the same "map" for both.
        _perPatchAttributeLocations = locationsMap;
        config._perPatchAttributeLocations = locationsMap;
    }

    // We don't consider geometry shaders using the geometry shader passthrough feature
    // as being the last because when this feature is used, it can't actually modify any of the outputs,
    // so the stage that comes before it is the last one that can do modifications.
    if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough))
    {
        LastInVertexPipeline = false;
    }
}
|
||||
|
||||
/// <summary>
/// Records the input attribute mask of the next stage and merges it into this stage outputs.
/// With geometry shader passthrough only the attributes not already written are recorded as
/// passthrough; otherwise the regular and per-patch output sets are extended.
/// </summary>
public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
{
    _nextUsedInputAttributes = mask;

    if (GpPassthrough)
    {
        PassthroughAttributes = mask & ~UsedOutputAttributes;

        return;
    }

    UsedOutputAttributes |= mask;
    UsedOutputAttributesPerPatch.UnionWith(perPatch);
}
|
||||
|
||||
/// <summary>
/// Returns the location assigned to a per-patch attribute, or the index itself when there is
/// no location map or the map has no entry for it.
/// </summary>
public int GetPerPatchAttributeLocation(int index)
{
    if (_perPatchAttributeLocations != null && _perPatchAttributeLocations.TryGetValue(index, out int mappedLocation))
    {
        return mappedLocation;
    }

    return index;
}
|
||||
|
||||
/// <summary>
/// Checks if an output attribute is used. Only user attributes are ever reported as unused, and
/// only when the next stage uses fixed function attributes and does not read that attribute.
/// </summary>
public bool IsUsedOutputAttribute(int attr)
{
    bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;

    // The check for fixed function attributes on the next stage is conservative,
    // returning false if the output is just not used by the next stage is also valid.
    if (!NextUsesFixedFuncAttributes || !isUserAttribute)
    {
        return true;
    }

    // Each user attribute spans 16 bytes.
    int attributeIndex = (attr - AttributeConsts.UserAttributeBase) >> 4;
    int attributeBit = 1 << attributeIndex;

    return (_nextUsedInputAttributes & attributeBit) != 0;
}
|
||||
|
||||
/// <summary>
/// Searches for a user attribute not present on the usage mask (next stage inputs for outputs,
/// this stage inputs otherwise), skipping <paramref name="index"/> free ones first.
/// </summary>
/// <returns>The bit found, or -1 if the mask has no free bit on entry</returns>
public int GetFreeUserAttribute(bool isOutput, int index)
{
    int usedMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;
    int candidate = -1;

    // NOTE(review): if the mask fills up before the remaining count reaches zero, the last
    // free bit found is returned rather than -1 — confirm this is intended.
    for (int remaining = index; usedMask != -1; remaining--)
    {
        candidate = BitOperations.TrailingZeroCount(~usedMask);

        if (candidate == 32)
        {
            return -1;
        }

        if (remaining < 1)
        {
            return candidate;
        }

        // Mark this one as taken and look for the next free bit.
        usedMask |= 1 << candidate;
    }

    return candidate;
}
|
||||
|
||||
/// <summary>
/// Marks every user input attribute, and every component of each, as used.
/// </summary>
public void SetAllInputUserAttributes()
{
    // 4 component bits per attribute, taken from the low end of the 128-bit mask.
    int unusedHighBits = 128 - Constants.MaxAttributes * 4;

    UsedInputAttributes |= Constants.AllAttributesMask;
    ThisInputAttributesComponents |= UInt128.MaxValue >> unusedHighBits;
}
|
||||
|
||||
/// <summary>
/// Marks every user output attribute as used.
/// </summary>
public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;
|
||||
|
||||
/// <summary>
/// Sets the bit for <paramref name="index"/> on the mask of clip distances written.
/// </summary>
public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);
|
||||
|
||||
/// <summary>
/// Adds <paramref name="flags"/> to the set of features used by the shader.
/// </summary>
public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
|
||||
|
||||
/// <summary>
/// Creates the program information for the shader, from the resources and state collected so far.
/// </summary>
/// <param name="identification">Identification of the shader, if it matched a known pattern</param>
/// <returns>Shader program information</returns>
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
{
    // Descriptors are queried in the same order as they are passed to the constructor.
    var constantBuffers = ResourceManager.GetConstantBufferDescriptors();
    var storageBuffers = ResourceManager.GetStorageBufferDescriptors();
    var textures = ResourceManager.GetTextureDescriptors();
    var images = ResourceManager.GetImageDescriptors();

    FeatureFlags features = UsedFeatures;

    bool usesFragCoordXY = features.HasFlag(FeatureFlags.FragCoordXY);
    bool usesInstanceId = features.HasFlag(FeatureFlags.InstanceId);
    bool usesDrawParameters = features.HasFlag(FeatureFlags.DrawParameters);
    bool usesRtLayer = features.HasFlag(FeatureFlags.RtLayer);

    return new ShaderProgramInfo(
        constantBuffers,
        storageBuffers,
        textures,
        images,
        identification,
        GpLayerInputAttribute,
        Stage,
        usesFragCoordXY,
        usesInstanceId,
        usesDrawParameters,
        usesRtLayer,
        ClipDistancesWritten,
        OmapTargets);
}
|
||||
}
|
||||
}
|
315
src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs
Normal file
315
src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs
Normal file
|
@ -0,0 +1,315 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Definitions of a shader: its stage, values taken from the shader itself (compute local size,
/// geometry shader topology, input/output maps) and values forwarded from the graphics state,
/// plus the transform feedback layout associated with the shader outputs.
/// </summary>
class ShaderDefinitions
{
    /// <summary>
    /// Identifies a single transform feedback variable by I/O variable, location and component.
    /// </summary>
    readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
    {
        public IoVariable IoVariable { get; }
        public int Location { get; }
        public int Component { get; }

        public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
        {
            IoVariable = ioVariable;
            Location = location;
            Component = component;
        }

        public override bool Equals(object other) => other is TransformFeedbackVariable tfbVar && Equals(tfbVar);

        public bool Equals(TransformFeedbackVariable other)
        {
            if (IoVariable != other.IoVariable)
            {
                return false;
            }

            return Location == other.Location && Component == other.Component;
        }

        // Packs the three values on a single integer: variable on the low bits,
        // location starting at bit 8 and component starting at bit 16.
        public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);

        public override string ToString() => $"{IoVariable}.{Location}.{Component}";
    }

    private readonly GpuGraphicsState _graphicsState;
    private readonly TransformFeedbackOutput[] _transformFeedbackOutputs;
    private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;

    public ShaderStage Stage { get; }

    // Compute shader local size, only set by the compute constructor.
    public int ComputeLocalSizeX { get; }
    public int ComputeLocalSizeY { get; }
    public int ComputeLocalSizeZ { get; }

    // Values below that use "=>" are forwarded from the graphics state passed on the constructor.
    public bool TessCw => _graphicsState.TessCw;
    public TessPatchType TessPatchType => _graphicsState.TessPatchType;
    public TessSpacing TessSpacing => _graphicsState.TessSpacing;

    // Dithering is only reported as enabled when alpha-to-coverage itself is enabled too.
    public bool AlphaToCoverageDitherEnable => _graphicsState.AlphaToCoverageEnable && _graphicsState.AlphaToCoverageDitherEnable;
    public bool ViewportTransformDisable => _graphicsState.ViewportTransformDisable;

    public bool DepthMode => _graphicsState.DepthMode;

    public float PointSize => _graphicsState.PointSize;

    public AlphaTestOp AlphaTestCompare => _graphicsState.AlphaTestCompare;
    public float AlphaTestReference => _graphicsState.AlphaTestReference;

    public bool GpPassthrough { get; }
    public bool LastInVertexPipeline { get; set; }

    public int ThreadsPerInputPrimitive { get; }

    public InputTopology InputTopology => _graphicsState.Topology;
    public OutputTopology OutputTopology { get; }

    public int MaxOutputVertices { get; }

    public bool DualSourceBlend => _graphicsState.DualSourceBlendEnable;
    public bool EarlyZForce => _graphicsState.EarlyZForce;

    public bool YNegateEnabled => _graphicsState.YNegateEnabled;
    public bool OriginUpperLeft => _graphicsState.OriginUpperLeft;

    public ImapPixelType[] ImapTypes { get; }
    public bool IaIndexing { get; private set; }
    public bool OaIndexing { get; private set; }

    public int OmapTargets { get; }
    public bool OmapSampleMask { get; }
    public bool OmapDepth { get; }

    public bool TransformFeedbackEnabled { get; }

    /// <summary>
    /// Creates definitions where only the shader stage is known.
    /// </summary>
    /// <param name="stage">Shader stage</param>
    public ShaderDefinitions(ShaderStage stage)
    {
        Stage = stage;
    }

    /// <summary>
    /// Creates definitions carrying a compute local size.
    /// </summary>
    /// <param name="stage">Shader stage</param>
    /// <param name="computeLocalSizeX">Local size on the X dimension</param>
    /// <param name="computeLocalSizeY">Local size on the Y dimension</param>
    /// <param name="computeLocalSizeZ">Local size on the Z dimension</param>
    public ShaderDefinitions(
        ShaderStage stage,
        int computeLocalSizeX,
        int computeLocalSizeY,
        int computeLocalSizeZ) : this(stage)
    {
        ComputeLocalSizeX = computeLocalSizeX;
        ComputeLocalSizeY = computeLocalSizeY;
        ComputeLocalSizeZ = computeLocalSizeZ;
    }

    /// <summary>
    /// Creates definitions carrying the graphics state and geometry shader values,
    /// without input/output maps or transform feedback information.
    /// </summary>
    /// <param name="stage">Shader stage</param>
    /// <param name="graphicsState">Graphics state that backs the forwarded properties</param>
    /// <param name="gpPassthrough">Value of <see cref="GpPassthrough"/></param>
    /// <param name="threadsPerInputPrimitive">Value of <see cref="ThreadsPerInputPrimitive"/></param>
    /// <param name="outputTopology">Value of <see cref="OutputTopology"/></param>
    /// <param name="maxOutputVertices">Value of <see cref="MaxOutputVertices"/></param>
    public ShaderDefinitions(
        ShaderStage stage,
        GpuGraphicsState graphicsState,
        bool gpPassthrough,
        int threadsPerInputPrimitive,
        OutputTopology outputTopology,
        int maxOutputVertices) : this(stage)
    {
        _graphicsState = graphicsState;
        GpPassthrough = gpPassthrough;
        ThreadsPerInputPrimitive = threadsPerInputPrimitive;
        OutputTopology = outputTopology;
        MaxOutputVertices = maxOutputVertices;
    }

    /// <summary>
    /// Creates the complete definitions, including input/output maps and transform feedback.
    /// </summary>
    /// <param name="stage">Shader stage</param>
    /// <param name="graphicsState">Graphics state that backs the forwarded properties</param>
    /// <param name="gpPassthrough">Value of <see cref="GpPassthrough"/></param>
    /// <param name="threadsPerInputPrimitive">Value of <see cref="ThreadsPerInputPrimitive"/></param>
    /// <param name="outputTopology">Value of <see cref="OutputTopology"/></param>
    /// <param name="maxOutputVertices">Value of <see cref="MaxOutputVertices"/></param>
    /// <param name="imapTypes">Value of <see cref="ImapTypes"/></param>
    /// <param name="omapTargets">Value of <see cref="OmapTargets"/></param>
    /// <param name="omapSampleMask">Value of <see cref="OmapSampleMask"/></param>
    /// <param name="omapDepth">Value of <see cref="OmapDepth"/></param>
    /// <param name="transformFeedbackEnabled">Value of <see cref="TransformFeedbackEnabled"/></param>
    /// <param name="transformFeedbackVecMap">Bit mask with one bit per group of 4 entries of <paramref name="transformFeedbackOutputs"/> to inspect</param>
    /// <param name="transformFeedbackOutputs">Transform feedback outputs, indexed by word offset</param>
    public ShaderDefinitions(
        ShaderStage stage,
        GpuGraphicsState graphicsState,
        bool gpPassthrough,
        int threadsPerInputPrimitive,
        OutputTopology outputTopology,
        int maxOutputVertices,
        ImapPixelType[] imapTypes,
        int omapTargets,
        bool omapSampleMask,
        bool omapDepth,
        bool transformFeedbackEnabled,
        ulong transformFeedbackVecMap,
        TransformFeedbackOutput[] transformFeedbackOutputs)
        : this(stage, graphicsState, gpPassthrough, threadsPerInputPrimitive, outputTopology, maxOutputVertices)
    {
        ImapTypes = imapTypes;
        OmapTargets = omapTargets;
        OmapSampleMask = omapSampleMask;
        OmapDepth = omapDepth;
        LastInVertexPipeline = stage < ShaderStage.Fragment;
        TransformFeedbackEnabled = transformFeedbackEnabled;
        _transformFeedbackOutputs = transformFeedbackOutputs;
        _transformFeedbackDefinitions = new();

        // Everything above must be set before the registration below,
        // as it passes this instance around and reads the members back.
        ulong pendingVectors = transformFeedbackVecMap;

        while (pendingVectors != 0)
        {
            int vecIndex = BitOperations.TrailingZeroCount(pendingVectors);

            RegisterTransformFeedbackVector(vecIndex);

            pendingVectors &= ~(1UL << vecIndex);
        }
    }

    /// <summary>
    /// Registers the valid transform feedback outputs of the 4 words of a vector.
    /// </summary>
    /// <param name="vecIndex">Index of the vector, in units of 4 words</param>
    private void RegisterTransformFeedbackVector(int vecIndex)
    {
        for (int subIndex = 0; subIndex < 4; subIndex++)
        {
            int wordOffset = vecIndex * 4 + subIndex;
            TransformFeedbackOutput output = _transformFeedbackOutputs[wordOffset];

            if (!output.Valid)
            {
                continue;
            }

            int byteOffset = wordOffset * 4;
            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location);

            // The component only takes part on the key for outputs that are declared per component.
            bool perComponent = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true);
            var key = new TransformFeedbackVariable(ioVariable, location, perComponent ? subIndex : 0);

            // The first output registered for a given variable wins.
            _transformFeedbackDefinitions.TryAdd(key, output);
        }
    }

    public void EnableInputIndexing() => IaIndexing = true;

    public void EnableOutputIndexing() => OaIndexing = true;

    /// <summary>
    /// Gets the transform feedback outputs array.
    /// </summary>
    /// <returns>The array passed on the constructor, or null if this shader has no transform feedback outputs</returns>
    public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
    {
        return HasTransformFeedbackOutputs() ? _transformFeedbackOutputs : null;
    }

    /// <summary>
    /// Tries to get the transform feedback output registered for a variable.
    /// </summary>
    /// <param name="ioVariable">I/O variable</param>
    /// <param name="location">Location of the variable</param>
    /// <param name="component">Component of the variable</param>
    /// <param name="transformFeedbackOutput">Transform feedback output found, or the default value if not found</param>
    /// <returns>True if an output is registered for the variable, false otherwise</returns>
    public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
    {
        if (HasTransformFeedbackOutputs())
        {
            var key = new TransformFeedbackVariable(ioVariable, location, component);

            return _transformFeedbackDefinitions.TryGetValue(key, out transformFeedbackOutput);
        }

        transformFeedbackOutput = default;

        return false;
    }

    private bool HasTransformFeedbackOutputs()
    {
        if (!TransformFeedbackEnabled)
        {
            return false;
        }

        return LastInVertexPipeline || Stage == ShaderStage.Fragment;
    }

    /// <summary>
    /// Checks if transform feedback applies to the inputs or outputs of this shader.
    /// </summary>
    /// <param name="isOutput">True to check the outputs, false to check the inputs</param>
    /// <returns>
    /// True if transform feedback is enabled and either the outputs of the last vertex pipeline
    /// stage or the inputs of the fragment stage are being checked
    /// </returns>
    public bool HasTransformFeedbackOutputs(bool isOutput)
    {
        bool appliesToDirection = isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;

        return TransformFeedbackEnabled && appliesToDirection;
    }

    /// <summary>
    /// Checks if a variable has one instance per location.
    /// </summary>
    /// <param name="ioVariable">I/O variable</param>
    /// <param name="isOutput">True for an output, false for an input</param>
    /// <returns>
    /// For user defined variables, true when indexing is not enabled for that direction.
    /// For any other variable, true only for the fragment output color
    /// </returns>
    public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
    {
        if (ioVariable != IoVariable.UserDefined)
        {
            return ioVariable == IoVariable.FragmentOutputColor;
        }

        return isOutput ? !OaIndexing : !IaIndexing;
    }

    /// <summary>
    /// Checks if a user defined variable is handled as a single component because of transform feedback.
    /// </summary>
    /// <param name="ioVariable">I/O variable</param>
    /// <param name="location">Location of the variable</param>
    /// <param name="component">Component of the variable</param>
    /// <param name="isOutput">True for an output, false for an input</param>
    /// <returns>True if the transform feedback component count for the location and component is 1</returns>
    public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
    {
        return ioVariable == IoVariable.UserDefined &&
               HasTransformFeedbackOutputs(isOutput) &&
               GetTransformFeedbackOutputComponents(location, component) == 1;
    }

    public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset) => _transformFeedbackOutputs[wordOffset];

    public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
    {
        int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;

        return _transformFeedbackOutputs[wordOffset];
    }

    /// <summary>
    /// Gets the number of components that a transform feedback output of a user attribute spans.
    /// </summary>
    /// <param name="location">Location of the user attribute</param>
    /// <param name="component">Component of the user attribute</param>
    /// <returns>
    /// Length of the run of valid outputs, starting at the first component of the location,
    /// where each one is placed 4 bytes after the previous one. If <paramref name="component"/>
    /// is not part of that run, 1 is returned instead
    /// </returns>
    public int GetTransformFeedbackOutputComponents(int location, int component)
    {
        int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
        int index = baseIndex + component;
        int count = 1;

        while (count < 4)
        {
            TransformFeedbackOutput prev = _transformFeedbackOutputs[baseIndex + count - 1];
            TransformFeedbackOutput curr = _transformFeedbackOutputs[baseIndex + count];

            bool contiguous = prev.Valid && curr.Valid && prev.Offset + 4 == curr.Offset;

            if (!contiguous)
            {
                break;
            }

            count++;
        }

        // The requested component lies past the end of the run, so it stands alone.
        return baseIndex + count <= index ? 1 : count;
    }

    public AggregateType GetFragmentOutputColorType(int location)
    {
        AggregateType elementType = _graphicsState.FragmentOutputTypes[location].ToAggregateType();

        return AggregateType.Vector4 | elementType;
    }

    /// <summary>
    /// Gets the type of a user defined input or output.
    /// </summary>
    /// <param name="location">Location of the variable</param>
    /// <param name="isOutput">True for an output, false for an input</param>
    /// <returns>
    /// An array of 4 component float vectors when indexing is enabled for the direction,
    /// otherwise a 4 component vector whose element type comes from the graphics state
    /// attribute types for vertex shader inputs, and is float for everything else
    /// </returns>
    public AggregateType GetUserDefinedType(int location, bool isOutput)
    {
        bool indexed = isOutput ? OaIndexing : IaIndexing;

        if (indexed)
        {
            return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
        }

        bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;

        AggregateType elementType = isVertexInput
            ? _graphicsState.AttributeTypes[location].ToAggregateType()
            : AggregateType.FP32;

        return AggregateType.Vector4 | elementType;
    }
}
|
||||
}
|
|
@ -5,18 +5,22 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
static class ShaderIdentifier
|
||||
{
|
||||
public static ShaderIdentification Identify(IReadOnlyList<Function> functions, ShaderConfig config)
|
||||
public static ShaderIdentification Identify(
|
||||
IReadOnlyList<Function> functions,
|
||||
IGpuAccessor gpuAccessor,
|
||||
ShaderStage stage,
|
||||
InputTopology inputTopology,
|
||||
out int layerInputAttr)
|
||||
{
|
||||
if (config.Stage == ShaderStage.Geometry &&
|
||||
config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
|
||||
!config.GpuAccessor.QueryHostSupportsGeometryShader() &&
|
||||
IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
|
||||
if (stage == ShaderStage.Geometry &&
|
||||
inputTopology == InputTopology.Triangles &&
|
||||
!gpuAccessor.QueryHostSupportsGeometryShader() &&
|
||||
IsLayerPassthroughGeometryShader(functions, out layerInputAttr))
|
||||
{
|
||||
config.SetGeometryShaderLayerInputAttribute(layerInputAttr);
|
||||
|
||||
return ShaderIdentification.GeometryLayerPassthrough;
|
||||
}
|
||||
|
||||
layerInputAttr = 0;
|
||||
return ShaderIdentification.None;
|
||||
}
|
||||
|
||||
|
|
33
src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs
Normal file
33
src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs
Normal file
|
@ -0,0 +1,33 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Bundle of everything that is handed to a transform pass when it runs over a node.
/// </summary>
readonly ref struct TransformContext
{
    public readonly HelperFunctionManager Hfm;
    public readonly BasicBlock[] Blocks;
    public readonly ResourceManager ResourceManager;
    public readonly IGpuAccessor GpuAccessor;
    public readonly TargetLanguage TargetLanguage;
    public readonly ShaderStage Stage;

    // Reference to the used features variable owned by the caller,
    // flags set through it are visible to the caller.
    public readonly ref FeatureFlags UsedFeatures;

    /// <summary>
    /// Creates a new transform context.
    /// </summary>
    /// <param name="hfm">Helper function manager</param>
    /// <param name="blocks">Basic blocks</param>
    /// <param name="resourceManager">Resource manager</param>
    /// <param name="gpuAccessor">GPU accessor</param>
    /// <param name="targetLanguage">Target language</param>
    /// <param name="stage">Shader stage</param>
    /// <param name="usedFeatures">Variable holding the used features, stored by reference</param>
    public TransformContext(
        HelperFunctionManager hfm,
        BasicBlock[] blocks,
        ResourceManager resourceManager,
        IGpuAccessor gpuAccessor,
        TargetLanguage targetLanguage,
        ShaderStage stage,
        ref FeatureFlags usedFeatures)
    {
        UsedFeatures = ref usedFeatures;

        Stage = stage;
        TargetLanguage = targetLanguage;
        GpuAccessor = gpuAccessor;
        ResourceManager = resourceManager;
        Blocks = blocks;
        Hfm = hfm;
    }
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
{
|
||||
/// <summary>
/// Placement of a transform feedback output on a buffer.
/// The default value of the struct has <see cref="Valid"/> set to false.
/// </summary>
readonly struct TransformFeedbackOutput
{
    public readonly bool Valid;
    public readonly int Buffer;
    public readonly int Offset;
    public readonly int Stride;

    /// <summary>
    /// Creates a new valid transform feedback output.
    /// </summary>
    /// <param name="buffer">Value stored on <see cref="Buffer"/></param>
    /// <param name="offset">Value stored on <see cref="Offset"/></param>
    /// <param name="stride">Value stored on <see cref="Stride"/></param>
    public TransformFeedbackOutput(int buffer, int offset, int stride)
    {
        (Valid, Buffer, Offset, Stride) = (true, buffer, offset, stride);
    }
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
/// <summary>
/// Transform pass for vertex shaders that looks for reads of the draw parameters
/// (base vertex, base instance and draw index) from constant buffer slot 0.
/// When the GPU accessor reports constant buffer draw parameters, the reads are replaced
/// by loads of the matching input variable. In all cases, finding such a read sets
/// the draw parameters feature flag.
/// </summary>
class DrawParametersReplace : ITransformPass
{
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        bool isVertex = stage == ShaderStage.Vertex;

        return isVertex;
    }

    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation operation = (Operation)node.Value;

        bool usesDrawParameters = context.GpuAccessor.QueryHasConstantBufferDrawParameters()
            ? ReplaceConstantBufferWithDrawParameters(node, operation)
            : HasConstantBufferDrawParameters(operation);

        if (usesDrawParameters)
        {
            context.UsedFeatures |= FeatureFlags.DrawParameters;
        }

        return node;
    }

    /// <summary>
    /// Checks if an operand reads one of the draw parameters from constant buffer slot 0.
    /// </summary>
    /// <param name="src">Operand to check</param>
    /// <param name="ioVariable">Input variable that matches the draw parameter, only meaningful on success</param>
    /// <returns>True if the operand reads a draw parameter, false otherwise</returns>
    private static bool TryGetDrawParameter(Operand src, out IoVariable ioVariable)
    {
        ioVariable = default;

        if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
        {
            return false;
        }

        // The constants are byte offsets, the operand offset is compared against them divided by 4.
        switch (src.GetCbufOffset())
        {
            case Constants.NvnBaseVertexByteOffset / 4:
                ioVariable = IoVariable.BaseVertex;
                return true;
            case Constants.NvnBaseInstanceByteOffset / 4:
                ioVariable = IoVariable.BaseInstance;
                return true;
            case Constants.NvnDrawIndexByteOffset / 4:
                ioVariable = IoVariable.DrawIndex;
                return true;
            default:
                return false;
        }
    }

    private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
    {
        bool modified = false;

        for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
        {
            if (!TryGetDrawParameter(operation.GetSource(srcIndex), out IoVariable ioVariable))
            {
                continue;
            }

            // Load the input variable right before the operation, and use the loaded value as source.
            Operand loaded = Local();

            node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));
            operation.SetSource(srcIndex, loaded);

            modified = true;
        }

        return modified;
    }

    private static bool HasConstantBufferDrawParameters(Operation operation)
    {
        for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
        {
            if (TryGetDrawParameter(operation.GetSource(srcIndex), out _))
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
/// <summary>
/// Transform pass for fragment shaders on hosts with reduced precision,
/// that forces some additions feeding a reciprocal to be precise.
/// </summary>
class ForcePreciseEnable : ITransformPass
{
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        if (stage != ShaderStage.Fragment)
        {
            return false;
        }

        return gpuAccessor.QueryHostReducedPrecision();
    }

    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        // Shaders sometimes add a small bias to a value before dividing by it, to avoid a division by zero.
        // With reduced precision the bias can be rounded to 0, which brings the division by zero back.
        // The addition is forced to be precise to avoid that, even if the host
        // prefers imprecise operations for performance.
        // The pattern matched here is 1.0 / (x + constant).

        Operation divideOp = (Operation)node.Value;

        if (divideOp.Inst != (Instruction.FP32 | Instruction.Divide))
        {
            return node;
        }

        Operand dividend = divideOp.GetSource(0);

        if (dividend.Type != OperandType.Constant || dividend.AsFloat() != 1f)
        {
            return node;
        }

        if (divideOp.GetSource(1).AsgOp is Operation addOp &&
            addOp.Inst == (Instruction.FP32 | Instruction.Add) &&
            addOp.GetSource(1).Type == OperandType.Constant)
        {
            addOp.ForcePrecise = true;
        }

        return node;
    }
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
/// <summary>
/// Contract of a transform pass. Both members are static, they are implemented by the pass type itself.
/// </summary>
interface ITransformPass
{
    /// <summary>
    /// Checks if the pass should run for a shader.
    /// </summary>
    /// <param name="gpuAccessor">GPU accessor</param>
    /// <param name="stage">Shader stage</param>
    /// <param name="targetLanguage">Target language</param>
    /// <param name="usedFeatures">Features used by the shader</param>
    /// <returns>True if the pass should run, false otherwise</returns>
    static abstract bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures);

    /// <summary>
    /// Runs the pass over a single node.
    /// </summary>
    /// <param name="context">Transform context</param>
    /// <param name="node">Node to process</param>
    /// <returns>The node that was passed in, or the node that took its place if it was replaced</returns>
    static abstract LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node);
}
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
/// <summary>
/// Transform pass for compute shaders using shared memory on targets other than SPIR-V,
/// that replaces signed atomic min/max operations on shared memory by calls to helper functions.
/// </summary>
class SharedAtomicSignedCas : ITransformPass
{
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        if (targetLanguage == TargetLanguage.Spirv || stage != ShaderStage.Compute)
        {
            return false;
        }

        return usedFeatures.HasFlag(FeatureFlags.SharedMemory);
    }

    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation atomicOp = (Operation)node.Value;
        HelperFunctionName helperName;

        switch (atomicOp.Inst)
        {
            case Instruction.AtomicMaxS32:
                helperName = HelperFunctionName.SharedAtomicMaxS32;
                break;
            case Instruction.AtomicMinS32:
                helperName = HelperFunctionName.SharedAtomicMinS32;
                break;
            default:
                return node;
        }

        if (atomicOp.StorageKind != StorageKind.SharedMemory)
        {
            return node;
        }

        // Sources are the memory identifier, the byte offset and the value, in that order.
        Operand memoryId = atomicOp.GetSource(0);

        Debug.Assert(memoryId.Type == OperandType.Constant);

        int functionId = context.Hfm.GetOrCreateFunctionId(helperName, memoryId.Value);

        Operand[] callArgs = new Operand[] { Const(functionId), atomicOp.GetSource(1), atomicOp.GetSource(2) };

        // The call takes over the destination of the atomic operation, which is then removed.
        Operation callOp = new Operation(Instruction.Call, 0, atomicOp.Dest, callArgs);
        LinkedListNode<INode> callNode = node.List.AddBefore(node, callOp);

        Utils.DeleteNode(node, atomicOp);

        return callNode;
    }
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
/// <summary>
/// Transform pass for compute shaders using shared memory, that replaces
/// 8-bit and 16-bit stores to shared memory by calls to helper functions.
/// </summary>
class SharedStoreSmallIntCas : ITransformPass
{
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        if (stage != ShaderStage.Compute)
        {
            return false;
        }

        return usedFeatures.HasFlag(FeatureFlags.SharedMemory);
    }

    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation storeOp = (Operation)node.Value;
        HelperFunctionName helperName;

        switch (storeOp.StorageKind)
        {
            case StorageKind.SharedMemory8:
                helperName = HelperFunctionName.SharedStore8;
                break;
            case StorageKind.SharedMemory16:
                helperName = HelperFunctionName.SharedStore16;
                break;
            default:
                return node;
        }

        if (storeOp.Inst != Instruction.Store)
        {
            return node;
        }

        // Sources are the memory identifier, the byte offset and the value, in that order.
        Operand memoryId = storeOp.GetSource(0);

        Debug.Assert(memoryId.Type == OperandType.Constant);

        int functionId = context.Hfm.GetOrCreateFunctionId(helperName, memoryId.Value);

        Operand[] callArgs = new Operand[] { Const(functionId), storeOp.GetSource(1), storeOp.GetSource(2) };

        // The call has no destination, it only replaces the store, which is then removed.
        Operation callOp = new Operation(Instruction.Call, 0, (Operand)null, callArgs);
        LinkedListNode<INode> callNode = node.List.AddBefore(node, callOp);

        Utils.DeleteNode(node, storeOp);

        return callNode;
    }
}
|
||||
}
|
|
@ -1,268 +1,45 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
||||
{
|
||||
static class Rewriter
|
||||
class TexturePass : ITransformPass
|
||||
{
|
||||
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
|
||||
public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
|
||||
{
|
||||
bool isVertexShader = config.Stage == ShaderStage.Vertex;
|
||||
bool isImpreciseFragmentShader = config.Stage == ShaderStage.Fragment && config.GpuAccessor.QueryHostReducedPrecision();
|
||||
bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
|
||||
bool hasVectorIndexingBug = config.GpuAccessor.QueryHostHasVectorIndexingBug();
|
||||
bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();
|
||||
return true;
|
||||
}
|
||||
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
|
||||
{
|
||||
if (node.Value is TextureOperation texOp)
|
||||
{
|
||||
BasicBlock block = blocks[blkIndex];
|
||||
node = InsertTexelFetchScale(context.Hfm, node, context.ResourceManager, context.Stage);
|
||||
node = InsertTextureSizeUnscale(context.Hfm, node, context.ResourceManager, context.Stage);
|
||||
|
||||
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
|
||||
if (texOp.Inst == Instruction.TextureSample)
|
||||
{
|
||||
if (node.Value is not Operation operation)
|
||||
node = InsertCoordNormalization(context.Hfm, node, context.ResourceManager, context.GpuAccessor, context.Stage);
|
||||
node = InsertCoordGatherBias(node, context.ResourceManager, context.GpuAccessor);
|
||||
node = InsertConstOffsets(node, context.ResourceManager, context.GpuAccessor);
|
||||
|
||||
if (texOp.Type == SamplerType.TextureBuffer && !context.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isVertexShader)
|
||||
{
|
||||
if (hasConstantBufferDrawParameters)
|
||||
{
|
||||
if (ReplaceConstantBufferWithDrawParameters(node, operation))
|
||||
{
|
||||
config.SetUsedFeature(FeatureFlags.DrawParameters);
|
||||
}
|
||||
}
|
||||
else if (HasConstantBufferDrawParameters(operation))
|
||||
{
|
||||
config.SetUsedFeature(FeatureFlags.DrawParameters);
|
||||
}
|
||||
}
|
||||
|
||||
if (isImpreciseFragmentShader)
|
||||
{
|
||||
EnableForcePreciseIfNeeded(operation);
|
||||
}
|
||||
|
||||
if (hasVectorIndexingBug)
|
||||
{
|
||||
InsertVectorComponentSelect(node, config);
|
||||
}
|
||||
|
||||
if (operation is TextureOperation texOp)
|
||||
{
|
||||
node = InsertTexelFetchScale(hfm, node, config);
|
||||
node = InsertTextureSizeUnscale(hfm, node, config);
|
||||
|
||||
if (texOp.Inst == Instruction.TextureSample)
|
||||
{
|
||||
node = InsertCoordNormalization(hfm, node, config);
|
||||
node = InsertCoordGatherBias(node, config);
|
||||
node = InsertConstOffsets(node, config);
|
||||
|
||||
if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat)
|
||||
{
|
||||
node = InsertSnormNormalization(node, config);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
node = InsertSharedStoreSmallInt(hfm, node);
|
||||
|
||||
if (config.Options.TargetLanguage != TargetLanguage.Spirv)
|
||||
{
|
||||
node = InsertSharedAtomicSigned(hfm, node);
|
||||
}
|
||||
node = InsertSnormNormalization(node, context.ResourceManager, context.GpuAccessor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private static void EnableForcePreciseIfNeeded(Operation operation)
|
||||
{
|
||||
// There are some cases where a small bias is added to values to prevent division by zero.
|
||||
// When operating with reduced precision, it is possible for this bias to get rounded to 0
|
||||
// and cause a division by zero.
|
||||
// To prevent that, we force those operations to be precise even if the host wants
|
||||
// imprecise operations for performance.
|
||||
|
||||
if (operation.Inst == (Instruction.FP32 | Instruction.Divide) &&
|
||||
operation.GetSource(0).Type == OperandType.Constant &&
|
||||
operation.GetSource(0).AsFloat() == 1f &&
|
||||
operation.GetSource(1).AsgOp is Operation addOp &&
|
||||
addOp.Inst == (Instruction.FP32 | Instruction.Add) &&
|
||||
addOp.GetSource(1).Type == OperandType.Constant)
|
||||
{
|
||||
addOp.ForcePrecise = true;
|
||||
}
|
||||
}
|
||||
|
||||
private static void InsertVectorComponentSelect(LinkedListNode<INode> node, ShaderConfig config)
|
||||
{
|
||||
Operation operation = (Operation)node.Value;
|
||||
|
||||
if (operation.Inst != Instruction.Load ||
|
||||
operation.StorageKind != StorageKind.ConstantBuffer ||
|
||||
operation.SourcesCount < 3)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Operand bindingIndex = operation.GetSource(0);
|
||||
Operand fieldIndex = operation.GetSource(1);
|
||||
Operand elemIndex = operation.GetSource(operation.SourcesCount - 1);
|
||||
|
||||
if (bindingIndex.Type != OperandType.Constant ||
|
||||
fieldIndex.Type != OperandType.Constant ||
|
||||
elemIndex.Type == OperandType.Constant)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
BufferDefinition buffer = config.Properties.ConstantBuffers[bindingIndex.Value];
|
||||
StructureField field = buffer.Type.Fields[fieldIndex.Value];
|
||||
|
||||
int elemCount = (field.Type & AggregateType.ElementCountMask) switch
|
||||
{
|
||||
AggregateType.Vector2 => 2,
|
||||
AggregateType.Vector3 => 3,
|
||||
AggregateType.Vector4 => 4,
|
||||
_ => 1,
|
||||
};
|
||||
|
||||
if (elemCount == 1)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
Operand result = null;
|
||||
|
||||
for (int i = 0; i < elemCount; i++)
|
||||
{
|
||||
Operand value = Local();
|
||||
Operand[] inputs = new Operand[operation.SourcesCount];
|
||||
|
||||
for (int srcIndex = 0; srcIndex < inputs.Length - 1; srcIndex++)
|
||||
{
|
||||
inputs[srcIndex] = operation.GetSource(srcIndex);
|
||||
}
|
||||
|
||||
inputs[^1] = Const(i);
|
||||
|
||||
Operation loadOp = new(Instruction.Load, StorageKind.ConstantBuffer, value, inputs);
|
||||
|
||||
node.List.AddBefore(node, loadOp);
|
||||
|
||||
if (i == 0)
|
||||
{
|
||||
result = value;
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand isCurrentIndex = Local();
|
||||
Operand selection = Local();
|
||||
|
||||
Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(i) });
|
||||
Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result });
|
||||
|
||||
node.List.AddBefore(node, compareOp);
|
||||
node.List.AddBefore(node, selectOp);
|
||||
|
||||
result = selection;
|
||||
}
|
||||
}
|
||||
|
||||
operation.TurnIntoCopy(result);
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node)
|
||||
{
|
||||
Operation operation = (Operation)node.Value;
|
||||
HelperFunctionName name;
|
||||
|
||||
if (operation.StorageKind == StorageKind.SharedMemory8)
|
||||
{
|
||||
name = HelperFunctionName.SharedStore8;
|
||||
}
|
||||
else if (operation.StorageKind == StorageKind.SharedMemory16)
|
||||
{
|
||||
name = HelperFunctionName.SharedStore16;
|
||||
}
|
||||
else
|
||||
{
|
||||
return node;
|
||||
}
|
||||
|
||||
if (operation.Inst != Instruction.Store)
|
||||
{
|
||||
return node;
|
||||
}
|
||||
|
||||
Operand memoryId = operation.GetSource(0);
|
||||
Operand byteOffset = operation.GetSource(1);
|
||||
Operand value = operation.GetSource(2);
|
||||
|
||||
Debug.Assert(memoryId.Type == OperandType.Constant);
|
||||
|
||||
int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value);
|
||||
|
||||
Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };
|
||||
|
||||
LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs));
|
||||
|
||||
Utils.DeleteNode(node, operation);
|
||||
|
||||
return newNode;
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces a signed 32-bit atomic min/max on shared memory with a call to the matching helper function.
/// </summary>
/// <param name="hfm">Helper function manager used to get (or create) the helper function ID</param>
/// <param name="node">Node holding the operation to be inspected</param>
/// <returns>The node of the inserted call, or <paramref name="node"/> itself if nothing was replaced</returns>
private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node)
{
    Operation atomicOp = (Operation)node.Value;

    HelperFunctionName helper;

    // Only the signed maximum and minimum atomics are handled here.
    switch (atomicOp.Inst)
    {
        case Instruction.AtomicMaxS32:
            helper = HelperFunctionName.SharedAtomicMaxS32;
            break;
        case Instruction.AtomicMinS32:
            helper = HelperFunctionName.SharedAtomicMinS32;
            break;
        default:
            return node;
    }

    if (atomicOp.StorageKind != StorageKind.SharedMemory)
    {
        return node;
    }

    // The call writes to the same destination that the atomic operation had.
    Operand dest = atomicOp.Dest;

    // Sources are: memory ID, byte offset and the value operand.
    Operand memoryId = atomicOp.GetSource(0);
    Operand byteOffset = atomicOp.GetSource(1);
    Operand value = atomicOp.GetSource(2);

    Debug.Assert(memoryId.Type == OperandType.Constant);

    int functionId = hfm.GetOrCreateFunctionId(helper, memoryId.Value);

    Operation callOp = new Operation(
        Instruction.Call,
        0,
        dest,
        new Operand[] { Const(functionId), byteOffset, value });

    LinkedListNode<INode> callNode = node.List.AddBefore(node, callOp);

    Utils.DeleteNode(node, atomicOp);

    return callNode;
}
|
||||
|
||||
private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertTexelFetchScale(
|
||||
HelperFunctionManager hfm,
|
||||
LinkedListNode<INode> node,
|
||||
ResourceManager resourceManager,
|
||||
ShaderStage stage)
|
||||
{
|
||||
TextureOperation texOp = (TextureOperation)node.Value;
|
||||
|
||||
|
@ -280,20 +57,20 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
(intCoords || isImage) &&
|
||||
!isBindless &&
|
||||
!isIndexed &&
|
||||
config.Stage.SupportsRenderScale() &&
|
||||
stage.SupportsRenderScale() &&
|
||||
TypeSupportsScale(texOp.Type))
|
||||
{
|
||||
int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale);
|
||||
int samplerIndex = isImage
|
||||
? config.ResourceManager.GetTextureDescriptors().Length + config.ResourceManager.FindImageDescriptorIndex(texOp.Binding)
|
||||
: config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding);
|
||||
? resourceManager.GetTextureDescriptors().Length + resourceManager.FindImageDescriptorIndex(texOp.Binding)
|
||||
: resourceManager.FindTextureDescriptorIndex(texOp.Binding);
|
||||
|
||||
for (int index = 0; index < coordsCount; index++)
|
||||
{
|
||||
Operand scaledCoord = Local();
|
||||
Operand[] callArgs;
|
||||
|
||||
if (config.Stage == ShaderStage.Fragment)
|
||||
if (stage == ShaderStage.Fragment)
|
||||
{
|
||||
callArgs = new Operand[] { Const(functionId), texOp.GetSource(coordsIndex + index), Const(samplerIndex), Const(index) };
|
||||
}
|
||||
|
@ -311,7 +88,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return node;
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertTextureSizeUnscale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertTextureSizeUnscale(
|
||||
HelperFunctionManager hfm,
|
||||
LinkedListNode<INode> node,
|
||||
ResourceManager resourceManager,
|
||||
ShaderStage stage)
|
||||
{
|
||||
TextureOperation texOp = (TextureOperation)node.Value;
|
||||
|
||||
|
@ -322,11 +103,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
texOp.Index < 2 &&
|
||||
!isBindless &&
|
||||
!isIndexed &&
|
||||
config.Stage.SupportsRenderScale() &&
|
||||
stage.SupportsRenderScale() &&
|
||||
TypeSupportsScale(texOp.Type))
|
||||
{
|
||||
int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TextureSizeUnscale);
|
||||
int samplerIndex = config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding);
|
||||
int samplerIndex = resourceManager.FindTextureDescriptorIndex(texOp.Binding);
|
||||
|
||||
for (int index = texOp.DestsCount - 1; index >= 0; index--)
|
||||
{
|
||||
|
@ -356,19 +137,12 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return node;
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks if an image instruction is one that needs to take scaling into account.
/// </summary>
/// <param name="inst">Instruction to check</param>
/// <returns>True if the instruction is an image load, false otherwise</returns>
private static bool IsImageInstructionWithScale(Instruction inst)
{
    // Scaling images that are modified is currently not supported,
    // which leaves the load instruction as the only one to care about.
    bool isImageLoad = inst == Instruction.ImageLoad;

    return isImageLoad;
}
|
||||
|
||||
/// <summary>
/// Checks if a sampler type is one that scaling is applied to.
/// </summary>
/// <param name="type">Sampler type to check</param>
/// <returns>True if the type, once masked with <see cref="SamplerType.Mask"/>, is a 2D texture</returns>
private static bool TypeSupportsScale(SamplerType type)
{
    SamplerType baseType = type & SamplerType.Mask;

    return baseType == SamplerType.Texture2D;
}
|
||||
|
||||
private static LinkedListNode<INode> InsertCoordNormalization(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertCoordNormalization(
|
||||
HelperFunctionManager hfm,
|
||||
LinkedListNode<INode> node,
|
||||
ResourceManager resourceManager,
|
||||
IGpuAccessor gpuAccessor,
|
||||
ShaderStage stage)
|
||||
{
|
||||
// Emulate non-normalized coordinates by normalizing the coordinates on the shader.
|
||||
// Without normalization, the coordinates are expected to be in the [0, W or H] range,
|
||||
|
@ -386,9 +160,9 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
|
||||
|
||||
(int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
|
||||
(int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
|
||||
|
||||
bool isCoordNormalized = config.GpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot);
|
||||
bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot);
|
||||
|
||||
if (isCoordNormalized || intCoords)
|
||||
{
|
||||
|
@ -400,8 +174,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
int coordsCount = texOp.Type.GetDimensions();
|
||||
int coordsIndex = isBindless || isIndexed ? 1 : 0;
|
||||
|
||||
config.SetUsedFeature(FeatureFlags.IntegerSampling);
|
||||
|
||||
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
|
||||
|
||||
for (int index = 0; index < normCoordsCount; index++)
|
||||
|
@ -429,7 +201,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
new[] { coordSize },
|
||||
texSizeSources));
|
||||
|
||||
config.ResourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
|
||||
resourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
|
||||
|
||||
Operand source = texOp.GetSource(coordsIndex + index);
|
||||
|
||||
|
@ -439,13 +211,13 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
texOp.SetSource(coordsIndex + index, coordNormalized);
|
||||
|
||||
InsertTextureSizeUnscale(hfm, textureSizeNode, config);
|
||||
InsertTextureSizeUnscale(hfm, textureSizeNode, resourceManager, stage);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
|
||||
{
|
||||
// The gather behavior when the coordinate sits right in the middle of two texels is not well defined.
|
||||
// To ensure the correct texel is sampled, we add a small bias value to the coordinate.
|
||||
|
@ -457,25 +229,18 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
|
||||
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
|
||||
|
||||
int gatherBiasPrecision = config.GpuAccessor.QueryHostGatherBiasPrecision();
|
||||
int gatherBiasPrecision = gpuAccessor.QueryHostGatherBiasPrecision();
|
||||
|
||||
if (!isGather || gatherBiasPrecision == 0)
|
||||
{
|
||||
return node;
|
||||
}
|
||||
|
||||
#pragma warning disable IDE0059 // Remove unnecessary value assignment
|
||||
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
|
||||
|
||||
bool isArray = (texOp.Type & SamplerType.Array) != 0;
|
||||
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
|
||||
#pragma warning restore IDE0059
|
||||
|
||||
int coordsCount = texOp.Type.GetDimensions();
|
||||
int coordsIndex = isBindless || isIndexed ? 1 : 0;
|
||||
|
||||
config.SetUsedFeature(FeatureFlags.IntegerSampling);
|
||||
|
||||
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
|
||||
|
||||
for (int index = 0; index < normCoordsCount; index++)
|
||||
|
@ -524,7 +289,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return node;
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
|
||||
{
|
||||
// Non-constant texture offsets are not allowed (according to the spec),
|
||||
// however some GPUs do support that.
|
||||
|
@ -540,7 +305,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
|
||||
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
|
||||
|
||||
bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
|
||||
bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
|
||||
|
||||
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
|
||||
|
||||
|
@ -673,8 +438,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
if (isGather && !isShadow)
|
||||
{
|
||||
config.SetUsedFeature(FeatureFlags.IntegerSampling);
|
||||
|
||||
Operand[] newSources = new Operand[sources.Length];
|
||||
|
||||
sources.CopyTo(newSources, 0);
|
||||
|
@ -741,8 +504,6 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
else
|
||||
{
|
||||
config.SetUsedFeature(FeatureFlags.IntegerSampling);
|
||||
|
||||
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
|
||||
|
||||
for (int index = 0; index < coordsCount; index++)
|
||||
|
@ -840,7 +601,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return texSizes;
|
||||
}
|
||||
|
||||
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
|
||||
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
|
||||
{
|
||||
TextureOperation texOp = (TextureOperation)node.Value;
|
||||
|
||||
|
@ -851,9 +612,9 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return node;
|
||||
}
|
||||
|
||||
(int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
|
||||
(int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
|
||||
|
||||
TextureFormat format = config.GpuAccessor.QueryTextureFormat(handle, cbufSlot);
|
||||
TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot);
|
||||
|
||||
int maxPositive = format switch
|
||||
{
|
||||
|
@ -926,63 +687,16 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return res;
|
||||
}
|
||||
|
||||
private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
|
||||
private static bool IsImageInstructionWithScale(Instruction inst)
|
||||
{
|
||||
Operand GenerateLoad(IoVariable ioVariable)
|
||||
{
|
||||
Operand value = Local();
|
||||
node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable)));
|
||||
return value;
|
||||
}
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
|
||||
{
|
||||
Operand src = operation.GetSource(srcIndex);
|
||||
|
||||
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
|
||||
{
|
||||
switch (src.GetCbufOffset())
|
||||
{
|
||||
case Constants.NvnBaseVertexByteOffset / 4:
|
||||
operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex));
|
||||
modified = true;
|
||||
break;
|
||||
case Constants.NvnBaseInstanceByteOffset / 4:
|
||||
operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance));
|
||||
modified = true;
|
||||
break;
|
||||
case Constants.NvnDrawIndexByteOffset / 4:
|
||||
operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex));
|
||||
modified = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return modified;
|
||||
// Currently, we don't support scaling images that are modified,
|
||||
// so we only need to care about the load instruction.
|
||||
return inst == Instruction.ImageLoad;
|
||||
}
|
||||
|
||||
private static bool HasConstantBufferDrawParameters(Operation operation)
|
||||
private static bool TypeSupportsScale(SamplerType type)
|
||||
{
|
||||
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
|
||||
{
|
||||
Operand src = operation.GetSource(srcIndex);
|
||||
|
||||
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
|
||||
{
|
||||
switch (src.GetCbufOffset())
|
||||
{
|
||||
case Constants.NvnBaseVertexByteOffset / 4:
|
||||
case Constants.NvnBaseInstanceByteOffset / 4:
|
||||
case Constants.NvnDrawIndexByteOffset / 4:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return (type & SamplerType.Mask) == SamplerType.Texture2D;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
    /// <summary>
    /// Runs the transform passes over the operations of all blocks of a transform context.
    /// </summary>
    static class TransformPasses
    {
        /// <summary>
        /// Runs all the transform passes, in a fixed order.
        /// </summary>
        /// <param name="context">Context with the blocks and state that the passes operate on</param>
        public static void RunPass(TransformContext context)
        {
            RunPass<DrawParametersReplace>(context);
            RunPass<ForcePreciseEnable>(context);
            RunPass<VectorComponentSelect>(context);
            RunPass<TexturePass>(context);
            RunPass<SharedStoreSmallIntCas>(context);
            RunPass<SharedAtomicSignedCas>(context);
        }

        /// <summary>
        /// Runs a single transform pass over every operation node, if the pass reports itself as enabled.
        /// </summary>
        /// <typeparam name="T">Type of the pass to run</typeparam>
        /// <param name="context">Context with the blocks and state that the pass operates on</param>
        private static void RunPass<T>(TransformContext context) where T : ITransformPass
        {
            bool enabled = T.IsEnabled(context.GpuAccessor, context.Stage, context.TargetLanguage, context.UsedFeatures);

            if (!enabled)
            {
                return;
            }

            foreach (BasicBlock block in context.Blocks)
            {
                LinkedListNode<INode> current = block.Operations.First;

                while (current != null)
                {
                    if (current.Value is Operation)
                    {
                        // Iteration continues from whatever node the pass hands back,
                        // which may not be the node that was passed in.
                        current = T.RunPass(context, current);
                    }

                    current = current.Next;
                }
            }
        }
    }
}
|
|
@ -0,0 +1,96 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using System.Collections.Generic;
|
||||
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
    /// <summary>
    /// Transform pass that replaces constant buffer vector loads using a non-constant element index
    /// with one load per element, followed by a chain of compare and select operations.
    /// </summary>
    class VectorComponentSelect : ITransformPass
    {
        /// <summary>
        /// Checks if the pass should run, which is the case when the host reports the vector indexing bug.
        /// </summary>
        public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
        {
            return gpuAccessor.QueryHostHasVectorIndexingBug();
        }

        /// <summary>
        /// Rewrites the operation on the node, if it is a dynamically indexed constant buffer vector load.
        /// </summary>
        /// <param name="context">Transform context, used to look up the constant buffer definition</param>
        /// <param name="node">Node holding the operation to be inspected</param>
        /// <returns>The same node that was passed in</returns>
        public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
        {
            Operation loadOperation = (Operation)node.Value;

            bool isCbufLoad =
                loadOperation.Inst == Instruction.Load &&
                loadOperation.StorageKind == StorageKind.ConstantBuffer;

            if (!isCbufLoad || loadOperation.SourcesCount < 3)
            {
                return node;
            }

            int lastSourceIndex = loadOperation.SourcesCount - 1;

            Operand binding = loadOperation.GetSource(0);
            Operand fieldSelector = loadOperation.GetSource(1);
            Operand dynamicIndex = loadOperation.GetSource(lastSourceIndex);

            // Binding and field must be known in order to find the field type,
            // and a constant element index needs no replacement at all.
            bool hasConstantPath = binding.Type == OperandType.Constant && fieldSelector.Type == OperandType.Constant;

            if (!hasConstantPath || dynamicIndex.Type == OperandType.Constant)
            {
                return node;
            }

            BufferDefinition bufferDefinition = context.ResourceManager.Properties.ConstantBuffers[binding.Value];
            StructureField structureField = bufferDefinition.Type.Fields[fieldSelector.Value];

            int componentsCount = GetElementCount(structureField.Type);

            if (componentsCount == 1)
            {
                return node;
            }

            Operand selected = null;

            for (int component = 0; component < componentsCount; component++)
            {
                Operand componentValue = Local();

                // Same sources as the original load, except that the last one becomes a constant index.
                Operand[] sources = new Operand[lastSourceIndex + 1];

                for (int source = 0; source < lastSourceIndex; source++)
                {
                    sources[source] = loadOperation.GetSource(source);
                }

                sources[lastSourceIndex] = Const(component);

                node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.ConstantBuffer, componentValue, sources));

                if (component == 0)
                {
                    // The first element is what remains selected when no later comparison matches.
                    selected = componentValue;

                    continue;
                }

                Operand indexMatches = Local();
                Operand nextSelected = Local();

                Operation compareOp = new(Instruction.CompareEqual, indexMatches, new Operand[] { dynamicIndex, Const(component) });
                Operation selectOp = new(Instruction.ConditionalSelect, nextSelected, new Operand[] { indexMatches, componentValue, selected });

                node.List.AddBefore(node, compareOp);
                node.List.AddBefore(node, selectOp);

                selected = nextSelected;
            }

            loadOperation.TurnIntoCopy(selected);

            return node;
        }

        /// <summary>
        /// Gets the number of elements of an aggregate type, which is 1 for anything that is not a 2, 3 or 4 elements vector.
        /// </summary>
        private static int GetElementCount(AggregateType type)
        {
            switch (type & AggregateType.ElementCountMask)
            {
                case AggregateType.Vector2:
                    return 2;
                case AggregateType.Vector3:
                    return 3;
                case AggregateType.Vector4:
                    return 4;
                default:
                    return 1;
            }
        }
    }
}
|
|
@ -1,11 +1,6 @@
|
|||
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
|
||||
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
|
||||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||
|
||||
|
@ -13,6 +8,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
public static class Translator
|
||||
{
|
||||
private const int ThreadsPerWarp = 32;
|
||||
private const int HeaderSize = 0x50;
|
||||
|
||||
internal readonly struct FunctionCode
|
||||
|
@ -30,94 +26,31 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return DecodeShader(address, gpuAccessor, options);
|
||||
}
|
||||
|
||||
/// <summary>
/// Translates the emitted code of all functions into a shader program for the configured target language.
/// </summary>
/// <param name="functions">Emitted code of each function</param>
/// <param name="config">Shader configuration</param>
/// <returns>The generated shader program</returns>
/// <exception cref="NotImplementedException">Thrown when the target language is neither GLSL nor SPIR-V</exception>
internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
{
    int functionsCount = functions.Length;

    ControlFlowGraph[] graphs = new ControlFlowGraph[functionsCount];
    RegisterUsage.FunctionRegisterUsage[] usages = new RegisterUsage.FunctionRegisterUsage[functionsCount];

    List<Function> funcs = new(functionsCount);

    for (int index = 0; index < functionsCount; index++)
    {
        graphs[index] = ControlFlowGraph.Create(functions[index].Code);

        // Register usage is only computed for functions other than the first one.
        if (index != 0)
        {
            usages[index] = RegisterUsage.RunPass(graphs[index]);
        }

        // Reserve the slot now, the function itself is only created further below.
        funcs.Add(null);
    }

    HelperFunctionManager hfm = new(funcs, config.Stage);

    for (int index = 0; index < functionsCount; index++)
    {
        ControlFlowGraph graph = graphs[index];

        bool hasArguments = index != 0;

        int inArgumentsCount = hasArguments ? usages[index].InArguments.Length : 0;
        int outArgumentsCount = hasArguments ? usages[index].OutArguments.Length : 0;

        if (graph.Blocks.Length != 0)
        {
            RegisterUsage.FixupCalls(graph.Blocks, usages);

            Dominance.FindDominators(graph);
            Dominance.FindDominanceFrontiers(graph.Blocks);

            Ssa.Rename(graph.Blocks);

            Optimizer.RunPass(hfm, graph.Blocks, config);
            Rewriter.RunPass(hfm, graph.Blocks, config);
        }

        funcs[index] = new Function(graph.Blocks, $"fun{index}", false, inArgumentsCount, outArgumentsCount);
    }

    var identification = ShaderIdentifier.Identify(funcs, config);

    var structuredInfo = StructuredProgram.MakeStructuredProgram(funcs, config);

    var programInfo = config.CreateProgramInfo(identification);

    switch (config.Options.TargetLanguage)
    {
        case TargetLanguage.Glsl:
            return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, config));
        case TargetLanguage.Spirv:
            return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, config));
        default:
            throw new NotImplementedException(config.Options.TargetLanguage.ToString());
    }
}
|
||||
|
||||
private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
|
||||
{
|
||||
ShaderConfig config;
|
||||
int localMemorySize;
|
||||
ShaderDefinitions definitions;
|
||||
DecodedProgram program;
|
||||
ulong maxEndAddress = 0;
|
||||
|
||||
if (options.Flags.HasFlag(TranslationFlags.Compute))
|
||||
{
|
||||
config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize());
|
||||
definitions = CreateComputeDefinitions(gpuAccessor);
|
||||
localMemorySize = gpuAccessor.QueryComputeLocalMemorySize();
|
||||
|
||||
program = Decoder.Decode(config, address);
|
||||
program = Decoder.Decode(definitions, gpuAccessor, address);
|
||||
}
|
||||
else
|
||||
{
|
||||
config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);
|
||||
ShaderHeader header = new(gpuAccessor, address);
|
||||
|
||||
program = Decoder.Decode(config, address + HeaderSize);
|
||||
definitions = CreateGraphicsDefinitions(gpuAccessor, header);
|
||||
localMemorySize = GetLocalMemorySize(header);
|
||||
|
||||
program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize);
|
||||
}
|
||||
|
||||
ulong maxEndAddress = 0;
|
||||
|
||||
foreach (DecodedFunction function in program)
|
||||
{
|
||||
foreach (Block block in function.Blocks)
|
||||
|
@ -129,12 +62,76 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
}
|
||||
|
||||
config.SizeAdd((int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));
|
||||
int size = (int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize);
|
||||
|
||||
return new TranslatorContext(address, program, config);
|
||||
return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program);
|
||||
}
|
||||
|
||||
internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
|
||||
/// <summary>
/// Creates the shader definitions for a compute shader, using the local size queried from the GPU accessor.
/// </summary>
/// <param name="gpuAccessor">GPU accessor used to query the compute local size</param>
/// <returns>Definitions for the compute stage</returns>
private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor)
{
    int localSizeX = gpuAccessor.QueryComputeLocalSizeX();
    int localSizeY = gpuAccessor.QueryComputeLocalSizeY();
    int localSizeZ = gpuAccessor.QueryComputeLocalSizeZ();

    return new ShaderDefinitions(ShaderStage.Compute, localSizeX, localSizeY, localSizeZ);
}
|
||||
|
||||
/// <summary>
/// Creates the shader definitions for a graphics shader, from the shader header
/// and the state queried from the GPU accessor.
/// </summary>
/// <param name="gpuAccessor">GPU accessor used to query graphics and transform feedback state</param>
/// <param name="header">Header of the shader</param>
/// <returns>Definitions for the stage indicated on the header</returns>
private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header)
{
    // Transform feedback is only considered enabled if the host also supports it.
    bool tfbEnabled = gpuAccessor.QueryTransformFeedbackEnabled() && gpuAccessor.QueryHostSupportsTransformFeedback();

    TransformFeedbackOutput[] tfbOutputs = null;
    ulong tfbVecMap = 0UL;

    if (tfbEnabled)
    {
        tfbOutputs = new TransformFeedbackOutput[0xc0];

        for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
        {
            var varyingLocations = gpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
            var bufferStride = gpuAccessor.QueryTransformFeedbackStride(bufferIndex);

            for (int locationIndex = 0; locationIndex < varyingLocations.Length; locationIndex++)
            {
                byte wordOffset = varyingLocations[locationIndex];

                // Locations that don't fit on the outputs array are ignored.
                if (wordOffset >= 0xc0)
                {
                    continue;
                }

                tfbOutputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, locationIndex * 4, bufferStride);

                // One bit per group of 4 words.
                tfbVecMap |= 1UL << (wordOffset / 4);
            }
        }
    }

    return new ShaderDefinitions(
        header.Stage,
        gpuAccessor.QueryGraphicsState(),
        header.Stage == ShaderStage.Geometry && header.GpPassthrough,
        header.ThreadsPerInputPrimitive,
        header.OutputTopology,
        header.MaxOutputVertexCount,
        header.ImapTypes,
        header.OmapTargets,
        header.OmapSampleMask,
        header.OmapDepth,
        tfbEnabled,
        tfbVecMap,
        tfbOutputs);
}
|
||||
|
||||
/// <summary>
/// Calculates the local memory size from the sizes present on the shader header.
/// </summary>
/// <param name="header">Header of the shader</param>
/// <returns>Sum of the low and high sizes, plus the CRS size divided by the number of threads per warp</returns>
private static int GetLocalMemorySize(ShaderHeader header)
{
    int lowAndHighSize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
    int crsSizePerThread = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;

    return lowAndHighSize + crsSizePerThread;
}
|
||||
|
||||
internal static FunctionCode[] EmitShader(
|
||||
TranslatorContext translatorContext,
|
||||
ResourceManager resourceManager,
|
||||
DecodedProgram program,
|
||||
bool initializeOutputs,
|
||||
out int initializationOperations)
|
||||
{
|
||||
initializationOperations = 0;
|
||||
|
||||
|
@ -149,11 +146,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
|
||||
for (int index = 0; index < functions.Length; index++)
|
||||
{
|
||||
EmitterContext context = new(program, config, index != 0);
|
||||
EmitterContext context = new(translatorContext, resourceManager, program, index != 0);
|
||||
|
||||
if (initializeOutputs && index == 0)
|
||||
{
|
||||
EmitOutputsInitialization(context, config);
|
||||
EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage);
|
||||
initializationOperations = context.OperationsCount;
|
||||
}
|
||||
|
||||
|
@ -168,27 +165,27 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
EmitOps(context, block);
|
||||
}
|
||||
|
||||
functions[index] = new FunctionCode(context.GetOperations());
|
||||
functions[index] = new(context.GetOperations());
|
||||
}
|
||||
|
||||
return functions;
|
||||
}
|
||||
|
||||
private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
|
||||
private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage)
|
||||
{
|
||||
// Compute has no output attributes, and fragment is the last stage, so we
|
||||
// don't need to initialize outputs on those stages.
|
||||
if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment)
|
||||
if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (config.Stage == ShaderStage.Vertex)
|
||||
if (stage == ShaderStage.Vertex)
|
||||
{
|
||||
InitializePositionOutput(context);
|
||||
}
|
||||
|
||||
UInt128 usedAttributes = context.Config.NextInputAttributesComponents;
|
||||
UInt128 usedAttributes = context.TranslatorContext.AttributeUsage.NextInputAttributesComponents;
|
||||
while (usedAttributes != UInt128.Zero)
|
||||
{
|
||||
int index = (int)UInt128.TrailingZeroCount(usedAttributes);
|
||||
|
@ -197,7 +194,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
usedAttributes &= ~(UInt128.One << index);
|
||||
|
||||
// We don't need to initialize passthrough attributes.
|
||||
if ((context.Config.PassthroughAttributes & (1 << vecIndex)) != 0)
|
||||
if ((context.TranslatorContext.AttributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -205,30 +202,28 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
|
||||
}
|
||||
|
||||
if (context.Config.NextUsedInputAttributesPerPatch != null)
|
||||
if (context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch != null)
|
||||
{
|
||||
foreach (int vecIndex in context.Config.NextUsedInputAttributesPerPatch.Order())
|
||||
foreach (int vecIndex in context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch.Order())
|
||||
{
|
||||
InitializeOutput(context, vecIndex, perPatch: true);
|
||||
}
|
||||
}
|
||||
|
||||
if (config.NextUsesFixedFuncAttributes)
|
||||
if (attributeUsage.NextUsesFixedFuncAttributes)
|
||||
{
|
||||
bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
|
||||
bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation();
|
||||
int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;
|
||||
|
||||
for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
|
||||
{
|
||||
int index = config.GetFreeUserAttribute(isOutput: true, i);
|
||||
int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i);
|
||||
if (index < 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
InitializeOutput(context, index, perPatch: false);
|
||||
|
||||
config.SetOutputUserAttributeFixedFunc(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -253,11 +248,11 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
|
||||
|
||||
if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
|
||||
if (context.TranslatorContext.Definitions.OaIndexing)
|
||||
{
|
||||
Operand invocationId = null;
|
||||
|
||||
if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
|
||||
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
|
||||
{
|
||||
invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
|
||||
}
|
||||
|
@ -268,7 +263,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
}
|
||||
else
|
||||
{
|
||||
if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
|
||||
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
|
||||
{
|
||||
Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
|
||||
context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
|
||||
|
@ -286,7 +281,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
InstOp op = block.OpCodes[opIndex];
|
||||
|
||||
if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
|
||||
if (context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode))
|
||||
{
|
||||
string instName;
|
||||
|
||||
|
@ -298,7 +293,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
{
|
||||
instName = "???";
|
||||
|
||||
context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
|
||||
context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
|
||||
}
|
||||
|
||||
string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
|
||||
using Ryujinx.Graphics.Shader.CodeGen;
|
||||
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
|
||||
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
|
||||
using Ryujinx.Graphics.Shader.Decoders;
|
||||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using Ryujinx.Graphics.Shader.StructuredIr;
|
||||
using Ryujinx.Graphics.Shader.Translation.Optimizations;
|
||||
using Ryujinx.Graphics.Shader.Translation.Transforms;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
@ -15,22 +18,47 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
public class TranslatorContext
|
||||
{
|
||||
private readonly DecodedProgram _program;
|
||||
private readonly ShaderConfig _config;
|
||||
private readonly int _localMemorySize;
|
||||
|
||||
public ulong Address { get; }
|
||||
public int Size { get; }
|
||||
public int Cb1DataSize => _program.Cb1DataSize;
|
||||
|
||||
public ShaderStage Stage => _config.Stage;
|
||||
public int Size => _config.Size;
|
||||
public int Cb1DataSize => _config.Cb1DataSize;
|
||||
public bool LayerOutputWritten => _config.LayerOutputWritten;
|
||||
internal bool HasLayerInputAttribute { get; private set; }
|
||||
internal int GpLayerInputAttribute { get; private set; }
|
||||
|
||||
public IGpuAccessor GpuAccessor => _config.GpuAccessor;
|
||||
internal AttributeUsage AttributeUsage => _program.AttributeUsage;
|
||||
|
||||
internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
|
||||
internal ShaderDefinitions Definitions { get; }
|
||||
|
||||
public ShaderStage Stage => Definitions.Stage;
|
||||
|
||||
internal IGpuAccessor GpuAccessor { get; }
|
||||
|
||||
internal TranslationOptions Options { get; }
|
||||
|
||||
internal FeatureFlags UsedFeatures { get; private set; }
|
||||
|
||||
public bool LayerOutputWritten { get; private set; }
|
||||
public int LayerOutputAttribute { get; private set; }
|
||||
|
||||
internal TranslatorContext(
|
||||
ulong address,
|
||||
int size,
|
||||
int localMemorySize,
|
||||
ShaderDefinitions definitions,
|
||||
IGpuAccessor gpuAccessor,
|
||||
TranslationOptions options,
|
||||
DecodedProgram program)
|
||||
{
|
||||
Address = address;
|
||||
Size = size;
|
||||
_program = program;
|
||||
_config = config;
|
||||
_localMemorySize = localMemorySize;
|
||||
Definitions = definitions;
|
||||
GpuAccessor = gpuAccessor;
|
||||
Options = options;
|
||||
UsedFeatures = program.UsedFeatures;
|
||||
}
|
||||
|
||||
private static bool IsLoadUserDefined(Operation operation)
|
||||
|
@ -131,63 +159,259 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
return output;
|
||||
}
|
||||
|
||||
public void SetNextStage(TranslatorContext nextStage)
|
||||
internal int GetDepthRegister()
|
||||
{
|
||||
_config.MergeFromtNextStage(nextStage._config);
|
||||
// The depth register is always two registers after the last color output.
|
||||
return BitOperations.PopCount((uint)Definitions.OmapTargets) + 1;
|
||||
}
|
||||
|
||||
/// <summary>
/// Flags the layer output as written and remembers which attribute carries it.
/// </summary>
/// <param name="attr">Value stored in <see cref="LayerOutputAttribute"/></param>
public void SetLayerOutputAttribute(int attr)
{
    // Both properties are always updated together, so assign them as a pair.
    (LayerOutputWritten, LayerOutputAttribute) = (true, attr);
}
|
||||
|
||||
public void SetGeometryShaderLayerInputAttribute(int attr)
|
||||
{
|
||||
_config.SetGeometryShaderLayerInputAttribute(attr);
|
||||
UsedFeatures |= FeatureFlags.RtLayer;
|
||||
HasLayerInputAttribute = true;
|
||||
GpLayerInputAttribute = attr;
|
||||
}
|
||||
|
||||
public void SetLastInVertexPipeline()
|
||||
{
|
||||
_config.SetLastInVertexPipeline();
|
||||
Definitions.LastInVertexPipeline = true;
|
||||
}
|
||||
|
||||
public ShaderProgram Translate(TranslatorContext other = null)
|
||||
public void SetNextStage(TranslatorContext nextStage)
|
||||
{
|
||||
bool usesLocalMemory = _config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
|
||||
AttributeUsage.MergeFromtNextStage(
|
||||
Definitions.GpPassthrough,
|
||||
nextStage.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr),
|
||||
nextStage.AttributeUsage);
|
||||
|
||||
_config.ResourceManager.SetCurrentLocalMemory(_config.LocalMemorySize, usesLocalMemory);
|
||||
|
||||
if (_config.Stage == ShaderStage.Compute)
|
||||
// We don't consider geometry shaders using the geometry shader passthrough feature
|
||||
// as being the last because when this feature is used, it can't actually modify any of the outputs,
|
||||
// so the stage that comes before it is the last one that can do modifications.
|
||||
if (nextStage.Definitions.Stage != ShaderStage.Fragment &&
|
||||
(nextStage.Definitions.Stage != ShaderStage.Geometry || !nextStage.Definitions.GpPassthrough))
|
||||
{
|
||||
bool usesSharedMemory = _config.UsedFeatures.HasFlag(FeatureFlags.SharedMemory);
|
||||
Definitions.LastInVertexPipeline = false;
|
||||
}
|
||||
}
|
||||
|
||||
_config.ResourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory);
|
||||
public ShaderProgram Translate()
|
||||
{
|
||||
ResourceManager resourceManager = CreateResourceManager();
|
||||
|
||||
bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
|
||||
|
||||
resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory);
|
||||
|
||||
if (Stage == ShaderStage.Compute)
|
||||
{
|
||||
bool usesSharedMemory = _program.UsedFeatures.HasFlag(FeatureFlags.SharedMemory);
|
||||
|
||||
resourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory);
|
||||
}
|
||||
|
||||
FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _);
|
||||
FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: true, out _);
|
||||
|
||||
if (other != null)
|
||||
return Translate(code, resourceManager, UsedFeatures, _program.ClipDistancesWritten);
|
||||
}
|
||||
|
||||
/// <summary>
/// Translates this program merged with a second program into a single shader.
/// </summary>
/// <remarks>
/// Both programs are emitted against the same <see cref="ResourceManager"/>; this program is
/// emitted first without output initialization, then <paramref name="other"/> is emitted with
/// output initialization, and the two are joined with <c>Combine</c>.
/// NOTE(review): <paramref name="other"/> is presumably the "vertex A" program (see <c>aStart</c>) - confirm with callers.
/// </remarks>
/// <param name="other">Program to merge with this one, or null to translate this program alone</param>
/// <returns>The translated shader program</returns>
public ShaderProgram Translate(TranslatorContext other)
{
    // Passing null used to mean "there is nothing to merge" when this was a single method
    // with an optional parameter. Keep honoring that instead of failing with a
    // NullReferenceException after half of the work was already done.
    if (other == null)
    {
        return Translate();
    }

    ResourceManager resourceManager = CreateResourceManager();

    bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
    resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory);

    FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: false, out _);

    // The resource manager is shared by both programs, so the current local memory
    // has to be switched over before the second program is emitted.
    bool otherUsesLocalMemory = other._program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
    resourceManager.SetCurrentLocalMemory(other._localMemorySize, otherUsesLocalMemory);

    FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, initializeOutputs: true, out int aStart);

    code = Combine(otherCode, code, aStart);

    // The merged shader uses the features and writes the clip distances of both programs.
    return Translate(
        code,
        resourceManager,
        UsedFeatures | other.UsedFeatures,
        (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten));
}
|
||||
|
||||
private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten)
|
||||
{
|
||||
var cfgs = new ControlFlowGraph[functions.Length];
|
||||
var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length];
|
||||
|
||||
for (int i = 0; i < functions.Length; i++)
|
||||
{
|
||||
other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());
|
||||
cfgs[i] = ControlFlowGraph.Create(functions[i].Code);
|
||||
|
||||
// We need to share the resource manager since both shaders access the same constant buffers.
|
||||
other._config.ResourceManager = _config.ResourceManager;
|
||||
other._config.ResourceManager.SetCurrentLocalMemory(other._config.LocalMemorySize, other._config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory));
|
||||
|
||||
FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);
|
||||
|
||||
code = Combine(otherCode, code, aStart);
|
||||
|
||||
_config.InheritFrom(other._config);
|
||||
if (i != 0)
|
||||
{
|
||||
frus[i] = RegisterUsage.RunPass(cfgs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return Translator.Translate(code, _config);
|
||||
List<Function> funcs = new(functions.Length);
|
||||
|
||||
for (int i = 0; i < functions.Length; i++)
|
||||
{
|
||||
funcs.Add(null);
|
||||
}
|
||||
|
||||
HelperFunctionManager hfm = new(funcs, Definitions.Stage);
|
||||
|
||||
for (int i = 0; i < functions.Length; i++)
|
||||
{
|
||||
var cfg = cfgs[i];
|
||||
|
||||
int inArgumentsCount = 0;
|
||||
int outArgumentsCount = 0;
|
||||
|
||||
if (i != 0)
|
||||
{
|
||||
var fru = frus[i];
|
||||
|
||||
inArgumentsCount = fru.InArguments.Length;
|
||||
outArgumentsCount = fru.OutArguments.Length;
|
||||
}
|
||||
|
||||
if (cfg.Blocks.Length != 0)
|
||||
{
|
||||
RegisterUsage.FixupCalls(cfg.Blocks, frus);
|
||||
|
||||
Dominance.FindDominators(cfg);
|
||||
Dominance.FindDominanceFrontiers(cfg.Blocks);
|
||||
|
||||
Ssa.Rename(cfg.Blocks);
|
||||
|
||||
TransformContext context = new(
|
||||
hfm,
|
||||
cfg.Blocks,
|
||||
resourceManager,
|
||||
GpuAccessor,
|
||||
Options.TargetLanguage,
|
||||
Definitions.Stage,
|
||||
ref usedFeatures);
|
||||
|
||||
Optimizer.RunPass(context);
|
||||
TransformPasses.RunPass(context);
|
||||
}
|
||||
|
||||
funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
|
||||
}
|
||||
|
||||
var identification = ShaderIdentifier.Identify(funcs, GpuAccessor, Definitions.Stage, Definitions.InputTopology, out int layerInputAttr);
|
||||
|
||||
return Generate(
|
||||
funcs,
|
||||
AttributeUsage,
|
||||
Definitions,
|
||||
resourceManager,
|
||||
usedFeatures,
|
||||
clipDistancesWritten,
|
||||
identification,
|
||||
layerInputAttr);
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the structured program and the program information for the given functions,
/// then runs the code generator selected by the translation options.
/// </summary>
/// <param name="funcs">Functions that make up the shader</param>
/// <param name="attributeUsage">Attribute usage forwarded to the structured program and code generator</param>
/// <param name="definitions">Shader definitions; also supplies the stage and output map targets of the program info</param>
/// <param name="resourceManager">Source of the resource descriptors and shader properties</param>
/// <param name="usedFeatures">Feature flags reported on the program info</param>
/// <param name="clipDistancesWritten">Clip distances value reported on the program info</param>
/// <param name="identification">Shader identification reported on the program info</param>
/// <param name="layerInputAttr">Layer input attribute reported on the program info</param>
/// <returns>The generated shader program</returns>
/// <exception cref="NotImplementedException">The target language is neither GLSL nor SPIR-V</exception>
private ShaderProgram Generate(
    IReadOnlyList<Function> funcs,
    AttributeUsage attributeUsage,
    ShaderDefinitions definitions,
    ResourceManager resourceManager,
    FeatureFlags usedFeatures,
    byte clipDistancesWritten,
    ShaderIdentification identification = ShaderIdentification.None,
    int layerInputAttr = 0)
{
    bool debugMode = Options.Flags.HasFlag(TranslationFlags.DebugMode);

    // The structured program is built before any descriptor is read from the resource manager.
    var structuredProgram = StructuredProgram.MakeStructuredProgram(
        funcs,
        attributeUsage,
        definitions,
        resourceManager,
        debugMode);

    var programInfo = new ShaderProgramInfo(
        resourceManager.GetConstantBufferDescriptors(),
        resourceManager.GetStorageBufferDescriptors(),
        resourceManager.GetTextureDescriptors(),
        resourceManager.GetImageDescriptors(),
        identification,
        layerInputAttr,
        definitions.Stage,
        usedFeatures.HasFlag(FeatureFlags.FragCoordXY),
        usedFeatures.HasFlag(FeatureFlags.InstanceId),
        usedFeatures.HasFlag(FeatureFlags.DrawParameters),
        usedFeatures.HasFlag(FeatureFlags.RtLayer),
        clipDistancesWritten,
        definitions.OmapTargets);

    var hostCaps = new HostCapabilities(
        GpuAccessor.QueryHostReducedPrecision(),
        GpuAccessor.QueryHostSupportsFragmentShaderInterlock(),
        GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
        GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
        GpuAccessor.QueryHostSupportsShaderBallot(),
        GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
        GpuAccessor.QueryHostSupportsTextureShadowLod(),
        GpuAccessor.QueryHostSupportsViewportMask());

    var codeGenParameters = new CodeGenParameters(
        attributeUsage,
        definitions,
        resourceManager.Properties,
        hostCaps,
        GpuAccessor,
        Options.TargetApi);

    switch (Options.TargetLanguage)
    {
        case TargetLanguage.Glsl:
            return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredProgram, codeGenParameters));

        case TargetLanguage.Spirv:
            return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredProgram, codeGenParameters));

        default:
            throw new NotImplementedException(Options.TargetLanguage.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Creates the resource manager for this shader stage.
/// </summary>
/// <remarks>
/// When transform feedback is enabled but the host reports no support for it, the
/// "tfe_info" storage buffer and one "tfe_data" storage buffer per transform feedback
/// buffer are declared on the shader properties.
/// NOTE(review): these are presumably used to emulate transform feedback in the shader - confirm.
/// </remarks>
/// <returns>The new resource manager</returns>
private ResourceManager CreateResourceManager()
{
    var manager = new ResourceManager(Definitions.Stage, GpuAccessor);

    // Nothing else to declare if the host handles transform feedback itself, or if it is not in use.
    if (GpuAccessor.QueryHostSupportsTransformFeedback() || !GpuAccessor.QueryTransformFeedbackEnabled())
    {
        return manager;
    }

    var infoFields = new StructureField[]
    {
        new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4),
        new StructureField(AggregateType.U32, "vertex_count"),
    };

    var infoStruct = new StructureType(infoFields);
    var infoBuffer = new BufferDefinition(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", infoStruct);

    manager.Properties.AddOrUpdateStorageBuffer(infoBuffer);

    var dataFields = new StructureField[]
    {
        new StructureField(AggregateType.Array | AggregateType.U32, "data", 0),
    };

    // Every data buffer shares the same structure type, only binding and name differ.
    var dataStruct = new StructureType(dataFields);

    for (int i = 0; i < Constants.TfeBuffersCount; i++)
    {
        var dataBuffer = new BufferDefinition(
            BufferLayout.Std430,
            1,
            Constants.TfeBufferBaseBinding + i,
            $"tfe_data{i}",
            dataStruct);

        manager.Properties.AddOrUpdateStorageBuffer(dataBuffer);
    }

    return manager;
}
|
||||
|
||||
public ShaderProgram GenerateGeometryPassthrough()
|
||||
{
|
||||
int outputAttributesMask = _config.UsedOutputAttributes;
|
||||
int layerOutputAttr = _config.LayerOutputAttribute;
|
||||
int outputAttributesMask = AttributeUsage.UsedOutputAttributes;
|
||||
int layerOutputAttr = LayerOutputAttribute;
|
||||
|
||||
OutputTopology outputTopology;
|
||||
int maxOutputVertices;
|
||||
|
||||
switch (GpuAccessor.QueryPrimitiveTopology())
|
||||
switch (Definitions.InputTopology)
|
||||
{
|
||||
case InputTopology.Points:
|
||||
outputTopology = OutputTopology.PointList;
|
||||
|
@ -204,9 +428,10 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
break;
|
||||
}
|
||||
|
||||
ShaderConfig config = new(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);
|
||||
var attributeUsage = new AttributeUsage(GpuAccessor);
|
||||
var resourceManager = new ResourceManager(ShaderStage.Geometry, GpuAccessor);
|
||||
|
||||
EmitterContext context = new(default, config, false);
|
||||
var context = new EmitterContext();
|
||||
|
||||
for (int v = 0; v < maxOutputVertices; v++)
|
||||
{
|
||||
|
@ -231,10 +456,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
else
|
||||
{
|
||||
context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(c), value);
|
||||
config.SetOutputUserAttribute(attrIndex);
|
||||
}
|
||||
|
||||
config.SetInputUserAttribute(attrIndex, c);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -254,16 +476,15 @@ namespace Ryujinx.Graphics.Shader.Translation
|
|||
var cfg = ControlFlowGraph.Create(operations);
|
||||
var function = new Function(cfg.Blocks, "main", false, 0, 0);
|
||||
|
||||
var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config);
|
||||
var definitions = new ShaderDefinitions(
|
||||
ShaderStage.Geometry,
|
||||
GpuAccessor.QueryGraphicsState(),
|
||||
false,
|
||||
1,
|
||||
outputTopology,
|
||||
maxOutputVertices);
|
||||
|
||||
var info = config.CreateProgramInfo();
|
||||
|
||||
return config.Options.TargetLanguage switch
|
||||
{
|
||||
TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)),
|
||||
TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)),
|
||||
_ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()),
|
||||
};
|
||||
return Generate(new[] { function }, attributeUsage, definitions, resourceManager, FeatureFlags.RtLayer, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue