Delete ShaderConfig and organize shader resources/definitions better (#5509)

* Move some properties out of ShaderConfig

* Stop using ShaderConfig on backends

* Replace ShaderConfig usages on Translator and passes

* Move remaining properties out of ShaderConfig and delete ShaderConfig

* Remove ResourceManager property from TranslatorContext

* Move Rewriter passes to separate transform pass files

* Fix TransformPasses.RunPass on cases where a node is removed

* Move remaining ClipDistancePrimitivesWritten and UsedFeatures updates to decode stage

* Reduce excessive parameter passing a bit by using structs more

* Remove binding parameter from ShaderProperties methods since it is redundant

* Replace decoder instruction checks with switch statement

* Put GLSL on the same plan as SPIR-V for input/output declaration

* Stop mutating TranslatorContext state when Translate is called

* Pass most of the graphics state using a struct instead of individual query methods

* Auto-format

* Auto-format

* Add backend logging interface

* Auto-format

* Remove unnecessary use of interpolated strings

* Remove more modifications of AttributeUsage after decode

* PR feedback

* gl_Layer is not supported on compute
This commit is contained in:
gdkchan 2023-08-13 22:26:42 -03:00 committed by GitHub
parent 8edfb2bc7b
commit b423197619
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
68 changed files with 2653 additions and 2407 deletions

View file

@ -0,0 +1,168 @@
using System;
using System.Collections.Generic;
using System.Numerics;
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Records which user attribute locations a shader stage reads and writes,
    /// and merges in the input usage reported by the stage that follows it.
    /// </summary>
    class AttributeUsage
    {
        /// <summary>Value of the <c>nextUsesFixedFunctionAttributes</c> argument from the last merge.</summary>
        public bool NextUsesFixedFuncAttributes { get; private set; }

        /// <summary>Bit mask of user input attribute indices flagged as used (bit N set means index N).</summary>
        public int UsedInputAttributes { get; private set; }

        /// <summary>Bit mask of user output attribute indices flagged as used (bit N set means index N).</summary>
        public int UsedOutputAttributes { get; private set; }

        /// <summary>Per-patch input attribute indices flagged as used.</summary>
        public HashSet<int> UsedInputAttributesPerPatch { get; }

        /// <summary>Per-patch output attribute indices flagged as used.</summary>
        public HashSet<int> UsedOutputAttributesPerPatch { get; }

        /// <summary>
        /// The next stage's <see cref="UsedInputAttributesPerPatch"/> set (same instance, not a copy).
        /// Stays null until <see cref="MergeFromtNextStage"/> is called.
        /// </summary>
        public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }

        /// <summary>
        /// Next stage inputs that this stage does not write itself.
        /// Only assigned when merging with <c>gpPassthrough</c> set.
        /// </summary>
        public int PassthroughAttributes { get; private set; }

        private int _nextUsedInputAttributes;
        private int _thisUsedInputAttributes;
        private Dictionary<int, int> _perPatchAttributeLocations;
        private readonly IGpuAccessor _gpuAccessor;

        /// <summary>Copy of the next stage's <see cref="ThisInputAttributesComponents"/> taken at merge time.</summary>
        public UInt128 NextInputAttributesComponents { get; private set; }

        /// <summary>Per-component input usage mask; bit <c>index * 4 + component</c> is set for each used component.</summary>
        public UInt128 ThisInputAttributesComponents { get; private set; }

        /// <summary>
        /// Creates an empty usage tracker.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used for logging when per-patch locations run out</param>
        public AttributeUsage(IGpuAccessor gpuAccessor)
        {
            _gpuAccessor = gpuAccessor;

            UsedInputAttributesPerPatch = new();
            UsedOutputAttributesPerPatch = new();
        }

        /// <summary>
        /// Flags one component of a user input attribute as used by this stage.
        /// </summary>
        /// <param name="index">User attribute index</param>
        /// <param name="component">Component index within the attribute (the code packs 4 components per attribute)</param>
        public void SetInputUserAttribute(int index, int component)
        {
            int mask = 1 << index;

            UsedInputAttributes |= mask;
            _thisUsedInputAttributes |= mask;
            ThisInputAttributesComponents |= UInt128.One << (index * 4 + component);
        }

        /// <summary>
        /// Flags a per-patch user input attribute as used.
        /// </summary>
        /// <param name="index">Per-patch attribute index</param>
        public void SetInputUserAttributePerPatch(int index)
        {
            UsedInputAttributesPerPatch.Add(index);
        }

        /// <summary>
        /// Flags a user output attribute as used.
        /// </summary>
        /// <param name="index">User attribute index</param>
        public void SetOutputUserAttribute(int index)
        {
            UsedOutputAttributes |= 1 << index;
        }

        /// <summary>
        /// Flags a per-patch user output attribute as used.
        /// </summary>
        /// <param name="index">Per-patch attribute index</param>
        public void SetOutputUserAttributePerPatch(int index)
        {
            UsedOutputAttributesPerPatch.Add(index);
        }

        /// <summary>
        /// Merges the input usage of the next stage into this stage's output usage,
        /// and assigns locations for per-patch outputs when there are any.
        /// </summary>
        /// <remarks>
        /// The spelling of the method name is kept as is, since renaming it would break existing callers.
        /// </remarks>
        /// <param name="gpPassthrough">When set, next stage inputs are recorded as passthrough instead of being added to the outputs</param>
        /// <param name="nextUsesFixedFunctionAttributes">Stored on <see cref="NextUsesFixedFuncAttributes"/></param>
        /// <param name="nextStage">Attribute usage of the next stage</param>
        public void MergeFromtNextStage(bool gpPassthrough, bool nextUsesFixedFunctionAttributes, AttributeUsage nextStage)
        {
            NextInputAttributesComponents = nextStage.ThisInputAttributesComponents;
            NextUsedInputAttributesPerPatch = nextStage.UsedInputAttributesPerPatch;
            NextUsesFixedFuncAttributes = nextUsesFixedFunctionAttributes;

            MergeOutputUserAttributes(gpPassthrough, nextStage.UsedInputAttributes, nextStage.UsedInputAttributesPerPatch);

            if (UsedOutputAttributesPerPatch.Count != 0)
            {
                // Regular and per-patch input/output locations can't overlap,
                // so we must assign on our location using unused regular input/output locations.

                Dictionary<int, int> locationsMap = new();

                int freeMask = ~UsedOutputAttributes;

                foreach (int attr in UsedOutputAttributesPerPatch)
                {
                    int location = BitOperations.TrailingZeroCount(freeMask);

                    // TrailingZeroCount returns 32 for a zero mask, meaning every location is taken.
                    // Attributes left unmapped fall back to their own index on lookup.
                    if (location == 32)
                    {
                        _gpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}.");
                        break;
                    }

                    locationsMap.Add(attr, location);
                    freeMask &= ~(1 << location);
                }

                // Both stages must agree on the locations, so use the same "map" for both.
                _perPatchAttributeLocations = locationsMap;
                nextStage._perPatchAttributeLocations = locationsMap;
            }
        }

        /// <summary>
        /// Records the next stage input mask, then either computes the passthrough mask
        /// or adds the next stage inputs to this stage's outputs.
        /// </summary>
        /// <param name="gpPassthrough">Selects passthrough mode</param>
        /// <param name="mask">Bit mask of user input attributes used by the next stage</param>
        /// <param name="perPatch">Per-patch input attributes used by the next stage</param>
        private void MergeOutputUserAttributes(bool gpPassthrough, int mask, IEnumerable<int> perPatch)
        {
            _nextUsedInputAttributes = mask;

            if (gpPassthrough)
            {
                PassthroughAttributes = mask & ~UsedOutputAttributes;
            }
            else
            {
                UsedOutputAttributes |= mask;
                UsedOutputAttributesPerPatch.UnionWith(perPatch);
            }
        }

        /// <summary>
        /// Gets the location assigned to a per-patch attribute.
        /// </summary>
        /// <param name="index">Per-patch attribute index</param>
        /// <returns>The mapped location, or <paramref name="index"/> itself if no mapping exists for it</returns>
        public int GetPerPatchAttributeLocation(int index)
        {
            if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location))
            {
                return index;
            }

            return location;
        }

        /// <summary>
        /// Checks whether an output attribute should be treated as used.
        /// </summary>
        /// <param name="attr">Attribute offset</param>
        /// <returns>
        /// False only for a user attribute that the next stage does not read, and only when
        /// <see cref="NextUsesFixedFuncAttributes"/> is set. True in every other case
        /// </returns>
        public bool IsUsedOutputAttribute(int attr)
        {
            // The check for fixed function attributes on the next stage is conservative,
            // returning false if the output is just not used by the next stage is also valid.
            if (NextUsesFixedFuncAttributes &&
                attr >= AttributeConsts.UserAttributeBase &&
                attr < AttributeConsts.UserAttributeEnd)
            {
                // Each user attribute spans 16 units of the offset, hence the shift by 4.
                int index = (attr - AttributeConsts.UserAttributeBase) >> 4;

                return (_nextUsedInputAttributes & (1 << index)) != 0;
            }

            return true;
        }

        /// <summary>
        /// Finds the N-th user attribute location that is not in use.
        /// </summary>
        /// <param name="isOutput">True to search against the next stage inputs, false to search against this stage inputs</param>
        /// <param name="index">Zero-based ordinal of the free location wanted, counting from the lowest location</param>
        /// <returns>The free location, or -1 if there are fewer than <paramref name="index"/> + 1 free locations</returns>
        public int GetFreeUserAttribute(bool isOutput, int index)
        {
            int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;

            // A mask of -1 has all 32 bits set, so nothing is free anymore.
            while (useMask != -1)
            {
                // The loop condition guarantees at least one clear bit, so this is always below 32.
                int bit = BitOperations.TrailingZeroCount(~useMask);

                if (index < 1)
                {
                    return bit;
                }

                // Skip this free location and keep looking for the next one.
                useMask |= 1 << bit;
                index--;
            }

            // Returning from inside the loop ensures that a location that was skipped above
            // is never reported when the mask fills up before the requested ordinal is reached.
            return -1;
        }

        /// <summary>
        /// Flags all user input attributes, and all of their components, as used.
        /// </summary>
        public void SetAllInputUserAttributes()
        {
            UsedInputAttributes |= Constants.AllAttributesMask;
            ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4);
        }

        /// <summary>
        /// Flags all user output attributes as used.
        /// </summary>
        public void SetAllOutputUserAttributes()
        {
            UsedOutputAttributes |= Constants.AllAttributesMask;
        }
    }
}

View file

@ -11,7 +11,8 @@ namespace Ryujinx.Graphics.Shader.Translation
class EmitterContext
{
public DecodedProgram Program { get; }
public ShaderConfig Config { get; }
public TranslatorContext TranslatorContext { get; }
public ResourceManager ResourceManager { get; }
public bool IsNonMain { get; }
@ -54,10 +55,15 @@ namespace Ryujinx.Graphics.Shader.Translation
_labels = new Dictionary<ulong, BlockLabel>();
}
public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain) : this()
public EmitterContext(
TranslatorContext translatorContext,
ResourceManager resourceManager,
DecodedProgram program,
bool isNonMain) : this()
{
TranslatorContext = translatorContext;
ResourceManager = resourceManager;
Program = program;
Config = config;
IsNonMain = isNonMain;
EmitStart();
@ -65,12 +71,12 @@ namespace Ryujinx.Graphics.Shader.Translation
private void EmitStart()
{
if (Config.Stage == ShaderStage.Vertex &&
Config.Options.TargetApi == TargetApi.Vulkan &&
(Config.Options.Flags & TranslationFlags.VertexA) == 0)
if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex &&
TranslatorContext.Options.TargetApi == TargetApi.Vulkan &&
(TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0)
{
// Vulkan requires the point size to be always written on the shader if the primitive topology is points.
this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize()));
this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(TranslatorContext.Definitions.PointSize));
}
}
@ -115,49 +121,6 @@ namespace Ryujinx.Graphics.Shader.Translation
_operations.Add(operation);
}
public void FlagAttributeRead(int attribute)
{
if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId)
{
Config.SetUsedFeature(FeatureFlags.InstanceId);
}
else if (Config.Stage == ShaderStage.Fragment)
{
switch (attribute)
{
case AttributeConsts.PositionX:
case AttributeConsts.PositionY:
Config.SetUsedFeature(FeatureFlags.FragCoordXY);
break;
}
}
}
public void FlagAttributeWritten(int attribute)
{
if (Config.Stage == ShaderStage.Vertex)
{
switch (attribute)
{
case AttributeConsts.ClipDistance0:
case AttributeConsts.ClipDistance1:
case AttributeConsts.ClipDistance2:
case AttributeConsts.ClipDistance3:
case AttributeConsts.ClipDistance4:
case AttributeConsts.ClipDistance5:
case AttributeConsts.ClipDistance6:
case AttributeConsts.ClipDistance7:
Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4);
break;
}
}
if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer)
{
Config.SetUsedFeature(FeatureFlags.RtLayer);
}
}
public void MarkLabel(Operand label)
{
Add(Instruction.MarkLabel, label);
@ -203,14 +166,14 @@ namespace Ryujinx.Graphics.Shader.Translation
public void PrepareForVertexReturn()
{
if (!Config.GpuAccessor.QueryHostSupportsTransformFeedback() && Config.GpuAccessor.QueryTransformFeedbackEnabled())
if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled())
{
Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1));
for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++)
{
var locations = Config.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
var stride = Config.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
var locations = TranslatorContext.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
var stride = TranslatorContext.GpuAccessor.QueryTransformFeedbackStride(tfbIndex);
Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex));
Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex);
@ -242,7 +205,7 @@ namespace Ryujinx.Graphics.Shader.Translation
}
}
if (Config.GpuAccessor.QueryViewportTransformDisable())
if (TranslatorContext.Definitions.ViewportTransformDisable)
{
Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
Operand y = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
@ -254,7 +217,7 @@ namespace Ryujinx.Graphics.Shader.Translation
this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(y, yScale, negativeOne));
}
if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl())
if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl())
{
Operand z = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
Operand w = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
@ -263,12 +226,10 @@ namespace Ryujinx.Graphics.Shader.Translation
this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW));
}
if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
if (TranslatorContext.Definitions.Stage != ShaderStage.Geometry && TranslatorContext.HasLayerInputAttribute)
{
Config.SetUsedFeature(FeatureFlags.RtLayer);
int attrVecIndex = Config.GpLayerInputAttribute >> 2;
int attrComponentIndex = Config.GpLayerInputAttribute & 3;
int attrVecIndex = TranslatorContext.GpLayerInputAttribute >> 2;
int attrComponentIndex = TranslatorContext.GpLayerInputAttribute & 3;
Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex));
@ -278,7 +239,7 @@ namespace Ryujinx.Graphics.Shader.Translation
public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
{
if (Config.GpuAccessor.QueryViewportTransformDisable())
if (TranslatorContext.Definitions.ViewportTransformDisable)
{
oldXLocal = Local();
this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
@ -291,7 +252,7 @@ namespace Ryujinx.Graphics.Shader.Translation
oldYLocal = null;
}
if (Config.GpuAccessor.QueryTransformDepthMinusOneToOne() && !Config.GpuAccessor.QueryHostSupportsDepthClipControl())
if (TranslatorContext.Definitions.DepthMode && !TranslatorContext.GpuAccessor.QueryHostSupportsDepthClipControl())
{
oldZLocal = Local();
this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
@ -311,13 +272,13 @@ namespace Ryujinx.Graphics.Shader.Translation
return true;
}
if (Config.LastInVertexPipeline &&
(Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
(Config.Options.Flags & TranslationFlags.VertexA) == 0)
if (TranslatorContext.Definitions.LastInVertexPipeline &&
(TranslatorContext.Definitions.Stage == ShaderStage.Vertex || TranslatorContext.Definitions.Stage == ShaderStage.TessellationEvaluation) &&
(TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0)
{
PrepareForVertexReturn();
}
else if (Config.Stage == ShaderStage.Geometry)
else if (TranslatorContext.Definitions.Stage == ShaderStage.Geometry)
{
void WritePositionOutput(int primIndex)
{
@ -345,20 +306,19 @@ namespace Ryujinx.Graphics.Shader.Translation
this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(3), w);
}
if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
if (TranslatorContext.Definitions.GpPassthrough && !TranslatorContext.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
{
int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();
int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices();
for (int primIndex = 0; primIndex < inputVertices; primIndex++)
{
WritePositionOutput(primIndex);
int passthroughAttributes = Config.PassthroughAttributes;
int passthroughAttributes = TranslatorContext.AttributeUsage.PassthroughAttributes;
while (passthroughAttributes != 0)
{
int index = BitOperations.TrailingZeroCount(passthroughAttributes);
WriteUserDefinedOutput(index, primIndex);
Config.SetOutputUserAttribute(index);
passthroughAttributes &= ~(1 << index);
}
@ -368,20 +328,20 @@ namespace Ryujinx.Graphics.Shader.Translation
this.EndPrimitive();
}
}
else if (Config.Stage == ShaderStage.Fragment)
else if (TranslatorContext.Definitions.Stage == ShaderStage.Fragment)
{
GenerateAlphaToCoverageDitherDiscard();
bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();
bool supportsBgra = TranslatorContext.GpuAccessor.QueryHostSupportsBgraFormat();
if (Config.OmapDepth)
if (TranslatorContext.Definitions.OmapDepth)
{
Operand src = Register(Config.GetDepthRegister(), RegisterType.Gpr);
Operand src = Register(TranslatorContext.GetDepthRegister(), RegisterType.Gpr);
this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, src);
}
AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();
AlphaTestOp alphaTestOp = TranslatorContext.Definitions.AlphaTestCompare;
if (alphaTestOp != AlphaTestOp.Always)
{
@ -389,7 +349,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
this.Discard();
}
else if ((Config.OmapTargets & 8) != 0)
else if ((TranslatorContext.Definitions.OmapTargets & 8) != 0)
{
Instruction comparator = alphaTestOp switch
{
@ -405,7 +365,7 @@ namespace Ryujinx.Graphics.Shader.Translation
Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");
Operand alpha = Register(3, RegisterType.Gpr);
Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
Operand alphaRef = ConstF(TranslatorContext.Definitions.AlphaTestReference);
Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
Operand alphaPassLabel = Label();
@ -427,7 +387,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
for (int component = 0; component < 4; component++)
{
bool componentEnabled = (Config.OmapTargets & (1 << (rtIndex * 4 + component))) != 0;
bool componentEnabled = (TranslatorContext.Definitions.OmapTargets & (1 << (rtIndex * 4 + component))) != 0;
if (!componentEnabled)
{
continue;
@ -460,10 +420,9 @@ namespace Ryujinx.Graphics.Shader.Translation
}
}
bool targetEnabled = (Config.OmapTargets & (0xf << (rtIndex * 4))) != 0;
bool targetEnabled = (TranslatorContext.Definitions.OmapTargets & (0xf << (rtIndex * 4))) != 0;
if (targetEnabled)
{
Config.SetOutputUserAttribute(rtIndex);
regIndexBase += 4;
}
}
@ -475,7 +434,7 @@ namespace Ryujinx.Graphics.Shader.Translation
private void GenerateAlphaToCoverageDitherDiscard()
{
// If the feature is disabled, or alpha is not written, then we're done.
if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0)
if (!TranslatorContext.Definitions.AlphaToCoverageDitherEnable || (TranslatorContext.Definitions.OmapTargets & 8) == 0)
{
return;
}

View file

@ -12,15 +12,12 @@ namespace Ryujinx.Graphics.Shader.Translation
None = 0,
// Affected by resolution scaling.
IntegerSampling = 1 << 0,
FragCoordXY = 1 << 1,
Bindless = 1 << 2,
InstanceId = 1 << 3,
DrawParameters = 1 << 4,
RtLayer = 1 << 5,
IaIndexing = 1 << 7,
OaIndexing = 1 << 8,
FixedFuncAttr = 1 << 9,
LocalMemory = 1 << 10,
SharedMemory = 1 << 11,

View file

@ -0,0 +1,34 @@
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Immutable set of boolean flags, all of them assigned once by the constructor.
    /// </summary>
    /// <remarks>
    /// NOTE(review): going by the names, these presumably describe what the host GPU/driver supports.
    /// Confirm against the code that creates this object.
    /// </remarks>
    class HostCapabilities
    {
        // Each field holds the value of the constructor parameter with the matching name.
        public readonly bool ReducedPrecision;
        public readonly bool SupportsFragmentShaderInterlock;
        public readonly bool SupportsFragmentShaderOrderingIntel;
        public readonly bool SupportsGeometryShaderPassthrough;
        public readonly bool SupportsShaderBallot;
        public readonly bool SupportsShaderBarrierDivergence;
        public readonly bool SupportsTextureShadowLod;
        public readonly bool SupportsViewportMask;

        /// <summary>
        /// Creates a new set of capability flags.
        /// </summary>
        /// <param name="reducedPrecision">Value for <see cref="ReducedPrecision"/></param>
        /// <param name="supportsFragmentShaderInterlock">Value for <see cref="SupportsFragmentShaderInterlock"/></param>
        /// <param name="supportsFragmentShaderOrderingIntel">Value for <see cref="SupportsFragmentShaderOrderingIntel"/></param>
        /// <param name="supportsGeometryShaderPassthrough">Value for <see cref="SupportsGeometryShaderPassthrough"/></param>
        /// <param name="supportsShaderBallot">Value for <see cref="SupportsShaderBallot"/></param>
        /// <param name="supportsShaderBarrierDivergence">Value for <see cref="SupportsShaderBarrierDivergence"/></param>
        /// <param name="supportsTextureShadowLod">Value for <see cref="SupportsTextureShadowLod"/></param>
        /// <param name="supportsViewportMask">Value for <see cref="SupportsViewportMask"/></param>
        public HostCapabilities(
            bool reducedPrecision,
            bool supportsFragmentShaderInterlock,
            bool supportsFragmentShaderOrderingIntel,
            bool supportsGeometryShaderPassthrough,
            bool supportsShaderBallot,
            bool supportsShaderBarrierDivergence,
            bool supportsTextureShadowLod,
            bool supportsViewportMask)
        {
            // Both lists are in the same order, so each field receives the parameter of the same name.
            (
                ReducedPrecision,
                SupportsFragmentShaderInterlock,
                SupportsFragmentShaderOrderingIntel,
                SupportsGeometryShaderPassthrough,
                SupportsShaderBallot,
                SupportsShaderBarrierDivergence,
                SupportsTextureShadowLod,
                SupportsViewportMask
            ) = (
                reducedPrecision,
                supportsFragmentShaderInterlock,
                supportsFragmentShaderOrderingIntel,
                supportsGeometryShaderPassthrough,
                supportsShaderBallot,
                supportsShaderBarrierDivergence,
                supportsTextureShadowLod,
                supportsViewportMask
            );
        }
    }
}

View file

@ -1,12 +1,13 @@
using Ryujinx.Graphics.Shader.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
class BindlessElimination
{
public static void RunPass(BasicBlock block, ShaderConfig config)
public static void RunPass(BasicBlock block, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
{
// We can turn a bindless into regular access by recognizing the pattern
// produced by the compiler for separate texture and sampler.
@ -43,7 +44,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (bindlessHandle.Type == OperandType.ConstantBuffer)
{
SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false);
SetHandle(
resourceManager,
gpuAccessor,
texOp,
bindlessHandle.GetCbufOffset(),
bindlessHandle.GetCbufSlot(),
rewriteSamplerType,
isImage: false);
continue;
}
@ -140,7 +149,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
{
SetHandle(
config,
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
@ -150,7 +160,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
else if (src1.Type == OperandType.ConstantBuffer)
{
SetHandle(
config,
resourceManager,
gpuAccessor,
texOp,
TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
@ -173,17 +184,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
if (texOp.Inst == Instruction.ImageAtomic)
{
texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
texOp.Format = ShaderProperties.GetTextureFormatAtomic(gpuAccessor, cbufOffset, cbufSlot);
}
else
{
texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
texOp.Format = ShaderProperties.GetTextureFormat(gpuAccessor, cbufOffset, cbufSlot);
}
}
bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
SetHandle(resourceManager, gpuAccessor, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
}
}
}
@ -220,11 +231,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return null;
}
private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
private static void SetHandle(
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
TextureOperation texOp,
int cbufOffset,
int cbufSlot,
bool rewriteSamplerType,
bool isImage)
{
if (rewriteSamplerType)
{
SamplerType newType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
SamplerType newType = gpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
if (texOp.Inst.IsTextureQuery())
{
@ -253,7 +271,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
int binding = config.ResourceManager.GetTextureOrImageBinding(
int binding = resourceManager.GetTextureOrImageBinding(
texOp.Inst,
texOp.Type,
texOp.Format,

View file

@ -9,7 +9,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
private const int NvnTextureBufferIndex = 2;
public static void RunPass(BasicBlock block, ShaderConfig config)
public static void RunPass(BasicBlock block, ResourceManager resourceManager)
{
// We can turn a bindless texture access into a indexed access,
// as long the following conditions are true:
@ -44,7 +44,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand ldcSrc0 = handleAsgOp.GetSource(0);
if (ldcSrc0.Type != OperandType.Constant ||
!config.ResourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) ||
!resourceManager.TryGetConstantBufferSlot(ldcSrc0.Value, out int src0CbufSlot) ||
src0CbufSlot != NvnTextureBufferIndex)
{
continue;
@ -88,7 +88,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue;
}
TurnIntoIndexed(config, texOp, addSrc1.Value / 4);
TurnIntoIndexed(resourceManager, texOp, addSrc1.Value / 4);
Operand index = Local();
@ -102,9 +102,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
private static void TurnIntoIndexed(ResourceManager resourceManager, TextureOperation texOp, int handle)
{
int binding = config.ResourceManager.GetTextureOrImageBinding(
int binding = resourceManager.GetTextureOrImageBinding(
texOp.Inst,
texOp.Type | SamplerType.Indexed,
texOp.Format,

View file

@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class ConstantFolding
{
public static void RunPass(ShaderConfig config, Operation operation)
public static void RunPass(ResourceManager resourceManager, Operation operation)
{
if (!AreAllSourcesConstant(operation))
{
@ -158,7 +158,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
int binding = operation.GetSource(0).Value;
int fieldIndex = operation.GetSource(1).Value;
if (config.ResourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0)
if (resourceManager.TryGetConstantBufferSlot(binding, out int cbufSlot) && fieldIndex == 0)
{
int vecIndex = operation.GetSource(2).Value;
int elemIndex = operation.GetSource(3).Value;

View file

@ -205,7 +205,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
public static void RunPass(
HelperFunctionManager hfm,
BasicBlock[] blocks,
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
TargetLanguage targetLanguage)
{
GtsContext gtsContext = new(hfm);
@ -220,14 +225,20 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (IsGlobalMemory(operation.StorageKind))
{
LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node);
LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(
gtsContext,
resourceManager,
gpuAccessor,
targetLanguage,
block,
node);
if (nextNode == null)
{
// The returned value being null means that the global memory replacement failed,
// so we just make loads read 0 and stores do nothing.
config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");
gpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\".");
if (operation.Dest != null)
{
@ -286,7 +297,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage(
GtsContext gtsContext,
ShaderConfig config,
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
TargetLanguage targetLanguage,
BasicBlock block,
LinkedListNode<INode> node)
{
@ -303,7 +316,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand offset = result.Offset;
bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer();
bool storageUnaligned = gpuAccessor.QueryHasUnalignedStorageBuffer();
if (storageUnaligned)
{
@ -312,7 +325,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand baseAddressMasked = Local();
Operand hostOffset = Local();
int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment();
Operation maskOp = new(Instruction.BitwiseAnd, baseAddressMasked, baseAddress, Const(-alignment));
Operation subOp = new(Instruction.Subtract, hostOffset, globalAddress, baseAddressMasked);
@ -333,13 +346,19 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
offset = newOffset;
}
if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage))
if (CanUseInlineStorageOp(operation, targetLanguage))
{
return GenerateInlineStorageOp(config, node, operation, offset, result);
return GenerateInlineStorageOp(resourceManager, node, operation, offset, result);
}
else
{
if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId))
if (!TryGenerateSingleTargetStorageOp(
gtsContext,
resourceManager,
targetLanguage,
operation,
result,
out int functionId))
{
return null;
}
@ -354,7 +373,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
// the base address might be stored.
// Generate a helper function that will check all possible storage buffers and use the right one.
if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId))
if (!TryGenerateMultiTargetStorageOp(
gtsContext,
resourceManager,
gpuAccessor,
targetLanguage,
block,
operation,
out int functionId))
{
return null;
}
@ -375,14 +401,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
private static LinkedListNode<INode> GenerateInlineStorageOp(
ShaderConfig config,
ResourceManager resourceManager,
LinkedListNode<INode> node,
Operation operation,
Operand offset,
SearchResult result)
{
bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic();
if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
{
return null;
}
@ -474,7 +500,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
private static bool TryGenerateSingleTargetStorageOp(
GtsContext gtsContext,
ShaderConfig config,
ResourceManager resourceManager,
TargetLanguage targetLanguage,
Operation operation,
SearchResult result,
out int functionId)
@ -514,7 +541,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
if (!TryGenerateStorageOp(
config,
resourceManager,
targetLanguage,
context,
operation.Inst,
operation.StorageKind,
@ -555,7 +583,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
private static bool TryGenerateMultiTargetStorageOp(
GtsContext gtsContext,
ShaderConfig config,
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
TargetLanguage targetLanguage,
BasicBlock block,
Operation operation,
out int functionId)
@ -624,7 +654,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
if (targetCbs.Count == 0)
{
config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
gpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\".");
}
if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId))
@ -685,13 +715,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
SearchResult result = new(sbCbSlot, sbCbOffset);
int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment();
int alignment = gpuAccessor.QueryHostStorageBufferOffsetAlignment();
Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment));
Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked);
if (!TryGenerateStorageOp(
config,
resourceManager,
targetLanguage,
context,
operation.Inst,
operation.StorageKind,
@ -781,7 +812,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
private static bool TryGenerateStorageOp(
ShaderConfig config,
ResourceManager resourceManager,
TargetLanguage targetLanguage,
EmitterContext context,
Instruction inst,
StorageKind storageKind,
@ -794,7 +826,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
resultValue = null;
bool isStore = inst.IsAtomic() || inst == Instruction.Store;
if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
if (!resourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding))
{
return false;
}
@ -820,7 +852,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value);
break;
case Instruction.AtomicMaxS32:
if (config.Options.TargetLanguage == TargetLanguage.Spirv)
if (targetLanguage == TargetLanguage.Spirv)
{
resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
}
@ -836,7 +868,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
break;
case Instruction.AtomicMinS32:
if (config.Options.TargetLanguage == TargetLanguage.Spirv)
if (targetLanguage == TargetLanguage.Spirv)
{
resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value);
}

View file

@ -7,40 +7,40 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class Optimizer
{
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
public static void RunPass(TransformContext context)
{
RunOptimizationPasses(blocks, config);
RunOptimizationPasses(context.Blocks, context.ResourceManager);
// TODO: Some of those are not optimizations and shouldn't be here.
GlobalToStorage.RunPass(hfm, blocks, config);
GlobalToStorage.RunPass(context.Hfm, context.Blocks, context.ResourceManager, context.GpuAccessor, context.TargetLanguage);
bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64();
bool hostSupportsShaderFloat64 = context.GpuAccessor.QueryHostSupportsShaderFloat64();
// Those passes are looking for specific patterns and only needs to run once.
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
for (int blkIndex = 0; blkIndex < context.Blocks.Length; blkIndex++)
{
BindlessToIndexed.RunPass(blocks[blkIndex], config);
BindlessElimination.RunPass(blocks[blkIndex], config);
BindlessToIndexed.RunPass(context.Blocks[blkIndex], context.ResourceManager);
BindlessElimination.RunPass(context.Blocks[blkIndex], context.ResourceManager, context.GpuAccessor);
// FragmentCoord only exists on fragment shaders, so we don't need to check other stages.
if (config.Stage == ShaderStage.Fragment)
if (context.Stage == ShaderStage.Fragment)
{
EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
EliminateMultiplyByFragmentCoordW(context.Blocks[blkIndex]);
}
// If the host does not support double operations, we need to turn them into float operations.
if (!hostSupportsShaderFloat64)
{
DoubleToFloat.RunPass(hfm, blocks[blkIndex]);
DoubleToFloat.RunPass(context.Hfm, context.Blocks[blkIndex]);
}
}
// Run optimizations one last time to remove any code that is now optimizable after above passes.
RunOptimizationPasses(blocks, config);
RunOptimizationPasses(context.Blocks, context.ResourceManager);
}
private static void RunOptimizationPasses(BasicBlock[] blocks, ShaderConfig config)
private static void RunOptimizationPasses(BasicBlock[] blocks, ResourceManager resourceManager)
{
bool modified;
@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
continue;
}
ConstantFolding.RunPass(config, operation);
ConstantFolding.RunPass(resourceManager, operation);
Simplification.RunPass(operation);
if (DestIsLocalVar(operation))

View file

@ -50,10 +50,10 @@ namespace Ryujinx.Graphics.Shader.Translation
public ShaderProperties Properties { get; }
public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties)
public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor)
{
_gpuAccessor = gpuAccessor;
Properties = properties;
Properties = new();
_stage = stage;
_stagePrefix = GetShaderStagePrefix(stage);
@ -62,15 +62,15 @@ namespace Ryujinx.Graphics.Shader.Translation
_cbSlotToBindingMap.AsSpan().Fill(-1);
_sbSlotToBindingMap.AsSpan().Fill(-1);
_sbSlots = new Dictionary<int, int>();
_sbSlotsReverse = new Dictionary<int, int>();
_sbSlots = new();
_sbSlotsReverse = new();
_usedConstantBufferBindings = new HashSet<int>();
_usedConstantBufferBindings = new();
_usedTextures = new Dictionary<TextureInfo, TextureMeta>();
_usedImages = new Dictionary<TextureInfo, TextureMeta>();
_usedTextures = new();
_usedImages = new();
properties.AddOrUpdateConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType()));
Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, SupportBuffer.Binding, "support_buffer", SupportBuffer.GetStructureType()));
LocalMemoryId = -1;
SharedMemoryId = -1;
@ -312,11 +312,11 @@ namespace Ryujinx.Graphics.Shader.Translation
if (isImage)
{
Properties.AddOrUpdateImage(binding, definition);
Properties.AddOrUpdateImage(definition);
}
else
{
Properties.AddOrUpdateTexture(binding, definition);
Properties.AddOrUpdateTexture(definition);
}
if (layer == 0)
@ -500,7 +500,7 @@ namespace Ryujinx.Graphics.Shader.Translation
new StructureField(AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32, "data", Constants.ConstantBufferSize / 16),
});
Properties.AddOrUpdateConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type));
Properties.AddOrUpdateConstantBuffer(new(BufferLayout.Std140, 0, binding, name, type));
}
private void AddNewStorageBuffer(int binding, string name)
@ -510,7 +510,7 @@ namespace Ryujinx.Graphics.Shader.Translation
new StructureField(AggregateType.Array | AggregateType.U32, "data", 0),
});
Properties.AddOrUpdateStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type));
Properties.AddOrUpdateStorageBuffer(new(BufferLayout.Std430, 1, binding, name, type));
}
public static string GetShaderStagePrefix(ShaderStage stage)

View file

@ -1,639 +0,0 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System;
using System.Collections.Generic;
using System.Numerics;
namespace Ryujinx.Graphics.Shader.Translation
{
class ShaderConfig
{
// Divisor applied to the CRS local memory size by GetLocalMemorySize.
private const int ThreadsPerWarp = 32;
// Stage this configuration was created for; assigned by the base constructor.
public ShaderStage Stage { get; }
// Only true for geometry shaders whose header has GpPassthrough set (header constructor).
public bool GpPassthrough { get; }
// Initialized by the header constructor as "stage < Fragment"; later changed by
// SetLastInVertexPipeline and MergeFromtNextStage.
public bool LastInVertexPipeline { get; private set; }
// Both set together by SetGeometryShaderLayerInputAttribute.
public bool HasLayerInputAttribute { get; private set; }
public int GpLayerInputAttribute { get; private set; }
// Values below are copied from constructor arguments or from the shader header.
public int ThreadsPerInputPrimitive { get; }
public OutputTopology OutputTopology { get; }
public int MaxOutputVertices { get; }
public int LocalMemorySize { get; }
public ImapPixelType[] ImapTypes { get; }
public int OmapTargets { get; }
public bool OmapSampleMask { get; }
public bool OmapDepth { get; }
public IGpuAccessor GpuAccessor { get; }
public TranslationOptions Options { get; }
// Shortcut to the properties owned by the resource manager.
public ShaderProperties Properties => ResourceManager.Properties;
public ResourceManager ResourceManager { get; set; }
// True when the stage is not compute and both the guest enables transform feedback
// and the host supports it (see the base constructor).
public bool TransformFeedbackEnabled { get; }
// Lazily created by EnsureTransformFeedbackInitialized; null until then.
private TransformFeedbackOutput[] _transformFeedbackOutputs;
/// <summary>
/// Dictionary key identifying a transform feedback variable by I/O variable, location and component.
/// </summary>
readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
{
    public IoVariable IoVariable { get; }
    public int Location { get; }
    public int Component { get; }

    public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
    {
        Component = component;
        Location = location;
        IoVariable = ioVariable;
    }

    public override bool Equals(object other) => other is TransformFeedbackVariable tfbVar && Equals(tfbVar);

    public bool Equals(TransformFeedbackVariable other)
    {
        if (IoVariable != other.IoVariable)
        {
            return false;
        }

        return Location == other.Location && Component == other.Component;
    }

    // Packs the three members into a single integer: variable in the low bits,
    // location shifted by 8 and component shifted by 16.
    public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);

    public override string ToString() => $"{IoVariable}.{Location}.{Component}";
}
// Maps (I/O variable, location, component) to its output; filled by EnsureTransformFeedbackInitialized.
private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
// Running total accumulated through SizeAdd.
public int Size { get; private set; }
// One bit per clip distance index, set by SetClipDistanceWritten.
public byte ClipDistancesWritten { get; private set; }
// Accumulated through SetUsedFeature and InheritFrom.
public FeatureFlags UsedFeatures { get; private set; }
// Largest "offset + 4" passed to ConstantBuffer1Read so far.
public int Cb1DataSize { get; private set; }
// Both set together by SetLayerOutputAttribute.
public bool LayerOutputWritten { get; private set; }
public int LayerOutputAttribute { get; private set; }
// Taken from the next stage's FixedFuncAttr feature flag in MergeFromtNextStage.
public bool NextUsesFixedFuncAttributes { get; private set; }
// Bit masks with one bit per user attribute index.
public int UsedInputAttributes { get; private set; }
public int UsedOutputAttributes { get; private set; }
// Per-patch attribute indices used by this stage.
public HashSet<int> UsedInputAttributesPerPatch { get; }
public HashSet<int> UsedOutputAttributesPerPatch { get; }
// Reference to the next stage's per-patch input set; assigned by MergeFromtNextStage.
public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
// Only computed by MergeOutputUserAttributes when GpPassthrough is set.
public int PassthroughAttributes { get; private set; }
// Mask last passed to MergeOutputUserAttributes.
private int _nextUsedInputAttributes;
// Bits set by SetInputUserAttribute only (the fixed function variant does not touch it).
private int _thisUsedInputAttributes;
// Per-patch attribute index to assigned location; built by MergeFromtNextStage, null until then.
private Dictionary<int, int> _perPatchAttributeLocations;
// One bit per (attribute index * 4 + component).
public UInt128 NextInputAttributesComponents { get; private set; }
public UInt128 ThisInputAttributesComponents { get; private set; }
/// <summary>
/// Creates the base configuration shared by all other constructors.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="gpuAccessor">Accessor used to query guest and host state</param>
/// <param name="options">Translation options</param>
/// <param name="localMemorySize">Local memory size stored on <see cref="LocalMemorySize"/></param>
public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize)
{
    Stage = stage;
    GpuAccessor = gpuAccessor;
    Options = options;
    LocalMemorySize = localMemorySize;

    _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();

    TransformFeedbackEnabled =
        stage != ShaderStage.Compute &&
        gpuAccessor.QueryTransformFeedbackEnabled() &&
        gpuAccessor.QueryHostSupportsTransformFeedback();

    UsedInputAttributesPerPatch = new HashSet<int>();
    UsedOutputAttributesPerPatch = new HashSet<int>();

    ShaderProperties properties;

    if (stage == ShaderStage.Fragment)
    {
        // Y negate is only queried when the target API is not Vulkan.
        bool originUpperLeft = options.TargetApi == TargetApi.Vulkan || gpuAccessor.QueryYNegateEnabled();

        properties = new ShaderProperties(originUpperLeft);
    }
    else
    {
        properties = new ShaderProperties();
    }

    ResourceManager = new ResourceManager(stage, gpuAccessor, properties);

    // The "tfe" buffers below are only declared when the guest enables transform feedback
    // but the host does not support it.
    bool needsTfeBuffers = !gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled();

    if (!needsTfeBuffers)
    {
        return;
    }

    StructureType infoStruct = new(new StructureField[]
    {
        new(AggregateType.Array | AggregateType.U32, "base_offset", 4),
        new(AggregateType.U32, "vertex_count"),
    });

    properties.AddOrUpdateStorageBuffer(
        Constants.TfeInfoBinding,
        new BufferDefinition(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", infoStruct));

    StructureType dataStruct = new(new StructureField[]
    {
        new(AggregateType.Array | AggregateType.U32, "data", 0),
    });

    for (int i = 0; i < Constants.TfeBuffersCount; i++)
    {
        int binding = Constants.TfeBufferBaseBinding + i;

        properties.AddOrUpdateStorageBuffer(
            binding,
            new BufferDefinition(BufferLayout.Std430, 1, binding, $"tfe_data{i}", dataStruct));
    }
}
/// <summary>
/// Creates a configuration from an explicit output topology and vertex count, with no shader header.
/// Local memory size is zero and one thread per input primitive is used.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="outputTopology">Output topology</param>
/// <param name="maxOutputVertices">Maximum number of output vertices</param>
/// <param name="gpuAccessor">Accessor used to query guest and host state</param>
/// <param name="options">Translation options</param>
public ShaderConfig(
    ShaderStage stage,
    OutputTopology outputTopology,
    int maxOutputVertices,
    IGpuAccessor gpuAccessor,
    TranslationOptions options) : this(stage, gpuAccessor, options, localMemorySize: 0)
{
    MaxOutputVertices = maxOutputVertices;
    OutputTopology = outputTopology;
    ThreadsPerInputPrimitive = 1;
}
/// <summary>
/// Creates a configuration from a shader header.
/// </summary>
/// <param name="header">Header the stage, local memory size and I/O maps are read from</param>
/// <param name="gpuAccessor">Accessor used to query guest and host state</param>
/// <param name="options">Translation options</param>
public ShaderConfig(
    ShaderHeader header,
    IGpuAccessor gpuAccessor,
    TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header))
{
    // Passthrough is only honoured on geometry shaders.
    GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;

    ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
    OutputTopology = header.OutputTopology;
    MaxOutputVertices = header.MaxOutputVertexCount;

    ImapTypes = header.ImapTypes;
    OmapTargets = header.OmapTargets;
    OmapSampleMask = header.OmapSampleMask;
    OmapDepth = header.OmapDepth;

    // Every stage that comes before fragment starts out as last; MergeFromtNextStage may clear this.
    LastInVertexPipeline = header.Stage < ShaderStage.Fragment;
}
/// <summary>
/// Sums the low and high local memory sizes from the header with the CRS size divided by the warp size.
/// </summary>
/// <param name="header">Shader header</param>
/// <returns>Value used as the local memory size of the shader</returns>
private static int GetLocalMemorySize(ShaderHeader header)
{
    int lowAndHigh = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
    int crsShare = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;

    return lowAndHigh + crsShare;
}
/// <summary>
/// Builds the transform feedback output table and the variable lookup dictionary on first use.
/// Does nothing if the stage has no transform feedback outputs or the table already exists.
/// </summary>
private void EnsureTransformFeedbackInitialized()
{
    if (!HasTransformFeedbackOutputs() || _transformFeedbackOutputs != null)
    {
        return;
    }

    TransformFeedbackOutput[] outputs = new TransformFeedbackOutput[0xc0];

    // One bit per group of 4 words that has at least one output.
    ulong usedVectors = 0UL;

    for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
    {
        var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
        var stride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);

        for (int i = 0; i < locations.Length; i++)
        {
            byte wordOffset = locations[i];

            if (wordOffset >= 0xc0)
            {
                continue;
            }

            outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, i * 4, stride);
            usedVectors |= 1UL << (wordOffset / 4);
        }
    }

    // The field must be assigned before the loop below: HasPerLocationInputOrOutputComponent
    // ends up reading it, and re-entering this method must see it as already initialized.
    _transformFeedbackOutputs = outputs;

    while (usedVectors != 0)
    {
        int vecIndex = BitOperations.TrailingZeroCount(usedVectors);

        for (int subIndex = 0; subIndex < 4; subIndex++)
        {
            int wordOffset = vecIndex * 4 + subIndex;

            if (!outputs[wordOffset].Valid)
            {
                continue;
            }

            int byteOffset = wordOffset * 4;

            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location);

            int component = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)
                ? subIndex
                : 0;

            _transformFeedbackDefinitions.TryAdd(
                new TransformFeedbackVariable(ioVariable, location, component),
                outputs[wordOffset]);
        }

        // Clear the bit that was just processed.
        usedVectors &= ~(1UL << vecIndex);
    }
}
/// <summary>
/// Gets the transform feedback output table, creating it first if needed.
/// </summary>
/// <returns>The table, or null when the stage has no transform feedback outputs</returns>
public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs;
}

/// <summary>
/// Looks up the transform feedback output registered for a given variable.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="location">Variable location</param>
/// <param name="component">Variable component</param>
/// <param name="transformFeedbackOutput">Output found, if any</param>
/// <returns>True if an output is registered for the variable, false otherwise</returns>
public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackVariable key = new(ioVariable, location, component);

    return _transformFeedbackDefinitions.TryGetValue(key, out transformFeedbackOutput);
}
/// <summary>
/// Checks if transform feedback is enabled and this stage is either the last of the
/// vertex pipeline or the fragment stage.
/// </summary>
private bool HasTransformFeedbackOutputs()
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return LastInVertexPipeline || Stage == ShaderStage.Fragment;
}

/// <summary>
/// Checks if transform feedback applies to the inputs or outputs of this stage.
/// </summary>
/// <param name="isOutput">True to check outputs (last vertex pipeline stage), false to check inputs (fragment stage)</param>
public bool HasTransformFeedbackOutputs(bool isOutput)
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
}
/// <summary>
/// Checks if the variable is declared once per location.
/// User defined variables qualify unless indexing is in use for that direction;
/// fragment output colors always qualify.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
{
    if (ioVariable != IoVariable.UserDefined)
    {
        return ioVariable == IoVariable.FragmentOutputColor;
    }

    FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

    return !UsedFeatures.HasFlag(indexingFlag);
}

/// <summary>
/// Checks if a user defined variable component must be declared on its own,
/// which is the case when its transform feedback run has a single component.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="location">Variable location</param>
/// <param name="component">Variable component</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
{
    return ioVariable == IoVariable.UserDefined &&
           HasTransformFeedbackOutputs(isOutput) &&
           GetTransformFeedbackOutputComponents(location, component) == 1;
}
/// <summary>
/// Gets the transform feedback output at a word offset of the output table.
/// </summary>
/// <param name="wordOffset">Index into the output table</param>
public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
{
    EnsureTransformFeedbackInitialized();

    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs[wordOffset];
}

/// <summary>
/// Gets the transform feedback output of a user attribute component.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="component">Component of the attribute</param>
public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
{
    int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;

    return GetTransformFeedbackOutput(wordOffset);
}
/// <summary>
/// Counts how many components, starting from the first component of the location, are written
/// to consecutive transform feedback offsets.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="component">Component being queried</param>
/// <returns>The length of the run, or 1 when <paramref name="component"/> falls outside of it</returns>
public int GetTransformFeedbackOutputComponents(int location, int component)
{
    EnsureTransformFeedbackInitialized();

    int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
    int index = baseIndex + component;
    int count = 1;

    while (count < 4)
    {
        ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
        ref var curr = ref _transformFeedbackOutputs[baseIndex + count];

        // The run continues only while both entries are valid and 4 bytes apart.
        bool contiguous = prev.Valid && curr.Valid && prev.Offset + 4 == curr.Offset;

        if (!contiguous)
        {
            break;
        }

        count++;
    }

    return baseIndex + count <= index ? 1 : count;
}
/// <summary>
/// Gets the type of a fragment color output: a 4 component vector of the queried element type.
/// </summary>
/// <param name="location">Fragment output location</param>
public AggregateType GetFragmentOutputColorType(int location)
{
    AggregateType elementType = GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();

    return AggregateType.Vector4 | elementType;
}

/// <summary>
/// Gets the type of a user defined input or output.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public AggregateType GetUserDefinedType(int location, bool isOutput)
{
    FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;

    if (UsedFeatures.HasFlag(indexingFlag))
    {
        // With indexing, all attributes are declared as a single array of float vectors.
        return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
    }

    // Only vertex shader inputs take their element type from the attribute type query.
    AggregateType elementType = Stage == ShaderStage.Vertex && !isOutput
        ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
        : AggregateType.FP32;

    return AggregateType.Vector4 | elementType;
}
/// <summary>
/// Gets the index of the register holding the fragment depth output.
/// </summary>
public int GetDepthRegister()
{
    // The depth register is always two registers after the last color output.
    int colorRegisters = BitOperations.PopCount((uint)OmapTargets);

    return colorRegisters + 1;
}

/// <summary>
/// Reads a word from constant buffer 1 and records how far into the buffer reads have reached.
/// </summary>
/// <param name="offset">Offset passed to the accessor; the read is accounted as 4 units long</param>
/// <returns>Value returned by the GPU accessor</returns>
public uint ConstantBuffer1Read(int offset)
{
    Cb1DataSize = Math.Max(Cb1DataSize, offset + 4);

    return GpuAccessor.ConstantBuffer1Read(offset);
}
/// <summary>
/// Gets the format an image should be declared with.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot passed through to the format query</param>
/// <returns>The queried format, Unknown if no format is needed, or R8G8B8A8Unorm as fallback</returns>
public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
{
    // When the formatted load extension is supported, we don't need to
    // specify a format, we can just declare it without a format and the GPU will handle it.
    if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
    {
        return TextureFormat.Unknown;
    }

    TextureFormat format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (format != TextureFormat.Unknown)
    {
        return format;
    }

    GpuAccessor.Log($"Unknown format for texture {handle}.");

    return TextureFormat.R8G8B8A8Unorm;
}

/// <summary>
/// Checks if the format is one of the two 32-bit integer formats accepted for atomics.
/// </summary>
private static bool FormatSupportsAtomic(TextureFormat format)
{
    return format is TextureFormat.R32Sint or TextureFormat.R32Uint;
}

/// <summary>
/// Gets the format an image used by atomic instructions should be declared with.
/// </summary>
/// <param name="handle">Texture handle</param>
/// <param name="cbufSlot">Constant buffer slot passed through to the format query</param>
/// <returns>The queried format if it supports atomics, R32Sint otherwise</returns>
public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
{
    // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
    // and must have a type specified. Default to R32Sint if not available.
    TextureFormat format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

    if (FormatSupportsAtomic(format))
    {
        return format;
    }

    GpuAccessor.Log($"Unsupported format for texture {handle}: {format}.");

    return TextureFormat.R32Sint;
}
/// <summary>
/// Adds to the accumulated <see cref="Size"/>.
/// </summary>
/// <param name="size">Amount to add</param>
public void SizeAdd(int size) => Size += size;

/// <summary>
/// Merges the clip distance mask, used features and used attribute masks of another configuration into this one.
/// </summary>
/// <param name="other">Configuration to take the values from</param>
public void InheritFrom(ShaderConfig other)
{
    UsedInputAttributes |= other.UsedInputAttributes;
    UsedOutputAttributes |= other.UsedOutputAttributes;
    UsedFeatures |= other.UsedFeatures;
    ClipDistancesWritten |= other.ClipDistancesWritten;
}
/// <summary>
/// Records that the layer output is written, along with the attribute passed by the caller.
/// </summary>
public void SetLayerOutputAttribute(int attr)
{
    LayerOutputAttribute = attr;
    LayerOutputWritten = true;
}

/// <summary>
/// Records the layer input attribute passed by the caller and flags it as present.
/// </summary>
public void SetGeometryShaderLayerInputAttribute(int attr)
{
    GpLayerInputAttribute = attr;
    HasLayerInputAttribute = true;
}

/// <summary>
/// Forces this stage to be treated as the last one of the vertex pipeline.
/// </summary>
public void SetLastInVertexPipeline() => LastInVertexPipeline = true;

/// <summary>
/// Marks a user input attribute as used, without touching the per-component tracking.
/// </summary>
public void SetInputUserAttributeFixedFunc(int index) => UsedInputAttributes |= 1 << index;

/// <summary>
/// Marks a user output attribute as used.
/// </summary>
public void SetOutputUserAttributeFixedFunc(int index) => UsedOutputAttributes |= 1 << index;

/// <summary>
/// Marks a component of a user input attribute as used by this stage.
/// </summary>
/// <param name="index">Attribute index</param>
/// <param name="component">Component of the attribute</param>
public void SetInputUserAttribute(int index, int component)
{
    int attributeBit = 1 << index;
    int componentBit = index * 4 + component;

    UsedInputAttributes |= attributeBit;
    _thisUsedInputAttributes |= attributeBit;
    ThisInputAttributesComponents |= UInt128.One << componentBit;
}

/// <summary>
/// Marks a per-patch input attribute as used.
/// </summary>
public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);

/// <summary>
/// Marks a user output attribute as used.
/// </summary>
public void SetOutputUserAttribute(int index) => UsedOutputAttributes |= 1 << index;

/// <summary>
/// Marks a per-patch output attribute as used.
/// </summary>
public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
/// <summary>
/// Merges information from the stage that comes after this one: the inputs it uses become
/// outputs of this stage, per-patch attributes get locations shared by both stages, and
/// <see cref="LastInVertexPipeline"/> is cleared when the next stage can still modify outputs.
/// </summary>
/// <param name="config">Configuration of the next stage</param>
public void MergeFromtNextStage(ShaderConfig config)
{
    NextInputAttributesComponents = config.ThisInputAttributesComponents;
    NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
    NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
    MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);

    if (UsedOutputAttributesPerPatch.Count != 0)
    {
        // Regular and per-patch input/output locations can't overlap,
        // so we must assign on our location using unused regular input/output locations.
        Dictionary<int, int> locationsMap = new();

        int freeMask = ~UsedOutputAttributes;

        foreach (int attr in UsedOutputAttributesPerPatch)
        {
            // TrailingZeroCount returns 32 when no bit is set, meaning all locations are taken.
            int location = BitOperations.TrailingZeroCount(freeMask);
            if (location == 32)
            {
                config.GpuAccessor.Log($"Not enough free locations for patch input/output 0x{attr:X}.");
                break;
            }

            locationsMap.Add(attr, location);
            freeMask &= ~(1 << location);
        }

        // Both stages must agree on the locations, so use the same "map" for both.
        _perPatchAttributeLocations = locationsMap;
        config._perPatchAttributeLocations = locationsMap;
    }

    // We don't consider geometry shaders using the geometry shader passthrough feature
    // as being the last because when this feature is used, it can't actually modify any of the outputs,
    // so the stage that comes before it is the last one that can do modifications.
    if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough))
    {
        LastInVertexPipeline = false;
    }
}
/// <summary>
/// Records the attributes consumed by the next stage and merges them into this stage's outputs.
/// With geometry passthrough, the outputs are left alone and the attributes that this stage
/// does not write are recorded on <see cref="PassthroughAttributes"/> instead.
/// </summary>
/// <param name="mask">Bit mask of user attributes used as input by the next stage</param>
/// <param name="perPatch">Per-patch attributes used as input by the next stage</param>
public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
{
    _nextUsedInputAttributes = mask;

    if (!GpPassthrough)
    {
        UsedOutputAttributes |= mask;
        UsedOutputAttributesPerPatch.UnionWith(perPatch);

        return;
    }

    PassthroughAttributes = mask & ~UsedOutputAttributes;
}

/// <summary>
/// Gets the location assigned to a per-patch attribute.
/// </summary>
/// <param name="index">Per-patch attribute index</param>
/// <returns>The assigned location, or <paramref name="index"/> itself when none was assigned</returns>
public int GetPerPatchAttributeLocation(int index)
{
    Dictionary<int, int> locations = _perPatchAttributeLocations;

    if (locations != null && locations.TryGetValue(index, out int location))
    {
        return location;
    }

    return index;
}
/// <summary>
/// Checks if an output attribute should be considered used.
/// </summary>
/// <param name="attr">Attribute offset</param>
/// <returns>False only for user attributes not read by a next stage that uses fixed function attributes</returns>
public bool IsUsedOutputAttribute(int attr)
{
    bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;

    // The check for fixed function attributes on the next stage is conservative,
    // returning false if the output is just not used by the next stage is also valid.
    if (!NextUsesFixedFuncAttributes || !isUserAttribute)
    {
        return true;
    }

    // Each user attribute spans 16 units of the attribute offset.
    int index = (attr - AttributeConsts.UserAttributeBase) >> 4;
    int attributeBit = 1 << index;

    return (_nextUsedInputAttributes & attributeBit) != 0;
}
/// <summary>
/// Finds the Nth user attribute index that is not in use.
/// </summary>
/// <param name="isOutput">True to search the attributes read by the next stage, false to search the inputs of this stage</param>
/// <param name="index">Zero based ordinal of the free attribute wanted (0 is the lowest free one)</param>
/// <returns>The attribute index, or -1 if there are not enough free attributes</returns>
public int GetFreeUserAttribute(bool isOutput, int index)
{
    int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;

    while (true)
    {
        // TrailingZeroCount returns 32 when the inverted mask has no bit set,
        // which means every attribute has been taken.
        int bit = BitOperations.TrailingZeroCount(~useMask);

        if (bit == 32)
        {
            // Exhausted before reaching the requested ordinal. Stopping the loop as soon as
            // the mask becomes full would hand out the previous ordinal's slot again.
            return -1;
        }

        if (index < 1)
        {
            return bit;
        }

        // Skip this free slot and look for the next one.
        useMask |= 1 << bit;
        index--;
    }
}
/// <summary>
/// Marks every user input attribute, and every component of them, as used.
/// </summary>
public void SetAllInputUserAttributes()
{
    // Mask with the low (MaxAttributes * 4) bits set, one per attribute component.
    UInt128 allComponents = ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4);

    UsedInputAttributes |= Constants.AllAttributesMask;
    ThisInputAttributesComponents |= allComponents;
}

/// <summary>
/// Marks every user output attribute as used.
/// </summary>
public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;

/// <summary>
/// Sets the bit of a clip distance on <see cref="ClipDistancesWritten"/>.
/// </summary>
/// <param name="index">Clip distance index</param>
public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);

/// <summary>
/// Adds flags to <see cref="UsedFeatures"/>.
/// </summary>
/// <param name="flags">Flags to add</param>
public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
/// <summary>
/// Creates the program information from the resource descriptors and the state accumulated on this configuration.
/// </summary>
/// <param name="identification">Identification to store on the program information</param>
public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
{
    // Descriptors are fetched in the same order the constructor takes them.
    var constantBuffers = ResourceManager.GetConstantBufferDescriptors();
    var storageBuffers = ResourceManager.GetStorageBufferDescriptors();
    var textures = ResourceManager.GetTextureDescriptors();
    var images = ResourceManager.GetImageDescriptors();

    FeatureFlags features = UsedFeatures;

    return new ShaderProgramInfo(
        constantBuffers,
        storageBuffers,
        textures,
        images,
        identification,
        GpLayerInputAttribute,
        Stage,
        features.HasFlag(FeatureFlags.FragCoordXY),
        features.HasFlag(FeatureFlags.InstanceId),
        features.HasFlag(FeatureFlags.DrawParameters),
        features.HasFlag(FeatureFlags.RtLayer),
        ClipDistancesWritten,
        OmapTargets);
}
}
}

View file

@ -0,0 +1,315 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Numerics;
namespace Ryujinx.Graphics.Shader.Translation
{
class ShaderDefinitions
{
// Graphics state passed to the graphics constructors; left at its default value by the other constructors.
private readonly GpuGraphicsState _graphicsState;
public ShaderStage Stage { get; }
// Only assigned by the constructor that takes the compute local size.
public int ComputeLocalSizeX { get; }
public int ComputeLocalSizeY { get; }
public int ComputeLocalSizeZ { get; }
// Properties with an expression body forward to the graphics state.
public bool TessCw => _graphicsState.TessCw;
public TessPatchType TessPatchType => _graphicsState.TessPatchType;
public TessSpacing TessSpacing => _graphicsState.TessSpacing;
// Dithering is only reported as enabled when alpha to coverage itself is enabled.
public bool AlphaToCoverageDitherEnable => _graphicsState.AlphaToCoverageEnable && _graphicsState.AlphaToCoverageDitherEnable;
public bool ViewportTransformDisable => _graphicsState.ViewportTransformDisable;
public bool DepthMode => _graphicsState.DepthMode;
public float PointSize => _graphicsState.PointSize;
public AlphaTestOp AlphaTestCompare => _graphicsState.AlphaTestCompare;
public float AlphaTestReference => _graphicsState.AlphaTestReference;
public bool GpPassthrough { get; }
// Initialized as "stage < Fragment" by the full constructor; settable by other code afterwards.
public bool LastInVertexPipeline { get; set; }
public int ThreadsPerInputPrimitive { get; }
public InputTopology InputTopology => _graphicsState.Topology;
public OutputTopology OutputTopology { get; }
public int MaxOutputVertices { get; }
public bool DualSourceBlend => _graphicsState.DualSourceBlendEnable;
public bool EarlyZForce => _graphicsState.EarlyZForce;
public bool YNegateEnabled => _graphicsState.YNegateEnabled;
public bool OriginUpperLeft => _graphicsState.OriginUpperLeft;
public ImapPixelType[] ImapTypes { get; }
// Set by EnableInputIndexing and EnableOutputIndexing; never cleared by this class.
public bool IaIndexing { get; private set; }
public bool OaIndexing { get; private set; }
public int OmapTargets { get; }
public bool OmapSampleMask { get; }
public bool OmapDepth { get; }
// Only assigned by the full constructor, as is the output table below.
public bool TransformFeedbackEnabled { get; }
private readonly TransformFeedbackOutput[] _transformFeedbackOutputs;
/// <summary>
/// Dictionary key identifying a transform feedback variable by I/O variable, location and component.
/// </summary>
readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
{
    public IoVariable IoVariable { get; }
    public int Location { get; }
    public int Component { get; }

    public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
    {
        Component = component;
        Location = location;
        IoVariable = ioVariable;
    }

    public override bool Equals(object other) => other is TransformFeedbackVariable tfbVar && Equals(tfbVar);

    public bool Equals(TransformFeedbackVariable other)
    {
        if (IoVariable != other.IoVariable)
        {
            return false;
        }

        return Location == other.Location && Component == other.Component;
    }

    // Packs the three members into a single integer: variable in the low bits,
    // location shifted by 8 and component shifted by 16.
    public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);

    public override string ToString() => $"{IoVariable}.{Location}.{Component}";
}
private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
/// <summary>
/// Creates definitions that only carry the shader stage.
/// </summary>
/// <param name="stage">Shader stage</param>
public ShaderDefinitions(ShaderStage stage)
{
    Stage = stage;
}

/// <summary>
/// Creates definitions carrying the stage and the compute local size.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="computeLocalSizeX">Local size on the X dimension</param>
/// <param name="computeLocalSizeY">Local size on the Y dimension</param>
/// <param name="computeLocalSizeZ">Local size on the Z dimension</param>
public ShaderDefinitions(
    ShaderStage stage,
    int computeLocalSizeX,
    int computeLocalSizeY,
    int computeLocalSizeZ) : this(stage)
{
    ComputeLocalSizeX = computeLocalSizeX;
    ComputeLocalSizeY = computeLocalSizeY;
    ComputeLocalSizeZ = computeLocalSizeZ;
}

/// <summary>
/// Creates definitions carrying the graphics state and the geometry related settings.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="graphicsState">Graphics state most properties are forwarded to</param>
/// <param name="gpPassthrough">Value stored on <see cref="GpPassthrough"/></param>
/// <param name="threadsPerInputPrimitive">Threads per input primitive</param>
/// <param name="outputTopology">Output topology</param>
/// <param name="maxOutputVertices">Maximum number of output vertices</param>
public ShaderDefinitions(
    ShaderStage stage,
    GpuGraphicsState graphicsState,
    bool gpPassthrough,
    int threadsPerInputPrimitive,
    OutputTopology outputTopology,
    int maxOutputVertices) : this(stage)
{
    _graphicsState = graphicsState;

    GpPassthrough = gpPassthrough;
    ThreadsPerInputPrimitive = threadsPerInputPrimitive;
    OutputTopology = outputTopology;
    MaxOutputVertices = maxOutputVertices;
}
/// <summary>
/// Creates definitions carrying the graphics state, the shader header I/O maps and transform feedback information.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <param name="graphicsState">Graphics state most properties are forwarded to</param>
/// <param name="gpPassthrough">Value stored on <see cref="GpPassthrough"/></param>
/// <param name="threadsPerInputPrimitive">Threads per input primitive</param>
/// <param name="outputTopology">Output topology</param>
/// <param name="maxOutputVertices">Maximum number of output vertices</param>
/// <param name="imapTypes">Value stored on <see cref="ImapTypes"/></param>
/// <param name="omapTargets">Value stored on <see cref="OmapTargets"/></param>
/// <param name="omapSampleMask">Value stored on <see cref="OmapSampleMask"/></param>
/// <param name="omapDepth">Value stored on <see cref="OmapDepth"/></param>
/// <param name="transformFeedbackEnabled">Value stored on <see cref="TransformFeedbackEnabled"/></param>
/// <param name="transformFeedbackVecMap">Bit mask with one bit per group of 4 words of the output table to scan</param>
/// <param name="transformFeedbackOutputs">Transform feedback output table, indexed by word offset</param>
public ShaderDefinitions(
    ShaderStage stage,
    GpuGraphicsState graphicsState,
    bool gpPassthrough,
    int threadsPerInputPrimitive,
    OutputTopology outputTopology,
    int maxOutputVertices,
    ImapPixelType[] imapTypes,
    int omapTargets,
    bool omapSampleMask,
    bool omapDepth,
    bool transformFeedbackEnabled,
    ulong transformFeedbackVecMap,
    TransformFeedbackOutput[] transformFeedbackOutputs)
{
    Stage = stage;
    _graphicsState = graphicsState;
    GpPassthrough = gpPassthrough;
    ThreadsPerInputPrimitive = threadsPerInputPrimitive;
    OutputTopology = outputTopology;
    MaxOutputVertices = maxOutputVertices;

    ImapTypes = imapTypes;
    OmapTargets = omapTargets;
    OmapSampleMask = omapSampleMask;
    OmapDepth = omapDepth;

    // Everything below must be assigned before the loop, because
    // HasPerLocationInputOrOutputComponent reads all of it.
    LastInVertexPipeline = stage < ShaderStage.Fragment;
    TransformFeedbackEnabled = transformFeedbackEnabled;
    _transformFeedbackOutputs = transformFeedbackOutputs;
    _transformFeedbackDefinitions = new();

    ulong pendingVectors = transformFeedbackVecMap;

    while (pendingVectors != 0)
    {
        int vecIndex = BitOperations.TrailingZeroCount(pendingVectors);

        for (int subIndex = 0; subIndex < 4; subIndex++)
        {
            int wordOffset = vecIndex * 4 + subIndex;

            if (!transformFeedbackOutputs[wordOffset].Valid)
            {
                continue;
            }

            int byteOffset = wordOffset * 4;

            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location);

            int component = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)
                ? subIndex
                : 0;

            _transformFeedbackDefinitions.TryAdd(
                new TransformFeedbackVariable(ioVariable, location, component),
                transformFeedbackOutputs[wordOffset]);
        }

        // Clear the bit that was just processed.
        pendingVectors &= ~(1UL << vecIndex);
    }
}
/// <summary>
/// Flags the inputs as being accessed with indexing.
/// </summary>
public void EnableInputIndexing() => IaIndexing = true;

/// <summary>
/// Flags the outputs as being accessed with indexing.
/// </summary>
public void EnableOutputIndexing() => OaIndexing = true;
/// <summary>
/// Gets the transform feedback output table.
/// </summary>
/// <returns>The table, or null when the stage has no transform feedback outputs</returns>
public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
{
    return HasTransformFeedbackOutputs() ? _transformFeedbackOutputs : null;
}

/// <summary>
/// Looks up the transform feedback output registered for a given variable.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="location">Variable location</param>
/// <param name="component">Variable component</param>
/// <param name="transformFeedbackOutput">Output found, or the default value if none</param>
/// <returns>True if an output is registered for the variable, false otherwise</returns>
public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
{
    if (HasTransformFeedbackOutputs())
    {
        TransformFeedbackVariable key = new(ioVariable, location, component);

        return _transformFeedbackDefinitions.TryGetValue(key, out transformFeedbackOutput);
    }

    transformFeedbackOutput = default;

    return false;
}
/// <summary>
/// Checks if transform feedback is enabled and this stage is either the last of the
/// vertex pipeline or the fragment stage.
/// </summary>
private bool HasTransformFeedbackOutputs()
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return LastInVertexPipeline || Stage == ShaderStage.Fragment;
}

/// <summary>
/// Checks if transform feedback applies to the inputs or outputs of this stage.
/// </summary>
/// <param name="isOutput">True to check outputs (last vertex pipeline stage), false to check inputs (fragment stage)</param>
public bool HasTransformFeedbackOutputs(bool isOutput)
{
    if (!TransformFeedbackEnabled)
    {
        return false;
    }

    return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
}
/// <summary>
/// Checks if the variable is declared once per location.
/// User defined variables qualify unless indexing is enabled for that direction;
/// fragment output colors always qualify.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
{
    if (ioVariable != IoVariable.UserDefined)
    {
        return ioVariable == IoVariable.FragmentOutputColor;
    }

    bool usesIndexing = isOutput ? OaIndexing : IaIndexing;

    return !usesIndexing;
}

/// <summary>
/// Checks if a user defined variable component must be declared on its own,
/// which is the case when its transform feedback run has a single component.
/// </summary>
/// <param name="ioVariable">I/O variable</param>
/// <param name="location">Variable location</param>
/// <param name="component">Variable component</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
{
    return ioVariable == IoVariable.UserDefined &&
           HasTransformFeedbackOutputs(isOutput) &&
           GetTransformFeedbackOutputComponents(location, component) == 1;
}
/// <summary>
/// Gets the transform feedback output at a word offset of the output table.
/// </summary>
/// <param name="wordOffset">Index into the output table</param>
public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
{
    TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;

    return outputs[wordOffset];
}

/// <summary>
/// Gets the transform feedback output of a user attribute component.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="component">Component of the attribute</param>
public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
{
    int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;

    return GetTransformFeedbackOutput(wordOffset);
}
/// <summary>
/// Counts how many components, starting from the first component of the location, are written
/// to consecutive transform feedback offsets.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="component">Component being queried</param>
/// <returns>The length of the run, or 1 when <paramref name="component"/> falls outside of it</returns>
public int GetTransformFeedbackOutputComponents(int location, int component)
{
    int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
    int index = baseIndex + component;
    int count = 1;

    while (count < 4)
    {
        ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
        ref var curr = ref _transformFeedbackOutputs[baseIndex + count];

        // The run continues only while both entries are valid and 4 bytes apart.
        bool contiguous = prev.Valid && curr.Valid && prev.Offset + 4 == curr.Offset;

        if (!contiguous)
        {
            break;
        }

        count++;
    }

    return baseIndex + count <= index ? 1 : count;
}
/// <summary>
/// Gets the type of a fragment color output: a 4 component vector of the element type
/// found on the graphics state for that location.
/// </summary>
/// <param name="location">Fragment output location</param>
public AggregateType GetFragmentOutputColorType(int location)
{
    AggregateType elementType = _graphicsState.FragmentOutputTypes[location].ToAggregateType();

    return AggregateType.Vector4 | elementType;
}

/// <summary>
/// Gets the type of a user defined input or output.
/// </summary>
/// <param name="location">User attribute location</param>
/// <param name="isOutput">True for outputs, false for inputs</param>
public AggregateType GetUserDefinedType(int location, bool isOutput)
{
    bool usesIndexing = isOutput ? OaIndexing : IaIndexing;

    if (usesIndexing)
    {
        // With indexing, all attributes are declared as a single array of float vectors.
        return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
    }

    // Only vertex shader inputs take their element type from the graphics state.
    AggregateType elementType = Stage == ShaderStage.Vertex && !isOutput
        ? _graphicsState.AttributeTypes[location].ToAggregateType()
        : AggregateType.FP32;

    return AggregateType.Vector4 | elementType;
}
}
}

View file

@ -5,18 +5,22 @@ namespace Ryujinx.Graphics.Shader.Translation
{
static class ShaderIdentifier
{
public static ShaderIdentification Identify(IReadOnlyList<Function> functions, ShaderConfig config)
public static ShaderIdentification Identify(
IReadOnlyList<Function> functions,
IGpuAccessor gpuAccessor,
ShaderStage stage,
InputTopology inputTopology,
out int layerInputAttr)
{
if (config.Stage == ShaderStage.Geometry &&
config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
!config.GpuAccessor.QueryHostSupportsGeometryShader() &&
IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
if (stage == ShaderStage.Geometry &&
inputTopology == InputTopology.Triangles &&
!gpuAccessor.QueryHostSupportsGeometryShader() &&
IsLayerPassthroughGeometryShader(functions, out layerInputAttr))
{
config.SetGeometryShaderLayerInputAttribute(layerInputAttr);
return ShaderIdentification.GeometryLayerPassthrough;
}
layerInputAttr = 0;
return ShaderIdentification.None;
}

View file

@ -0,0 +1,33 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
namespace Ryujinx.Graphics.Shader.Translation
{
/// <summary>
/// Bundle of state handed to the transform passes.
/// </summary>
readonly ref struct TransformContext
{
    /// <summary>Helper function manager.</summary>
    public readonly HelperFunctionManager Hfm;

    /// <summary>Basic blocks the passes operate on.</summary>
    public readonly BasicBlock[] Blocks;

    /// <summary>Resource manager of the shader.</summary>
    public readonly ResourceManager ResourceManager;

    /// <summary>Accessor used to query guest and host state.</summary>
    public readonly IGpuAccessor GpuAccessor;

    /// <summary>Language the shader is being translated to.</summary>
    public readonly TargetLanguage TargetLanguage;

    /// <summary>Stage of the shader.</summary>
    public readonly ShaderStage Stage;

    /// <summary>
    /// Reference to the caller's feature flags; a pass that writes to it updates the caller's variable.
    /// </summary>
    public readonly ref FeatureFlags UsedFeatures;

    public TransformContext(
        HelperFunctionManager hfm,
        BasicBlock[] blocks,
        ResourceManager resourceManager,
        IGpuAccessor gpuAccessor,
        TargetLanguage targetLanguage,
        ShaderStage stage,
        ref FeatureFlags usedFeatures)
    {
        UsedFeatures = ref usedFeatures;
        Stage = stage;
        TargetLanguage = targetLanguage;
        GpuAccessor = gpuAccessor;
        ResourceManager = resourceManager;
        Blocks = blocks;
        Hfm = hfm;
    }
}
}

View file

@ -0,0 +1,18 @@
namespace Ryujinx.Graphics.Shader.Translation
{
/// <summary>
/// Destination of a single transform feedback output word.
/// The default value has <see cref="Valid"/> set to false; the constructor always sets it to true.
/// </summary>
readonly struct TransformFeedbackOutput
{
    public readonly bool Valid;
    public readonly int Buffer;
    public readonly int Offset;
    public readonly int Stride;

    /// <summary>
    /// Creates a valid transform feedback output.
    /// </summary>
    /// <param name="buffer">Value stored on <see cref="Buffer"/></param>
    /// <param name="offset">Value stored on <see cref="Offset"/></param>
    /// <param name="stride">Value stored on <see cref="Stride"/></param>
    public TransformFeedbackOutput(int buffer, int offset, int stride)
        => (Valid, Buffer, Offset, Stride) = (true, buffer, offset, stride);
}
}

View file

@ -0,0 +1,93 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Vertex shader pass that handles reads of the base vertex, base instance and draw index
/// values from constant buffer slot 0. When the accessor reports constant buffer draw parameters,
/// the reads are replaced with input loads; in both cases the draw parameters feature flag is set
/// if any such read is found.
/// </summary>
class DrawParametersReplace : ITransformPass
{
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        bool isVertex = stage == ShaderStage.Vertex;

        return isVertex;
    }

    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation operation = (Operation)node.Value;

        bool usesDrawParameters = context.GpuAccessor.QueryHasConstantBufferDrawParameters()
            ? ReplaceConstantBufferWithDrawParameters(node, operation)
            : HasConstantBufferDrawParameters(operation);

        if (usesDrawParameters)
        {
            context.UsedFeatures |= FeatureFlags.DrawParameters;
        }

        return node;
    }

    // Replaces every draw parameter source of the operation with a load inserted before the node.
    private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
    {
        bool replacedAny = false;

        for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
        {
            if (!TryGetDrawParameter(operation.GetSource(srcIndex), out IoVariable ioVariable))
            {
                continue;
            }

            Operand loaded = Local();

            node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));
            operation.SetSource(srcIndex, loaded);

            replacedAny = true;
        }

        return replacedAny;
    }

    // Checks if any source of the operation reads one of the draw parameters.
    private static bool HasConstantBufferDrawParameters(Operation operation)
    {
        for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
        {
            if (TryGetDrawParameter(operation.GetSource(srcIndex), out _))
            {
                return true;
            }
        }

        return false;
    }

    // Maps a constant buffer slot 0 operand at one of the draw parameter offsets to its I/O variable.
    private static bool TryGetDrawParameter(Operand src, out IoVariable ioVariable)
    {
        ioVariable = default;

        if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
        {
            return false;
        }

        switch (src.GetCbufOffset())
        {
            case Constants.NvnBaseVertexByteOffset / 4:
                ioVariable = IoVariable.BaseVertex;
                return true;
            case Constants.NvnBaseInstanceByteOffset / 4:
                ioVariable = IoVariable.BaseInstance;
                return true;
            case Constants.NvnDrawIndexByteOffset / 4:
                ioVariable = IoVariable.DrawIndex;
                return true;
            default:
                return false;
        }
    }
}
}

View file

@ -0,0 +1,36 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Fragment shader pass that marks specific additions as precise when the host
/// operates with reduced precision.
/// </summary>
class ForcePreciseEnable : ITransformPass
{
    /// <summary>
    /// The pass runs for fragment shaders when the host reports reduced precision.
    /// </summary>
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        return stage == ShaderStage.Fragment && gpuAccessor.QueryHostReducedPrecision();
    }

    /// <summary>
    /// Looks for the pattern "1.0 / (x + constant)" and forces the addition to be precise.
    /// </summary>
    /// <param name="context">Transform context (not used by this pass)</param>
    /// <param name="node">Node holding the operation to inspect</param>
    /// <returns>The same node that was passed in</returns>
    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        // Some shaders add a small bias to a value before dividing by it, in order to avoid
        // a division by zero. With reduced precision, that bias may be rounded to 0, which
        // brings the division by zero back. To avoid that, the addition is forced to be
        // precise, even if the host prefers imprecise operations for performance.

        Operation operation = (Operation)node.Value;

        if (operation.Inst != (Instruction.FP32 | Instruction.Divide))
        {
            return node;
        }

        Operand dividend = operation.GetSource(0);

        if (dividend.Type != OperandType.Constant || dividend.AsFloat() != 1f)
        {
            return node;
        }

        bool divisorIsBiasedValue =
            operation.GetSource(1).AsgOp is Operation addOp &&
            addOp.Inst == (Instruction.FP32 | Instruction.Add) &&
            addOp.GetSource(1).Type == OperandType.Constant;

        if (divisorIsBiasedValue)
        {
            ((Operation)operation.GetSource(1).AsgOp).ForcePrecise = true;
        }

        return node;
    }
}
}

View file

@ -0,0 +1,11 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Contract implemented by the IR transform passes. All members are static,
/// so a pass is selected by type and never instantiated.
/// </summary>
interface ITransformPass
{
    /// <summary>
    /// Checks if the pass needs to run for the shader being translated.
    /// </summary>
    /// <param name="gpuAccessor">GPU accessor</param>
    /// <param name="stage">Shader stage</param>
    /// <param name="targetLanguage">Target language of the translation</param>
    /// <param name="usedFeatures">Features used by the shader so far</param>
    /// <returns>True if the pass should run, false if it can be skipped</returns>
    static abstract bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures);

    /// <summary>
    /// Runs the pass on a single node of a basic block.
    /// </summary>
    /// <param name="context">Transform context</param>
    /// <param name="node">Node to process</param>
    /// <returns>
    /// Node from where the caller should continue. This is the input node, or the
    /// node that replaced it if the pass removed the input node from the list
    /// </returns>
    static abstract LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node);
}
}

View file

@ -0,0 +1,58 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System.Collections.Generic;
using System.Diagnostics;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Compute shader pass that replaces signed 32-bit atomic minimum and maximum
/// operations on shared memory with calls to helper functions.
/// </summary>
class SharedAtomicSignedCas : ITransformPass
{
    /// <summary>
    /// The pass runs for compute shaders that use shared memory, unless the target is SPIR-V.
    /// </summary>
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        return targetLanguage != TargetLanguage.Spirv && stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory);
    }

    /// <summary>
    /// Replaces a shared memory AtomicMaxS32 or AtomicMinS32 with a helper function call.
    /// </summary>
    /// <param name="context">Transform context</param>
    /// <param name="node">Node holding the operation to process</param>
    /// <returns>The node of the new call if a replacement happened, otherwise the input node</returns>
    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation operation = (Operation)node.Value;

        HelperFunctionName name;

        switch (operation.Inst)
        {
            case Instruction.AtomicMaxS32:
                name = HelperFunctionName.SharedAtomicMaxS32;
                break;
            case Instruction.AtomicMinS32:
                name = HelperFunctionName.SharedAtomicMinS32;
                break;
            default:
                return node;
        }

        bool targetsSharedMemory = operation.StorageKind == StorageKind.SharedMemory;

        if (!targetsSharedMemory)
        {
            return node;
        }

        // Sources are: memory id, byte offset and value, in this order.
        Operand memoryId = operation.GetSource(0);

        Debug.Assert(memoryId.Type == OperandType.Constant);

        int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value);

        Operand[] callArgs = { Const(functionId), operation.GetSource(1), operation.GetSource(2) };

        // The call writes to the destination of the atomic operation it replaces.
        Operation call = new(Instruction.Call, 0, operation.Dest, callArgs);

        LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

        Utils.DeleteNode(node, operation);

        return callNode;
    }
}
}

View file

@ -0,0 +1,57 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System.Collections.Generic;
using System.Diagnostics;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Compute shader pass that replaces 8-bit and 16-bit stores to shared memory
/// with calls to helper functions.
/// </summary>
class SharedStoreSmallIntCas : ITransformPass
{
    /// <summary>
    /// The pass runs for compute shaders that use shared memory.
    /// </summary>
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        return stage == ShaderStage.Compute && usedFeatures.HasFlag(FeatureFlags.SharedMemory);
    }

    /// <summary>
    /// Replaces a store with SharedMemory8 or SharedMemory16 storage kind with a helper function call.
    /// </summary>
    /// <param name="context">Transform context</param>
    /// <param name="node">Node holding the operation to process</param>
    /// <returns>The node of the new call if a replacement happened, otherwise the input node</returns>
    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation operation = (Operation)node.Value;

        HelperFunctionName name;

        switch (operation.StorageKind)
        {
            case StorageKind.SharedMemory8:
                name = HelperFunctionName.SharedStore8;
                break;
            case StorageKind.SharedMemory16:
                name = HelperFunctionName.SharedStore16;
                break;
            default:
                return node;
        }

        bool isStore = operation.Inst == Instruction.Store;

        if (!isStore)
        {
            return node;
        }

        // Sources are: memory id, byte offset and value, in this order.
        Operand memoryId = operation.GetSource(0);

        Debug.Assert(memoryId.Type == OperandType.Constant);

        int functionId = context.Hfm.GetOrCreateFunctionId(name, memoryId.Value);

        Operand[] callArgs = { Const(functionId), operation.GetSource(1), operation.GetSource(2) };

        // A store produces no value, so the call has no destination.
        Operation call = new(Instruction.Call, 0, (Operand)null, callArgs);

        LinkedListNode<INode> callNode = node.List.AddBefore(node, call);

        Utils.DeleteNode(node, operation);

        return callNode;
    }
}
}

View file

@ -1,268 +1,45 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
static class Rewriter
class TexturePass : ITransformPass
{
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
{
bool isVertexShader = config.Stage == ShaderStage.Vertex;
bool isImpreciseFragmentShader = config.Stage == ShaderStage.Fragment && config.GpuAccessor.QueryHostReducedPrecision();
bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
bool hasVectorIndexingBug = config.GpuAccessor.QueryHostHasVectorIndexingBug();
bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();
return true;
}
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
{
if (node.Value is TextureOperation texOp)
{
BasicBlock block = blocks[blkIndex];
node = InsertTexelFetchScale(context.Hfm, node, context.ResourceManager, context.Stage);
node = InsertTextureSizeUnscale(context.Hfm, node, context.ResourceManager, context.Stage);
for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
if (texOp.Inst == Instruction.TextureSample)
{
if (node.Value is not Operation operation)
node = InsertCoordNormalization(context.Hfm, node, context.ResourceManager, context.GpuAccessor, context.Stage);
node = InsertCoordGatherBias(node, context.ResourceManager, context.GpuAccessor);
node = InsertConstOffsets(node, context.ResourceManager, context.GpuAccessor);
if (texOp.Type == SamplerType.TextureBuffer && !context.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat())
{
continue;
}
if (isVertexShader)
{
if (hasConstantBufferDrawParameters)
{
if (ReplaceConstantBufferWithDrawParameters(node, operation))
{
config.SetUsedFeature(FeatureFlags.DrawParameters);
}
}
else if (HasConstantBufferDrawParameters(operation))
{
config.SetUsedFeature(FeatureFlags.DrawParameters);
}
}
if (isImpreciseFragmentShader)
{
EnableForcePreciseIfNeeded(operation);
}
if (hasVectorIndexingBug)
{
InsertVectorComponentSelect(node, config);
}
if (operation is TextureOperation texOp)
{
node = InsertTexelFetchScale(hfm, node, config);
node = InsertTextureSizeUnscale(hfm, node, config);
if (texOp.Inst == Instruction.TextureSample)
{
node = InsertCoordNormalization(hfm, node, config);
node = InsertCoordGatherBias(node, config);
node = InsertConstOffsets(node, config);
if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat)
{
node = InsertSnormNormalization(node, config);
}
}
}
else
{
node = InsertSharedStoreSmallInt(hfm, node);
if (config.Options.TargetLanguage != TargetLanguage.Spirv)
{
node = InsertSharedAtomicSigned(hfm, node);
}
node = InsertSnormNormalization(node, context.ResourceManager, context.GpuAccessor);
}
}
}
return node;
}
private static void EnableForcePreciseIfNeeded(Operation operation)
{
// There are some cases where a small bias is added to values to prevent division by zero.
// When operating with reduced precision, it is possible for this bias to get rounded to 0
// and cause a division by zero.
// To prevent that, we force those operations to be precise even if the host wants
// imprecise operations for performance.
if (operation.Inst == (Instruction.FP32 | Instruction.Divide) &&
operation.GetSource(0).Type == OperandType.Constant &&
operation.GetSource(0).AsFloat() == 1f &&
operation.GetSource(1).AsgOp is Operation addOp &&
addOp.Inst == (Instruction.FP32 | Instruction.Add) &&
addOp.GetSource(1).Type == OperandType.Constant)
{
addOp.ForcePrecise = true;
}
}
private static void InsertVectorComponentSelect(LinkedListNode<INode> node, ShaderConfig config)
{
Operation operation = (Operation)node.Value;
if (operation.Inst != Instruction.Load ||
operation.StorageKind != StorageKind.ConstantBuffer ||
operation.SourcesCount < 3)
{
return;
}
Operand bindingIndex = operation.GetSource(0);
Operand fieldIndex = operation.GetSource(1);
Operand elemIndex = operation.GetSource(operation.SourcesCount - 1);
if (bindingIndex.Type != OperandType.Constant ||
fieldIndex.Type != OperandType.Constant ||
elemIndex.Type == OperandType.Constant)
{
return;
}
BufferDefinition buffer = config.Properties.ConstantBuffers[bindingIndex.Value];
StructureField field = buffer.Type.Fields[fieldIndex.Value];
int elemCount = (field.Type & AggregateType.ElementCountMask) switch
{
AggregateType.Vector2 => 2,
AggregateType.Vector3 => 3,
AggregateType.Vector4 => 4,
_ => 1,
};
if (elemCount == 1)
{
return;
}
Operand result = null;
for (int i = 0; i < elemCount; i++)
{
Operand value = Local();
Operand[] inputs = new Operand[operation.SourcesCount];
for (int srcIndex = 0; srcIndex < inputs.Length - 1; srcIndex++)
{
inputs[srcIndex] = operation.GetSource(srcIndex);
}
inputs[^1] = Const(i);
Operation loadOp = new(Instruction.Load, StorageKind.ConstantBuffer, value, inputs);
node.List.AddBefore(node, loadOp);
if (i == 0)
{
result = value;
}
else
{
Operand isCurrentIndex = Local();
Operand selection = Local();
Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(i) });
Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result });
node.List.AddBefore(node, compareOp);
node.List.AddBefore(node, selectOp);
result = selection;
}
}
operation.TurnIntoCopy(result);
}
private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node)
{
Operation operation = (Operation)node.Value;
HelperFunctionName name;
if (operation.StorageKind == StorageKind.SharedMemory8)
{
name = HelperFunctionName.SharedStore8;
}
else if (operation.StorageKind == StorageKind.SharedMemory16)
{
name = HelperFunctionName.SharedStore16;
}
else
{
return node;
}
if (operation.Inst != Instruction.Store)
{
return node;
}
Operand memoryId = operation.GetSource(0);
Operand byteOffset = operation.GetSource(1);
Operand value = operation.GetSource(2);
Debug.Assert(memoryId.Type == OperandType.Constant);
int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value);
Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };
LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs));
Utils.DeleteNode(node, operation);
return newNode;
}
private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node)
{
Operation operation = (Operation)node.Value;
HelperFunctionName name;
if (operation.Inst == Instruction.AtomicMaxS32)
{
name = HelperFunctionName.SharedAtomicMaxS32;
}
else if (operation.Inst == Instruction.AtomicMinS32)
{
name = HelperFunctionName.SharedAtomicMinS32;
}
else
{
return node;
}
if (operation.StorageKind != StorageKind.SharedMemory)
{
return node;
}
Operand result = operation.Dest;
Operand memoryId = operation.GetSource(0);
Operand byteOffset = operation.GetSource(1);
Operand value = operation.GetSource(2);
Debug.Assert(memoryId.Type == OperandType.Constant);
int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value);
Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value };
LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs));
Utils.DeleteNode(node, operation);
return newNode;
}
private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertTexelFetchScale(
HelperFunctionManager hfm,
LinkedListNode<INode> node,
ResourceManager resourceManager,
ShaderStage stage)
{
TextureOperation texOp = (TextureOperation)node.Value;
@ -280,20 +57,20 @@ namespace Ryujinx.Graphics.Shader.Translation
(intCoords || isImage) &&
!isBindless &&
!isIndexed &&
config.Stage.SupportsRenderScale() &&
stage.SupportsRenderScale() &&
TypeSupportsScale(texOp.Type))
{
int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TexelFetchScale);
int samplerIndex = isImage
? config.ResourceManager.GetTextureDescriptors().Length + config.ResourceManager.FindImageDescriptorIndex(texOp.Binding)
: config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding);
? resourceManager.GetTextureDescriptors().Length + resourceManager.FindImageDescriptorIndex(texOp.Binding)
: resourceManager.FindTextureDescriptorIndex(texOp.Binding);
for (int index = 0; index < coordsCount; index++)
{
Operand scaledCoord = Local();
Operand[] callArgs;
if (config.Stage == ShaderStage.Fragment)
if (stage == ShaderStage.Fragment)
{
callArgs = new Operand[] { Const(functionId), texOp.GetSource(coordsIndex + index), Const(samplerIndex), Const(index) };
}
@ -311,7 +88,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return node;
}
private static LinkedListNode<INode> InsertTextureSizeUnscale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertTextureSizeUnscale(
HelperFunctionManager hfm,
LinkedListNode<INode> node,
ResourceManager resourceManager,
ShaderStage stage)
{
TextureOperation texOp = (TextureOperation)node.Value;
@ -322,11 +103,11 @@ namespace Ryujinx.Graphics.Shader.Translation
texOp.Index < 2 &&
!isBindless &&
!isIndexed &&
config.Stage.SupportsRenderScale() &&
stage.SupportsRenderScale() &&
TypeSupportsScale(texOp.Type))
{
int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.TextureSizeUnscale);
int samplerIndex = config.ResourceManager.FindTextureDescriptorIndex(texOp.Binding);
int samplerIndex = resourceManager.FindTextureDescriptorIndex(texOp.Binding);
for (int index = texOp.DestsCount - 1; index >= 0; index--)
{
@ -356,19 +137,12 @@ namespace Ryujinx.Graphics.Shader.Translation
return node;
}
private static bool IsImageInstructionWithScale(Instruction inst)
{
// Currently, we don't support scaling images that are modified,
// so we only need to care about the load instruction.
return inst == Instruction.ImageLoad;
}
private static bool TypeSupportsScale(SamplerType type)
{
return (type & SamplerType.Mask) == SamplerType.Texture2D;
}
private static LinkedListNode<INode> InsertCoordNormalization(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertCoordNormalization(
HelperFunctionManager hfm,
LinkedListNode<INode> node,
ResourceManager resourceManager,
IGpuAccessor gpuAccessor,
ShaderStage stage)
{
// Emulate non-normalized coordinates by normalizing the coordinates on the shader.
// Without normalization, the coordinates are expected to the in the [0, W or H] range,
@ -386,9 +160,9 @@ namespace Ryujinx.Graphics.Shader.Translation
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
(int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
(int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
bool isCoordNormalized = config.GpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot);
bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot);
if (isCoordNormalized || intCoords)
{
@ -400,8 +174,6 @@ namespace Ryujinx.Graphics.Shader.Translation
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless || isIndexed ? 1 : 0;
config.SetUsedFeature(FeatureFlags.IntegerSampling);
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
for (int index = 0; index < normCoordsCount; index++)
@ -429,7 +201,7 @@ namespace Ryujinx.Graphics.Shader.Translation
new[] { coordSize },
texSizeSources));
config.ResourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
resourceManager.SetUsageFlagsForTextureQuery(texOp.Binding, texOp.Type);
Operand source = texOp.GetSource(coordsIndex + index);
@ -439,13 +211,13 @@ namespace Ryujinx.Graphics.Shader.Translation
texOp.SetSource(coordsIndex + index, coordNormalized);
InsertTextureSizeUnscale(hfm, textureSizeNode, config);
InsertTextureSizeUnscale(hfm, textureSizeNode, resourceManager, stage);
}
return node;
}
private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertCoordGatherBias(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
{
// The gather behavior when the coordinate sits right in the middle of two texels is not well defined.
// To ensure the correct texel is sampled, we add a small bias value to the coordinate.
@ -457,25 +229,18 @@ namespace Ryujinx.Graphics.Shader.Translation
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
int gatherBiasPrecision = config.GpuAccessor.QueryHostGatherBiasPrecision();
int gatherBiasPrecision = gpuAccessor.QueryHostGatherBiasPrecision();
if (!isGather || gatherBiasPrecision == 0)
{
return node;
}
#pragma warning disable IDE0059 // Remove unnecessary value assignment
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
#pragma warning restore IDE0059
int coordsCount = texOp.Type.GetDimensions();
int coordsIndex = isBindless || isIndexed ? 1 : 0;
config.SetUsedFeature(FeatureFlags.IntegerSampling);
int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
for (int index = 0; index < normCoordsCount; index++)
@ -524,7 +289,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return node;
}
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertConstOffsets(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
{
// Non-constant texture offsets are not allowed (according to the spec),
// however some GPUs does support that.
@ -540,7 +305,7 @@ namespace Ryujinx.Graphics.Shader.Translation
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
@ -673,8 +438,6 @@ namespace Ryujinx.Graphics.Shader.Translation
if (isGather && !isShadow)
{
config.SetUsedFeature(FeatureFlags.IntegerSampling);
Operand[] newSources = new Operand[sources.Length];
sources.CopyTo(newSources, 0);
@ -741,8 +504,6 @@ namespace Ryujinx.Graphics.Shader.Translation
}
else
{
config.SetUsedFeature(FeatureFlags.IntegerSampling);
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount);
for (int index = 0; index < coordsCount; index++)
@ -840,7 +601,7 @@ namespace Ryujinx.Graphics.Shader.Translation
return texSizes;
}
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ResourceManager resourceManager, IGpuAccessor gpuAccessor)
{
TextureOperation texOp = (TextureOperation)node.Value;
@ -851,9 +612,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return node;
}
(int cbufSlot, int handle) = config.ResourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
(int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding);
TextureFormat format = config.GpuAccessor.QueryTextureFormat(handle, cbufSlot);
TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot);
int maxPositive = format switch
{
@ -926,63 +687,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return res;
}
private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
private static bool IsImageInstructionWithScale(Instruction inst)
{
Operand GenerateLoad(IoVariable ioVariable)
{
Operand value = Local();
node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable)));
return value;
}
bool modified = false;
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand src = operation.GetSource(srcIndex);
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
{
switch (src.GetCbufOffset())
{
case Constants.NvnBaseVertexByteOffset / 4:
operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex));
modified = true;
break;
case Constants.NvnBaseInstanceByteOffset / 4:
operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance));
modified = true;
break;
case Constants.NvnDrawIndexByteOffset / 4:
operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex));
modified = true;
break;
}
}
}
return modified;
// Currently, we don't support scaling images that are modified,
// so we only need to care about the load instruction.
return inst == Instruction.ImageLoad;
}
private static bool HasConstantBufferDrawParameters(Operation operation)
private static bool TypeSupportsScale(SamplerType type)
{
for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
{
Operand src = operation.GetSource(srcIndex);
if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
{
switch (src.GetCbufOffset())
{
case Constants.NvnBaseVertexByteOffset / 4:
case Constants.NvnBaseInstanceByteOffset / 4:
case Constants.NvnDrawIndexByteOffset / 4:
return true;
}
}
}
return false;
return (type & SamplerType.Mask) == SamplerType.Texture2D;
}
}
}

View file

@ -0,0 +1,41 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Entry point that runs all IR transform passes.
/// </summary>
static class TransformPasses
{
    /// <summary>
    /// Runs every transform pass, in a fixed order, over the blocks of the context.
    /// Each pass walks all blocks before the next pass starts.
    /// </summary>
    /// <param name="context">Transform context</param>
    public static void RunPass(TransformContext context)
    {
        RunPass<DrawParametersReplace>(context);
        RunPass<ForcePreciseEnable>(context);
        RunPass<VectorComponentSelect>(context);
        RunPass<TexturePass>(context);
        RunPass<SharedStoreSmallIntCas>(context);
        RunPass<SharedAtomicSignedCas>(context);
    }

    /// <summary>
    /// Runs a single pass over all operations of all blocks, if the pass is enabled.
    /// </summary>
    /// <typeparam name="T">Type of the pass to run</typeparam>
    /// <param name="context">Transform context</param>
    private static void RunPass<T>(TransformContext context) where T : ITransformPass
    {
        bool enabled = T.IsEnabled(context.GpuAccessor, context.Stage, context.TargetLanguage, context.UsedFeatures);

        if (!enabled)
        {
            return;
        }

        foreach (BasicBlock block in context.Blocks)
        {
            LinkedListNode<INode> cursor = block.Operations.First;

            while (cursor != null)
            {
                // Only operations are handed to the pass, other node kinds are skipped.
                if (cursor.Value is Operation)
                {
                    // The pass may replace the node, iteration continues from what it returns.
                    cursor = T.RunPass(context, cursor);
                }

                cursor = cursor.Next;
            }
        }
    }
}
}

View file

@ -0,0 +1,96 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation.Transforms
{
/// <summary>
/// Pass that replaces dynamically indexed vector loads from constant buffers with one
/// load per component, followed by a chain of selections on the element index.
/// </summary>
class VectorComponentSelect : ITransformPass
{
    /// <summary>
    /// The pass runs when the host reports the vector indexing bug.
    /// </summary>
    public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures)
    {
        return gpuAccessor.QueryHostHasVectorIndexingBug();
    }

    /// <summary>
    /// Rewrites a constant buffer load whose last source (the element index) is not constant,
    /// as long as the binding and field indices are constant and the field is a vector.
    /// The original operation is turned into a copy of the selected value.
    /// </summary>
    /// <param name="context">Transform context</param>
    /// <param name="node">Node holding the operation to process</param>
    /// <returns>The same node that was passed in</returns>
    public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node)
    {
        Operation operation = (Operation)node.Value;

        bool isElementLoad =
            operation.Inst == Instruction.Load &&
            operation.StorageKind == StorageKind.ConstantBuffer &&
            operation.SourcesCount >= 3;

        if (!isElementLoad)
        {
            return node;
        }

        Operand bindingIndex = operation.GetSource(0);
        Operand fieldIndex = operation.GetSource(1);
        Operand elemIndex = operation.GetSource(operation.SourcesCount - 1);

        bool hasDynamicElemIndex =
            bindingIndex.Type == OperandType.Constant &&
            fieldIndex.Type == OperandType.Constant &&
            elemIndex.Type != OperandType.Constant;

        if (!hasDynamicElemIndex)
        {
            return node;
        }

        BufferDefinition buffer = context.ResourceManager.Properties.ConstantBuffers[bindingIndex.Value];
        StructureField field = buffer.Type.Fields[fieldIndex.Value];

        int elemCount = GetElementCount(field.Type);

        if (elemCount == 1)
        {
            return node;
        }

        Operand result = null;

        for (int elem = 0; elem < elemCount; elem++)
        {
            Operand value = Local();

            // Same sources as the original load, except that the element index is constant.
            Operand[] inputs = new Operand[operation.SourcesCount];

            int lastIndex = inputs.Length - 1;

            for (int srcIndex = 0; srcIndex < lastIndex; srcIndex++)
            {
                inputs[srcIndex] = operation.GetSource(srcIndex);
            }

            inputs[lastIndex] = Const(elem);

            node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.ConstantBuffer, value, inputs));

            if (elem == 0)
            {
                // The first component is the initial value of the selection chain.
                result = value;
                continue;
            }

            Operand isCurrentIndex = Local();
            Operand selection = Local();

            Operation compareOp = new(Instruction.CompareEqual, isCurrentIndex, new Operand[] { elemIndex, Const(elem) });
            Operation selectOp = new(Instruction.ConditionalSelect, selection, new Operand[] { isCurrentIndex, value, result });

            node.List.AddBefore(node, compareOp);
            node.List.AddBefore(node, selectOp);

            result = selection;
        }

        operation.TurnIntoCopy(result);

        return node;
    }

    /// <summary>
    /// Gets the number of elements of a type: 2 to 4 for vectors, 1 for anything else.
    /// </summary>
    private static int GetElementCount(AggregateType type)
    {
        return (type & AggregateType.ElementCountMask) switch
        {
            AggregateType.Vector2 => 2,
            AggregateType.Vector3 => 3,
            AggregateType.Vector4 => 4,
            _ => 1
        };
    }
}
}

View file

@ -1,11 +1,6 @@
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System;
using System.Collections.Generic;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
@ -13,6 +8,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
public static class Translator
{
private const int ThreadsPerWarp = 32;
private const int HeaderSize = 0x50;
internal readonly struct FunctionCode
@ -30,94 +26,31 @@ namespace Ryujinx.Graphics.Shader.Translation
return DecodeShader(address, gpuAccessor, options);
}
internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
{
var cfgs = new ControlFlowGraph[functions.Length];
var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length];
for (int i = 0; i < functions.Length; i++)
{
cfgs[i] = ControlFlowGraph.Create(functions[i].Code);
if (i != 0)
{
frus[i] = RegisterUsage.RunPass(cfgs[i]);
}
}
List<Function> funcs = new(functions.Length);
for (int i = 0; i < functions.Length; i++)
{
funcs.Add(null);
}
HelperFunctionManager hfm = new(funcs, config.Stage);
for (int i = 0; i < functions.Length; i++)
{
var cfg = cfgs[i];
int inArgumentsCount = 0;
int outArgumentsCount = 0;
if (i != 0)
{
var fru = frus[i];
inArgumentsCount = fru.InArguments.Length;
outArgumentsCount = fru.OutArguments.Length;
}
if (cfg.Blocks.Length != 0)
{
RegisterUsage.FixupCalls(cfg.Blocks, frus);
Dominance.FindDominators(cfg);
Dominance.FindDominanceFrontiers(cfg.Blocks);
Ssa.Rename(cfg.Blocks);
Optimizer.RunPass(hfm, cfg.Blocks, config);
Rewriter.RunPass(hfm, cfg.Blocks, config);
}
funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
}
var identification = ShaderIdentifier.Identify(funcs, config);
var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config);
var info = config.CreateProgramInfo(identification);
return config.Options.TargetLanguage switch
{
TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)),
TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)),
_ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()),
};
}
private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
{
ShaderConfig config;
int localMemorySize;
ShaderDefinitions definitions;
DecodedProgram program;
ulong maxEndAddress = 0;
if (options.Flags.HasFlag(TranslationFlags.Compute))
{
config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize());
definitions = CreateComputeDefinitions(gpuAccessor);
localMemorySize = gpuAccessor.QueryComputeLocalMemorySize();
program = Decoder.Decode(config, address);
program = Decoder.Decode(definitions, gpuAccessor, address);
}
else
{
config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);
ShaderHeader header = new(gpuAccessor, address);
program = Decoder.Decode(config, address + HeaderSize);
definitions = CreateGraphicsDefinitions(gpuAccessor, header);
localMemorySize = GetLocalMemorySize(header);
program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize);
}
ulong maxEndAddress = 0;
foreach (DecodedFunction function in program)
{
foreach (Block block in function.Blocks)
@ -129,12 +62,76 @@ namespace Ryujinx.Graphics.Shader.Translation
}
}
config.SizeAdd((int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));
int size = (int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize);
return new TranslatorContext(address, program, config);
return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program);
}
internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
/// <summary>
/// Builds the shader definitions for a compute shader.
/// </summary>
/// <param name="gpuAccessor">Accessor queried for the compute local size on the X, Y and Z axes</param>
/// <returns>Definitions for the <see cref="ShaderStage.Compute"/> stage carrying the three queried local sizes</returns>
private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor)
{
    // Queried in X, Y, Z order, which is also the order they are handed to the constructor.
    int localSizeX = gpuAccessor.QueryComputeLocalSizeX();
    int localSizeY = gpuAccessor.QueryComputeLocalSizeY();
    int localSizeZ = gpuAccessor.QueryComputeLocalSizeZ();

    return new ShaderDefinitions(ShaderStage.Compute, localSizeX, localSizeY, localSizeZ);
}
/// <summary>
/// Builds the shader definitions for a graphics shader from its header and the state exposed by the GPU accessor.
/// </summary>
/// <remarks>
/// Transform feedback data is only gathered when both <c>QueryTransformFeedbackEnabled</c> and
/// <c>QueryHostSupportsTransformFeedback</c> return true. Otherwise the output table passed to the
/// definitions is null and the vector map is zero.
/// </remarks>
/// <param name="gpuAccessor">Accessor queried for the graphics state and the transform feedback layout</param>
/// <param name="header">Header of the shader being decoded</param>
/// <returns>Definitions describing the graphics shader</returns>
private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header)
{
    // Size of the output table; varying locations at or beyond this value are ignored.
    const int MaxOutputWords = 0xc0;

    // Number of transform feedback buffer indices that are queried.
    const int BuffersCount = 4;

    bool transformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();

    if (transformFeedbackEnabled)
    {
        // Host support is only queried once transform feedback is known to be enabled.
        transformFeedbackEnabled = gpuAccessor.QueryHostSupportsTransformFeedback();
    }

    TransformFeedbackOutput[] outputs = null;
    ulong vecMap = 0UL;

    if (transformFeedbackEnabled)
    {
        outputs = new TransformFeedbackOutput[MaxOutputWords];

        for (int bufferIndex = 0; bufferIndex < BuffersCount; bufferIndex++)
        {
            var locations = gpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
            var stride = gpuAccessor.QueryTransformFeedbackStride(bufferIndex);

            for (int varying = 0; varying < locations.Length; varying++)
            {
                byte wordOffset = locations[varying];

                if (wordOffset >= MaxOutputWords)
                {
                    continue;
                }

                // Each entry records the buffer, an offset of 4 per varying slot, and the buffer stride.
                outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, varying * 4, stride);

                // One bit per group of 4 consecutive word offsets.
                vecMap |= 1UL << (wordOffset / 4);
            }
        }
    }

    var graphicsState = gpuAccessor.QueryGraphicsState();

    // Passthrough is only reported for geometry shaders whose header sets the flag.
    bool gpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;

    return new ShaderDefinitions(
        header.Stage,
        graphicsState,
        gpPassthrough,
        header.ThreadsPerInputPrimitive,
        header.OutputTopology,
        header.MaxOutputVertexCount,
        header.ImapTypes,
        header.OmapTargets,
        header.OmapSampleMask,
        header.OmapDepth,
        transformFeedbackEnabled,
        vecMap,
        outputs);
}
/// <summary>
/// Computes the local memory size of a graphics shader from the sizes stored on its header.
/// </summary>
/// <param name="header">Header providing the low, high and CRS local memory sizes</param>
/// <returns>Low size plus high size, plus the CRS size divided by <c>ThreadsPerWarp</c></returns>
private static int GetLocalMemorySize(ShaderHeader header)
{
    int lowAndHighSize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;

    // The CRS size is divided by ThreadsPerWarp before it is added to the total.
    int crsSizePerThread = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;

    return lowAndHighSize + crsSizePerThread;
}
internal static FunctionCode[] EmitShader(
TranslatorContext translatorContext,
ResourceManager resourceManager,
DecodedProgram program,
bool initializeOutputs,
out int initializationOperations)
{
initializationOperations = 0;
@ -149,11 +146,11 @@ namespace Ryujinx.Graphics.Shader.Translation
for (int index = 0; index < functions.Length; index++)
{
EmitterContext context = new(program, config, index != 0);
EmitterContext context = new(translatorContext, resourceManager, program, index != 0);
if (initializeOutputs && index == 0)
{
EmitOutputsInitialization(context, config);
EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage);
initializationOperations = context.OperationsCount;
}
@ -168,27 +165,27 @@ namespace Ryujinx.Graphics.Shader.Translation
EmitOps(context, block);
}
functions[index] = new FunctionCode(context.GetOperations());
functions[index] = new(context.GetOperations());
}
return functions;
}
private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage)
{
// Compute has no output attributes, and fragment is the last stage, so we
// don't need to initialize outputs on those stages.
if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment)
if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
{
return;
}
if (config.Stage == ShaderStage.Vertex)
if (stage == ShaderStage.Vertex)
{
InitializePositionOutput(context);
}
UInt128 usedAttributes = context.Config.NextInputAttributesComponents;
UInt128 usedAttributes = context.TranslatorContext.AttributeUsage.NextInputAttributesComponents;
while (usedAttributes != UInt128.Zero)
{
int index = (int)UInt128.TrailingZeroCount(usedAttributes);
@ -197,7 +194,7 @@ namespace Ryujinx.Graphics.Shader.Translation
usedAttributes &= ~(UInt128.One << index);
// We don't need to initialize passthrough attributes.
if ((context.Config.PassthroughAttributes & (1 << vecIndex)) != 0)
if ((context.TranslatorContext.AttributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0)
{
continue;
}
@ -205,30 +202,28 @@ namespace Ryujinx.Graphics.Shader.Translation
InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
}
if (context.Config.NextUsedInputAttributesPerPatch != null)
if (context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch != null)
{
foreach (int vecIndex in context.Config.NextUsedInputAttributesPerPatch.Order())
foreach (int vecIndex in context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch.Order())
{
InitializeOutput(context, vecIndex, perPatch: true);
}
}
if (config.NextUsesFixedFuncAttributes)
if (attributeUsage.NextUsesFixedFuncAttributes)
{
bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation();
int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;
for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
{
int index = config.GetFreeUserAttribute(isOutput: true, i);
int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i);
if (index < 0)
{
break;
}
InitializeOutput(context, index, perPatch: false);
config.SetOutputUserAttributeFixedFunc(index);
}
}
}
@ -253,11 +248,11 @@ namespace Ryujinx.Graphics.Shader.Translation
{
StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
if (context.TranslatorContext.Definitions.OaIndexing)
{
Operand invocationId = null;
if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
{
invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
@ -268,7 +263,7 @@ namespace Ryujinx.Graphics.Shader.Translation
}
else
{
if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
{
Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
@ -286,7 +281,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
InstOp op = block.OpCodes[opIndex];
if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
if (context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode))
{
string instName;
@ -298,7 +293,7 @@ namespace Ryujinx.Graphics.Shader.Translation
{
instName = "???";
context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
}
string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";

View file

@ -1,8 +1,11 @@
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen;
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using Ryujinx.Graphics.Shader.Translation.Transforms;
using System;
using System.Collections.Generic;
using System.Linq;
@ -15,22 +18,47 @@ namespace Ryujinx.Graphics.Shader.Translation
public class TranslatorContext
{
private readonly DecodedProgram _program;
private readonly ShaderConfig _config;
private readonly int _localMemorySize;
public ulong Address { get; }
public int Size { get; }
public int Cb1DataSize => _program.Cb1DataSize;
public ShaderStage Stage => _config.Stage;
public int Size => _config.Size;
public int Cb1DataSize => _config.Cb1DataSize;
public bool LayerOutputWritten => _config.LayerOutputWritten;
internal bool HasLayerInputAttribute { get; private set; }
internal int GpLayerInputAttribute { get; private set; }
public IGpuAccessor GpuAccessor => _config.GpuAccessor;
internal AttributeUsage AttributeUsage => _program.AttributeUsage;
internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
internal ShaderDefinitions Definitions { get; }
public ShaderStage Stage => Definitions.Stage;
internal IGpuAccessor GpuAccessor { get; }
internal TranslationOptions Options { get; }
internal FeatureFlags UsedFeatures { get; private set; }
public bool LayerOutputWritten { get; private set; }
public int LayerOutputAttribute { get; private set; }
internal TranslatorContext(
ulong address,
int size,
int localMemorySize,
ShaderDefinitions definitions,
IGpuAccessor gpuAccessor,
TranslationOptions options,
DecodedProgram program)
{
Address = address;
Size = size;
_program = program;
_config = config;
_localMemorySize = localMemorySize;
Definitions = definitions;
GpuAccessor = gpuAccessor;
Options = options;
UsedFeatures = program.UsedFeatures;
}
private static bool IsLoadUserDefined(Operation operation)
@ -131,63 +159,259 @@ namespace Ryujinx.Graphics.Shader.Translation
return output;
}
public void SetNextStage(TranslatorContext nextStage)
internal int GetDepthRegister()
{
_config.MergeFromtNextStage(nextStage._config);
// The depth register is always two registers after the last color output.
return BitOperations.PopCount((uint)Definitions.OmapTargets) + 1;
}
/// <summary>
/// Marks the layer output as written and stores the given attribute value.
/// </summary>
/// <param name="attr">Value assigned to <see cref="LayerOutputAttribute"/></param>
public void SetLayerOutputAttribute(int attr)
{
    (LayerOutputWritten, LayerOutputAttribute) = (true, attr);
}
public void SetGeometryShaderLayerInputAttribute(int attr)
{
_config.SetGeometryShaderLayerInputAttribute(attr);
UsedFeatures |= FeatureFlags.RtLayer;
HasLayerInputAttribute = true;
GpLayerInputAttribute = attr;
}
public void SetLastInVertexPipeline()
{
_config.SetLastInVertexPipeline();
Definitions.LastInVertexPipeline = true;
}
public ShaderProgram Translate(TranslatorContext other = null)
public void SetNextStage(TranslatorContext nextStage)
{
bool usesLocalMemory = _config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
AttributeUsage.MergeFromtNextStage(
Definitions.GpPassthrough,
nextStage.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr),
nextStage.AttributeUsage);
_config.ResourceManager.SetCurrentLocalMemory(_config.LocalMemorySize, usesLocalMemory);
if (_config.Stage == ShaderStage.Compute)
// We don't consider geometry shaders using the geometry shader passthrough feature
// as being the last because when this feature is used, it can't actually modify any of the outputs,
// so the stage that comes before it is the last one that can do modifications.
if (nextStage.Definitions.Stage != ShaderStage.Fragment &&
(nextStage.Definitions.Stage != ShaderStage.Geometry || !nextStage.Definitions.GpPassthrough))
{
bool usesSharedMemory = _config.UsedFeatures.HasFlag(FeatureFlags.SharedMemory);
Definitions.LastInVertexPipeline = false;
}
}
_config.ResourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory);
public ShaderProgram Translate()
{
ResourceManager resourceManager = CreateResourceManager();
bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory);
if (Stage == ShaderStage.Compute)
{
bool usesSharedMemory = _program.UsedFeatures.HasFlag(FeatureFlags.SharedMemory);
resourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory);
}
FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _);
FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: true, out _);
if (other != null)
return Translate(code, resourceManager, UsedFeatures, _program.ClipDistancesWritten);
}
/// <summary>
/// Translates this shader combined with another shader into a single program.
/// </summary>
/// <remarks>
/// Both shaders are emitted against the same <see cref="ResourceManager"/>. This shader is emitted
/// first without output initialization, then <paramref name="other"/> is emitted with output
/// initialization, and the two results are merged with <c>Combine</c>. The used features and the
/// clip distances written by both shaders are OR-ed together for the final program.
/// </remarks>
/// <param name="other">
/// Shader to combine with this one. When null there is nothing to combine with, so the call
/// behaves exactly like <see cref="Translate()"/>.
/// </param>
/// <returns>The translated shader program</returns>
public ShaderProgram Translate(TranslatorContext other)
{
    // A null "other" means "translate this shader alone". Handle it up front instead of
    // failing with a NullReferenceException after part of the work has already been done.
    if (other is null)
    {
        return Translate();
    }

    ResourceManager resourceManager = CreateResourceManager();

    // Local memory is configured per shader right before that shader is emitted.
    bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
    resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory);

    FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: false, out _);

    bool otherUsesLocalMemory = other._program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory);
    resourceManager.SetCurrentLocalMemory(other._localMemorySize, otherUsesLocalMemory);

    // Only the other shader initializes outputs; aStart receives the operation count of that initialization.
    FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, initializeOutputs: true, out int aStart);

    code = Combine(otherCode, code, aStart);

    return Translate(
        code,
        resourceManager,
        UsedFeatures | other.UsedFeatures,
        (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten));
}
private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten)
{
var cfgs = new ControlFlowGraph[functions.Length];
var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length];
for (int i = 0; i < functions.Length; i++)
{
other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());
cfgs[i] = ControlFlowGraph.Create(functions[i].Code);
// We need to share the resource manager since both shaders access the same constant buffers.
other._config.ResourceManager = _config.ResourceManager;
other._config.ResourceManager.SetCurrentLocalMemory(other._config.LocalMemorySize, other._config.UsedFeatures.HasFlag(FeatureFlags.LocalMemory));
FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);
code = Combine(otherCode, code, aStart);
_config.InheritFrom(other._config);
if (i != 0)
{
frus[i] = RegisterUsage.RunPass(cfgs[i]);
}
}
return Translator.Translate(code, _config);
List<Function> funcs = new(functions.Length);
for (int i = 0; i < functions.Length; i++)
{
funcs.Add(null);
}
HelperFunctionManager hfm = new(funcs, Definitions.Stage);
for (int i = 0; i < functions.Length; i++)
{
var cfg = cfgs[i];
int inArgumentsCount = 0;
int outArgumentsCount = 0;
if (i != 0)
{
var fru = frus[i];
inArgumentsCount = fru.InArguments.Length;
outArgumentsCount = fru.OutArguments.Length;
}
if (cfg.Blocks.Length != 0)
{
RegisterUsage.FixupCalls(cfg.Blocks, frus);
Dominance.FindDominators(cfg);
Dominance.FindDominanceFrontiers(cfg.Blocks);
Ssa.Rename(cfg.Blocks);
TransformContext context = new(
hfm,
cfg.Blocks,
resourceManager,
GpuAccessor,
Options.TargetLanguage,
Definitions.Stage,
ref usedFeatures);
Optimizer.RunPass(context);
TransformPasses.RunPass(context);
}
funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
}
var identification = ShaderIdentifier.Identify(funcs, GpuAccessor, Definitions.Stage, Definitions.InputTopology, out int layerInputAttr);
return Generate(
funcs,
AttributeUsage,
Definitions,
resourceManager,
usedFeatures,
clipDistancesWritten,
identification,
layerInputAttr);
}
/// <summary>
/// Structures the given functions and runs the code generator selected by the translation options.
/// </summary>
/// <param name="funcs">Functions that make up the program</param>
/// <param name="attributeUsage">Attribute usage passed to the structuring step and to the code generator</param>
/// <param name="definitions">Shader definitions; also the source of the stage and OMAP targets stored on the program info</param>
/// <param name="resourceManager">Resource manager providing the buffer, texture and image descriptors and the shader properties</param>
/// <param name="usedFeatures">Feature flags used to fill the feature-related fields of the program info</param>
/// <param name="clipDistancesWritten">Clip distances written value stored on the program info</param>
/// <param name="identification">Shader identification stored on the program info</param>
/// <param name="layerInputAttr">Layer input attribute stored on the program info</param>
/// <returns>The generated program for the target language</returns>
/// <exception cref="NotImplementedException">The target language is neither GLSL nor SPIR-V</exception>
private ShaderProgram Generate(
    IReadOnlyList<Function> funcs,
    AttributeUsage attributeUsage,
    ShaderDefinitions definitions,
    ResourceManager resourceManager,
    FeatureFlags usedFeatures,
    byte clipDistancesWritten,
    ShaderIdentification identification = ShaderIdentification.None,
    int layerInputAttr = 0)
{
    bool debugMode = Options.Flags.HasFlag(TranslationFlags.DebugMode);

    var structuredInfo = StructuredProgram.MakeStructuredProgram(
        funcs,
        attributeUsage,
        definitions,
        resourceManager,
        debugMode);

    // Descriptors are read after structuring, in the same order the program info expects them.
    var programInfo = new ShaderProgramInfo(
        resourceManager.GetConstantBufferDescriptors(),
        resourceManager.GetStorageBufferDescriptors(),
        resourceManager.GetTextureDescriptors(),
        resourceManager.GetImageDescriptors(),
        identification,
        layerInputAttr,
        definitions.Stage,
        usedFeatures.HasFlag(FeatureFlags.FragCoordXY),
        usedFeatures.HasFlag(FeatureFlags.InstanceId),
        usedFeatures.HasFlag(FeatureFlags.DrawParameters),
        usedFeatures.HasFlag(FeatureFlags.RtLayer),
        clipDistancesWritten,
        definitions.OmapTargets);

    var hostCapabilities = new HostCapabilities(
        GpuAccessor.QueryHostReducedPrecision(),
        GpuAccessor.QueryHostSupportsFragmentShaderInterlock(),
        GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel(),
        GpuAccessor.QueryHostSupportsGeometryShaderPassthrough(),
        GpuAccessor.QueryHostSupportsShaderBallot(),
        GpuAccessor.QueryHostSupportsShaderBarrierDivergence(),
        GpuAccessor.QueryHostSupportsTextureShadowLod(),
        GpuAccessor.QueryHostSupportsViewportMask());

    var parameters = new CodeGenParameters(
        attributeUsage,
        definitions,
        resourceManager.Properties,
        hostCapabilities,
        GpuAccessor,
        Options.TargetApi);

    TargetLanguage language = Options.TargetLanguage;

    switch (language)
    {
        case TargetLanguage.Glsl:
            return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, parameters));

        case TargetLanguage.Spirv:
            return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, parameters));

        default:
            throw new NotImplementedException(language.ToString());
    }
}
/// <summary>
/// Creates the resource manager used for a translation of this shader.
/// </summary>
/// <remarks>
/// When transform feedback is enabled but <c>QueryHostSupportsTransformFeedback</c> reports no host
/// support, the "tfe_info" storage buffer and one "tfe_data" storage buffer per
/// <c>Constants.TfeBuffersCount</c> are registered on the resource manager properties.
/// </remarks>
/// <returns>A new resource manager for the stage of this shader</returns>
private ResourceManager CreateResourceManager()
{
    ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor);

    // No extra buffers are declared when the host supports transform feedback or when it is disabled.
    if (GpuAccessor.QueryHostSupportsTransformFeedback() || !GpuAccessor.QueryTransformFeedbackEnabled())
    {
        return resourceManager;
    }

    var infoFields = new StructureField[]
    {
        new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4),
        new StructureField(AggregateType.U32, "vertex_count"),
    };

    StructureType infoStruct = new(infoFields);
    BufferDefinition infoBuffer = new(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", infoStruct);

    resourceManager.Properties.AddOrUpdateStorageBuffer(infoBuffer);

    var dataFields = new StructureField[]
    {
        new StructureField(AggregateType.Array | AggregateType.U32, "data", 0),
    };

    // The same structure type is shared by every data buffer; only binding and name differ.
    StructureType dataStruct = new(dataFields);

    for (int bufferIndex = 0; bufferIndex < Constants.TfeBuffersCount; bufferIndex++)
    {
        BufferDefinition dataBuffer = new(
            BufferLayout.Std430,
            1,
            Constants.TfeBufferBaseBinding + bufferIndex,
            $"tfe_data{bufferIndex}",
            dataStruct);

        resourceManager.Properties.AddOrUpdateStorageBuffer(dataBuffer);
    }

    return resourceManager;
}
public ShaderProgram GenerateGeometryPassthrough()
{
int outputAttributesMask = _config.UsedOutputAttributes;
int layerOutputAttr = _config.LayerOutputAttribute;
int outputAttributesMask = AttributeUsage.UsedOutputAttributes;
int layerOutputAttr = LayerOutputAttribute;
OutputTopology outputTopology;
int maxOutputVertices;
switch (GpuAccessor.QueryPrimitiveTopology())
switch (Definitions.InputTopology)
{
case InputTopology.Points:
outputTopology = OutputTopology.PointList;
@ -204,9 +428,10 @@ namespace Ryujinx.Graphics.Shader.Translation
break;
}
ShaderConfig config = new(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);
var attributeUsage = new AttributeUsage(GpuAccessor);
var resourceManager = new ResourceManager(ShaderStage.Geometry, GpuAccessor);
EmitterContext context = new(default, config, false);
var context = new EmitterContext();
for (int v = 0; v < maxOutputVertices; v++)
{
@ -231,10 +456,7 @@ namespace Ryujinx.Graphics.Shader.Translation
else
{
context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(c), value);
config.SetOutputUserAttribute(attrIndex);
}
config.SetInputUserAttribute(attrIndex, c);
}
}
@ -254,16 +476,15 @@ namespace Ryujinx.Graphics.Shader.Translation
var cfg = ControlFlowGraph.Create(operations);
var function = new Function(cfg.Blocks, "main", false, 0, 0);
var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config);
var definitions = new ShaderDefinitions(
ShaderStage.Geometry,
GpuAccessor.QueryGraphicsState(),
false,
1,
outputTopology,
maxOutputVertices);
var info = config.CreateProgramInfo();
return config.Options.TargetLanguage switch
{
TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)),
TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)),
_ => throw new NotImplementedException(config.Options.TargetLanguage.ToString()),
};
return Generate(new[] { function }, attributeUsage, definitions, resourceManager, FeatureFlags.RtLayer, 0);
}
}
}