Implement BFI, BRK, FLO, FSWZADD, PBK, SHFL and TXD shader instructions, misc. fixes

This commit is contained in:
gdk 2019-10-31 00:29:22 -03:00 committed by Thog
parent d786d8d2b9
commit 278a4c317c
38 changed files with 972 additions and 166 deletions

View file

@ -5,7 +5,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
class CodeGenContext
{
private const string Tab = " ";
public const string Tab = " ";
public ShaderConfig Config { get; }
@ -90,5 +90,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return indentation;
}
public string GetTabString()
{
return Tab;
}
}
}

View file

@ -1,3 +1,4 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static void Declare(CodeGenContext context, StructuredProgramInfo info)
{
context.AppendLine("#version 420 core");
context.AppendLine("#extension GL_ARB_shader_ballot : enable");
context.AppendLine("#extension GL_ARB_shader_storage_buffer_object : enable");
if (context.Config.Stage == ShaderStage.Compute)
@ -131,6 +133,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
$"local_size_z = {localSizeZ}) in;");
context.AppendLine();
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
}
}
public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info)
@ -321,6 +348,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
}
private static void AppendHelperFunction(CodeGenContext context, string filename)
{
string code = EmbeddedResources.ReadAllText(filename);
context.AppendLine(code.Replace("\t", CodeGenContext.Tab));
context.AppendLine();
}
private static string GetSamplerTypeName(SamplerType type)
{
string typeName;

View file

@ -33,6 +33,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
Declarations.DeclareLocals(context, info);
// Some games will leave some elements of gl_Position uninitialized,
// in those cases, the elements will contain undefined values according
// to the spec, but on NVIDIA they seems to be always initialized to (0, 0, 0, 1),
// so we do explicit initialization to avoid UB on non-NVIDIA gpus.
if (context.Config.Stage == ShaderStage.Vertex)
{
context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);");
}
// Ensure that unused attributes are set, otherwise the downstream
// compiler may eliminate them.
// (Not needed for fragment shader as it is the last stage).

View file

@ -0,0 +1,11 @@
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
static class HelperFunctionNames
{
public static string Shuffle = "Helper_Shuffle";
public static string ShuffleDown = "Helper_ShuffleDown";
public static string ShuffleUp = "Helper_ShuffleUp";
public static string ShuffleXor = "Helper_ShuffleXor";
public static string SwizzleAdd = "Helper_SwizzleAdd";
}
}

View file

@ -0,0 +1,9 @@
float Helper_Shuffle(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = (index & ~segMask) | minThreadId;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,9 @@
float Helper_ShuffleDown(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB + index;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,8 @@
float Helper_ShuffleUp(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint srcThreadId = gl_SubGroupInvocationARB - index;
return (srcThreadId >= minThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,9 @@
float Helper_ShuffleXor(float x, uint index, uint mask)
{
uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB ^ index;
return (srcThreadId <= maxThreadId) ? readInvocationARB(x, srcThreadId) : x;
}

View file

@ -0,0 +1,7 @@
float Helper_SwizzleAdd(float x, float y, int mask)
{
vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
int lutIdx = mask >> int(gl_SubGroupInvocationARB & 3u) * 2;
return x * xLut[lutIdx] + y * yLut[lutIdx];
}

View file

@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.Absolute, InstType.CallUnary, "abs");
Add(Instruction.Add, InstType.OpBinaryCom, "+", 2);
Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert");
@ -41,11 +42,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ConvertS32ToFP, InstType.CallUnary, "float");
Add(Instruction.ConvertU32ToFP, InstType.CallUnary, "float");
Add(Instruction.Cosine, InstType.CallUnary, "cos");
Add(Instruction.Ddx, InstType.CallUnary, "dFdx");
Add(Instruction.Ddy, InstType.CallUnary, "dFdy");
Add(Instruction.Discard, InstType.OpNullary, "discard");
Add(Instruction.Divide, InstType.OpBinary, "/", 1);
Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex");
Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive");
Add(Instruction.ExponentB2, InstType.CallUnary, "exp2");
Add(Instruction.FindFirstSetS32, InstType.CallUnary, "findMSB");
Add(Instruction.FindFirstSetU32, InstType.CallUnary, "findMSB");
Add(Instruction.Floor, InstType.CallUnary, "floor");
Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma");
Add(Instruction.ImageLoad, InstType.Special);
@ -66,6 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
Add(Instruction.Shuffle, InstType.CallTernary, HelperFunctionNames.Shuffle);
Add(Instruction.ShuffleDown, InstType.CallTernary, HelperFunctionNames.ShuffleDown);
Add(Instruction.ShuffleUp, InstType.CallTernary, HelperFunctionNames.ShuffleUp);
Add(Instruction.ShuffleXor, InstType.CallTernary, HelperFunctionNames.ShuffleXor);
Add(Instruction.Maximum, InstType.CallBinary, "max");
Add(Instruction.MaximumU32, InstType.CallBinary, "max");
Add(Instruction.Minimum, InstType.CallBinary, "min");
@ -80,6 +89,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.StoreLocal, InstType.Special);
Add(Instruction.StoreStorage, InstType.Special);
Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
Add(Instruction.TextureSample, InstType.Special);
Add(Instruction.TextureSize, InstType.Special);
Add(Instruction.Truncate, InstType.CallUnary, "trunc");

View file

@ -164,13 +164,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
AstTextureOperation texOp = (AstTextureOperation)operation;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
@ -190,6 +191,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
texCall += "Gather";
}
else if (hasDerivatives)
{
texCall += "Grad";
}
else if (hasLodLevel && !intCoords)
{
texCall += "Lod";
@ -297,6 +302,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Append(AssemblePVector(pCount));
string AssembleDerivativesVector(int count)
{
if (count > 1)
{
string[] elems = new string[count];
for (int index = 0; index < count; index++)
{
elems[index] = Src(VariableType.F32);
}
return "vec" + count + "(" + string.Join(", ", elems) + ")";
}
else
{
return Src(VariableType.F32);
}
}
if (hasDerivatives)
{
Append(AssembleDerivativesVector(coordsCount)); // dPdx
Append(AssembleDerivativesVector(coordsCount)); // dPdy
}
if (hasExtraCompareArg)
{
Append(Src(VariableType.F32));