Add support for shader atomic min/max (S32) (#1948)

This commit is contained in:
gdkchan 2021-01-26 03:38:33 -03:00 committed by GitHub
parent c19cfca183
commit e453ba69f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 103 additions and 21 deletions

View file

@ -157,6 +157,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
} }
} }
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0)
{
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl");
}
if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
{ {
AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
@ -523,7 +533,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ {
string code = EmbeddedResources.ReadAllText(filename); string code = EmbeddedResources.ReadAllText(filename);
context.AppendLine(code.Replace("\t", CodeGenContext.Tab)); code = code.Replace("\t", CodeGenContext.Tab);
code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);
context.AppendLine(code);
context.AppendLine(); context.AppendLine();
} }
} }

View file

@ -0,0 +1,21 @@
int Helper_AtomicMaxS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}
int Helper_AtomicMinS32(int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $SHARED_MEM$[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($SHARED_MEM$[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}

View file

@ -0,0 +1,21 @@
int Helper_AtomicMaxS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(max(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}
int Helper_AtomicMinS32(int index, int offset, int value)
{
uint oldValue, newValue;
do
{
oldValue = $STORAGE_MEM$[index].data[offset];
newValue = uint(min(int(oldValue), value));
} while (atomicCompSwap($STORAGE_MEM$[index].data[offset], newValue, oldValue) != oldValue);
return int(oldValue);
}

View file

@ -2,6 +2,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ {
static class HelperFunctionNames static class HelperFunctionNames
{ {
public static string AtomicMaxS32 = "Helper_AtomicMaxS32";
public static string AtomicMinS32 = "Helper_AtomicMinS32";
public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; public static string MultiplyHighS32 = "Helper_MultiplyHighS32";
public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; public static string MultiplyHighU32 = "Helper_MultiplyHighU32";

View file

@ -1,6 +1,8 @@
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) { ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
float scale = cp_renderScale[samplerIndex]; float scale = cp_renderScale[samplerIndex];
if (scale == 1.0) { if (scale == 1.0)
{
return inputVec; return inputVec;
} }
return ivec2(vec2(inputVec) * scale); return ivec2(vec2(inputVec) * scale);

View file

@ -1,11 +1,16 @@
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) { ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
float scale = fp_renderScale[1 + samplerIndex]; float scale = fp_renderScale[1 + samplerIndex];
if (scale == 1.0) { if (scale == 1.0)
{
return inputVec; return inputVec;
} }
if (scale < 0.0) { // If less than 0, try interpolate between texels by using the screen position. if (scale < 0.0) // If less than 0, try interpolate between texels by using the screen position.
{
return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, -scale)); return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, -scale));
} else { }
else
{
return ivec2(vec2(inputVec) * scale); return ivec2(vec2(inputVec) * scale);
} }
} }

View file

@ -42,13 +42,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
for (int argIndex = 0; argIndex < arity; argIndex++) for (int argIndex = 0; argIndex < arity; argIndex++)
{ {
// For shared memory access, the second argument is unused and should be ignored.
// It is there to make both storage and shared access have the same number of arguments.
if (argIndex == 1 && (inst & Instruction.MrMask) == Instruction.MrShared)
{
continue;
}
if (argIndex != 0) if (argIndex != 0)
{ {
args += ", "; args += ", ";
} }
VariableType dstType = GetSrcVarType(inst, argIndex);
if (argIndex == 0 && atomic) if (argIndex == 0 && atomic)
{ {
Instruction memRegion = inst & Instruction.MrMask; Instruction memRegion = inst & Instruction.MrMask;
@ -60,12 +65,11 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
default: throw new InvalidOperationException($"Invalid memory region \"{memRegion}\"."); default: throw new InvalidOperationException($"Invalid memory region \"{memRegion}\".");
} }
// We use the first 2 operands above.
argIndex++;
} }
else else
{ {
VariableType dstType = GetSrcVarType(inst, argIndex);
args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
} }
} }

View file

@ -16,9 +16,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd");
Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd");
Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap");
Add(Instruction.AtomicMaxS32, InstType.AtomicBinary, "atomicMax"); Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32);
Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax");
Add(Instruction.AtomicMinS32, InstType.AtomicBinary, "atomicMin"); Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32);
Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin");
Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr");
Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange");

View file

@ -9,6 +9,8 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Storage.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
<EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" /> <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />

View file

@ -5,12 +5,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
[Flags] [Flags]
enum HelperFunctionsMask enum HelperFunctionsMask
{ {
MultiplyHighS32 = 1 << 0, AtomicMinMaxS32Shared = 1 << 0,
MultiplyHighU32 = 1 << 1, AtomicMinMaxS32Storage = 1 << 1,
Shuffle = 1 << 2, MultiplyHighS32 = 1 << 2,
ShuffleDown = 1 << 3, MultiplyHighU32 = 1 << 3,
ShuffleUp = 1 << 4, Shuffle = 1 << 4,
ShuffleXor = 1 << 5, ShuffleDown = 1 << 5,
SwizzleAdd = 1 << 6 ShuffleUp = 1 << 6,
ShuffleXor = 1 << 7,
SwizzleAdd = 1 << 8
} }
} }

View file

@ -244,6 +244,14 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// decide which helper functions are needed on the final generated code. // decide which helper functions are needed on the final generated code.
switch (operation.Inst) switch (operation.Inst)
{ {
case Instruction.AtomicMaxS32 | Instruction.MrShared:
case Instruction.AtomicMinS32 | Instruction.MrShared:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
break;
case Instruction.AtomicMaxS32 | Instruction.MrStorage:
case Instruction.AtomicMinS32 | Instruction.MrStorage:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage;
break;
case Instruction.MultiplyHighS32: case Instruction.MultiplyHighS32:
context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
break; break;