diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 5e0e6bed..f3870900 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 3184; + private const ulong ShaderCodeGenVersion = 3251; // Progress reporting helpers private volatile int _shaderCount; diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index b1bd8188..2d6607ad 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -250,9 +250,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl : "gl_SubgroupInvocationID"; } - // TODO: There must be a better way to handle this... if (config.Stage == ShaderStage.Fragment) { + // TODO: There must be a better way to handle this... 
switch (value) { case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])"; diff --git a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs index b7a0caf1..98a43640 100644 --- a/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs +++ b/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs @@ -5144,6 +5144,7 @@ namespace Ryujinx.Graphics.Shader.Decoders public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3); public bool DFormat => (_opcode & 0x40000000000000) != 0; @@ -5164,6 +5165,7 @@ namespace Ryujinx.Graphics.Shader.Decoders public int SrcC => (int)((_opcode >> 39) & 0xFF); public int Pred => (int)((_opcode >> 16) & 0x7); public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); public bool WriteCC => (_opcode & 0x800000000000) != 0; public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); diff --git a/Ryujinx.Graphics.Shader/InputTopology.cs b/Ryujinx.Graphics.Shader/InputTopology.cs index 429aa211..da332909 100644 --- a/Ryujinx.Graphics.Shader/InputTopology.cs +++ b/Ryujinx.Graphics.Shader/InputTopology.cs @@ -13,16 +13,28 @@ namespace Ryujinx.Graphics.Shader { public static string ToGlslString(this InputTopology topology) { - switch (topology) + return topology switch { - case InputTopology.Points: return "points"; - case InputTopology.Lines: return "lines"; - case InputTopology.LinesAdjacency: return "lines_adjacency"; - case InputTopology.Triangles: return "triangles"; - case InputTopology.TrianglesAdjacency: 
return "triangles_adjacency"; - } + InputTopology.Points => "points", + InputTopology.Lines => "lines", + InputTopology.LinesAdjacency => "lines_adjacency", + InputTopology.Triangles => "triangles", + InputTopology.TrianglesAdjacency => "triangles_adjacency", + _ => "points" + }; + } - return "points"; + public static int ToInputVertices(this InputTopology topology) + { + return topology switch + { + InputTopology.Points => 1, + InputTopology.Lines or + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles or + InputTopology.TrianglesAdjacency => 3, + _ => 1 + }; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs index 3fbd0aeb..879075ba 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs @@ -73,6 +73,26 @@ namespace Ryujinx.Graphics.Shader.Instructions }; } + public static Operand Extend(EmitterContext context, Operand src, VectorSelect type) + { + return type switch + { + VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), + VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), + VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), + VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), + 
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), + VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), + _ => src + }; + } + public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false) { if (!setCC) @@ -118,6 +138,15 @@ namespace Ryujinx.Graphics.Shader.Instructions } } + public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high) + { + low = context.BitwiseNot(low); + high = context.BitwiseNot(high); + low = AddWithCarry(context, low, Const(1), out Operand carryOut); + high = context.IAdd(high, carryOut); + return (low, high); + } + public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut) { Operand result = context.IAdd(lhs, rhs); diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index e2131602..1cdb3842 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -168,10 +168,11 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstIsberd op = context.GetOp(); - // This instruction performs a load from ISBE memory, - // however it seems to be only used to get some vertex - // input data, so we instead propagate the offset so that - // it can be used on the attribute load. + // This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory. + // Here, we just propagate the offset, as the result from this instruction is usually + // used with ALD to perform vertex load on geometry or tessellation shaders. + // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex. + // Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex. 
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA)); } diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs index 51b70601..16b02f97 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs @@ -94,31 +94,19 @@ namespace Ryujinx.Graphics.Shader.Instructions case SReg.InvocationInfo: if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment) { - Operand primitiveId = Attribute(AttributeConsts.PrimitiveId); - Operand patchVerticesIn; + // Note: The lowest 8 bits seem to contain some primitive index, + // but it seems to be NVIDIA implementation-specific, as it is only used + // to calculate ISBE offsets, so we can just keep it as zero. - if (context.Config.Stage == ShaderStage.TessellationEvaluation) + if (context.Config.Stage == ShaderStage.TessellationControl || + context.Config.Stage == ShaderStage.TessellationEvaluation) { - patchVerticesIn = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); + src = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16)); } else { - InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); - - int inputVertices = inputTopology switch - { - InputTopology.Points => 1, - InputTopology.Lines or - InputTopology.LinesAdjacency => 2, - InputTopology.Triangles or - InputTopology.TrianglesAdjacency => 3, - _ => 1 - }; - - patchVerticesIn = Const(inputVertices << 16); + src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16); } - - src = context.BitwiseOr(primitiveId, patchVerticesIn); } else { diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs index 43c0035a..2d84c5bd 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs +++ 
b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs @@ -1,7 +1,9 @@ using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.Translation; using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; namespace Ryujinx.Graphics.Shader.Instructions { @@ -11,8 +13,106 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstVmad op = context.GetOp(); - // TODO: Implement properly. - context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcC)); + bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0; + bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0; + + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB); + Operand srcB; + + if (op.BVideo) + { + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + } + else + { + int imm = op.Imm16; + + if (bSigned) + { + imm = (imm << 16) >> 16; + } + + srcB = Const(imm); + } + + Operand productLow = context.IMultiply(srcA, srcB); + Operand productHigh; + + if (aSigned == bSigned) + { + productHigh = aSigned + ? context.MultiplyHighS32(srcA, srcB) + : context.MultiplyHighU32(srcA, srcB); + } + else + { + Operand temp = aSigned + ? 
context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))) + : context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))); + + productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB)); + } + + if (op.AvgMode == AvgMode.NegA) + { + (productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh); + } + + Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry); + Operand resHigh = context.IAdd(productHigh, sumCarry); + + if (op.AvgMode == AvgMode.PlusOne) + { + resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry); + resHigh = context.IAdd(resHigh, poCarry); + } + + bool resSigned = op.ASelect == VectorSelect.S32 || + op.BSelect == VectorSelect.S32 || + op.AvgMode == AvgMode.NegB || + op.AvgMode == AvgMode.NegA; + + int shift = op.VideoScale switch + { + VideoScale.Shr7 => 7, + VideoScale.Shr15 => 15, + _ => 0 + }; + + if (shift != 0) + { + // Low = (Low >> Shift) | (High << (32 - Shift)) + // High >>= Shift + resLow = context.ShiftRightU32(resLow, Const(shift)); + resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift))); + resHigh = resSigned + ? context.ShiftRightS32(resHigh, Const(shift)) + : context.ShiftRightU32(resHigh, Const(shift)); + } + + Operand res = resLow; + + if (op.Sat) + { + Operand sign = context.ShiftRightS32(resHigh, Const(31)); + + if (resSigned) + { + Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31))); + Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue)); + res = context.ConditionalSelect(overflow, clampValue, resLow); + } + else + { + Operand overflow = context.ICompareNotEqual(resHigh, Const(0)); + res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow); + } + } + + context.Copy(GetDest(op.Dest), res); + + // TODO: CC. 
} } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs index 120d6f22..67b185ab 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs @@ -13,14 +13,13 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstVmnmx op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcC = GetSrcReg(context, op.SrcC); - Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -124,13 +123,12 @@ namespace Ryujinx.Graphics.Shader.Instructions { InstVsetp op = context.GetOp(); - Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); - + Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect); Operand srcB; if (op.BVideo) { - srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); + srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect); } else { @@ -181,25 +179,5 @@ namespace Ryujinx.Graphics.Shader.Instructions context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res); context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res); } - - private static Operand Extend(EmitterContext context, Operand src, VectorSelect type) - { - return type switch - { - VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), - 
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8), - VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8), - VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8), - VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8), - VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16), - VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16), - _ => src - }; - } } } \ No newline at end of file