From ed754af8d5046d2fd7218c742521e38ab17cbcfe Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 11 Aug 2021 17:27:00 -0300 Subject: [PATCH] Make sure attributes used on subsequent shader stages are initialized (#2538) --- Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs | 214 ++++++++++-------- .../Shader/ShaderDumpPaths.cs | 49 ++++ Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs | 18 +- .../CodeGen/Glsl/Declarations.cs | 63 ++---- .../CodeGen/Glsl/GlslGenerator.cs | 40 ---- Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 49 ++-- .../Decoders/IOpCodeAttribute.cs | 8 + .../Decoders/OpCodeAttribute.cs | 2 +- Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs | 3 +- .../StructuredIr/StructuredProgramContext.cs | 35 --- .../StructuredIr/StructuredProgramInfo.cs | 6 - .../Translation/EmitterContext.cs | 3 + .../Translation/ShaderConfig.cs | 28 +++ .../Translation/Translator.cs | 57 ++++- .../Translation/TranslatorContext.cs | 34 ++- 15 files changed, 347 insertions(+), 262 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs create mode 100644 Ryujinx.Graphics.Shader/Decoders/IOpCodeAttribute.cs diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index db5a3bff..2c1fb084 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Version of the codegen (to be changed when codegen or guest format change). /// - private const ulong ShaderCodeGenVersion = 2494; + private const ulong ShaderCodeGenVersion = 2538; // Progress reporting helpers private volatile int _shaderCount; @@ -290,6 +290,43 @@ namespace Ryujinx.Graphics.Gpu.Shader { Task compileTask = Task.Run(() => { + TranslatorContext[] shaderContexts = null; + + if (!isHostProgramValid) + { + shaderContexts = new TranslatorContext[1 + entries.Length]; + + for (int i = 0; i < entries.Length; i++) + { + GuestShaderCacheEntry entry = entries[i]; + + if (entry == null) + { + continue; + } + + var binaryCode = new Memory(entry.Code); + + var gpuAccessor = new CachedGpuAccessor( + _context, + binaryCode, + binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), + entry.Header.GpuAccessorHeader, + entry.TextureDescriptors); + + var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); + + shaderContexts[i + 1] = Translator.CreateContext(0, gpuAccessor, options, counts); + + if (entry.Header.SizeA != 0) + { + var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA); + + shaderContexts[0] = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts); + } + } + } + // Reconstruct code holder. for (int i = 0; i < entries.Length; i++) { @@ -301,71 +338,30 @@ namespace Ryujinx.Graphics.Gpu.Shader } ShaderProgram program; + ShaderProgramInfo shaderProgramInfo; - if (entry.Header.SizeA != 0) + if (isHostProgramValid) { - ShaderProgramInfo shaderProgramInfo; - - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - var binaryCode = new Memory(entry.Code); - - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); - var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA); - - TranslatorContext translatorContext = Translator.CreateContext(0, gpuAccessor, options, counts); - TranslatorContext translatorContext2 = Translator.CreateContext((ulong)entry.Header.Size, gpuAccessor, options2, counts); - - program = translatorContext.Translate(out shaderProgramInfo, translatorContext2); - } - - // NOTE: Vertex B comes first in the shader cache. - byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray(); - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); + program = new ShaderProgram(entry.Header.Stage, ""); + shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); } else { - ShaderProgramInfo shaderProgramInfo; + int stageIndex = i + 1; - if (isHostProgramValid) - { - program = new ShaderProgram(entry.Header.Stage, ""); - shaderProgramInfo = hostShaderEntries[i].ToShaderProgramInfo(); - } - else - { - var binaryCode = new Memory(entry.Code); + TranslatorContext currentStage = shaderContexts[stageIndex]; + TranslatorContext nextStage = GetNextStageContext(shaderContexts, stageIndex); + TranslatorContext vertexA = stageIndex == 1 ? shaderContexts[0] : null; - var gpuAccessor = new CachedGpuAccessor( - _context, - binaryCode, - binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize), - entry.Header.GpuAccessorHeader, - entry.TextureDescriptors); - - var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags); - program = Translator.CreateContext(0, gpuAccessor, options, counts).Translate(out shaderProgramInfo); - } - - byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); - - shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code); + program = currentStage.Translate(out shaderProgramInfo, nextStage, vertexA); } + // NOTE: Vertex B comes first in the shader cache. + byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray(); + byte[] code2 = entry.Header.SizeA != 0 ? entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray() : null; + + shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2); + shaderPrograms.Add(program); } }); @@ -591,7 +587,7 @@ namespace Ryujinx.Graphics.Gpu.Shader } // The shader isn't currently cached, translate it and compile it. - ShaderCodeHolder shader = TranslateShader(channel.MemoryManager, shaderContexts[0]); + ShaderCodeHolder shader = TranslateShader(_dumper, channel.MemoryManager, shaderContexts[0], null, null); shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code); @@ -715,11 +711,10 @@ namespace Ryujinx.Graphics.Gpu.Shader // The shader isn't currently cached, translate it and compile it. ShaderCodeHolder[] shaders = new ShaderCodeHolder[Constants.ShaderStages]; - shaders[0] = TranslateShader(channel.MemoryManager, shaderContexts[1], shaderContexts[0]); - shaders[1] = TranslateShader(channel.MemoryManager, shaderContexts[2]); - shaders[2] = TranslateShader(channel.MemoryManager, shaderContexts[3]); - shaders[3] = TranslateShader(channel.MemoryManager, shaderContexts[4]); - shaders[4] = TranslateShader(channel.MemoryManager, shaderContexts[5]); + for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) + { + shaders[stageIndex] = TranslateShader(_dumper, channel.MemoryManager, shaderContexts, stageIndex + 1); + } List hostShaders = new List(); @@ -942,53 +937,94 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Translates a previously generated translator context to something that the host API accepts. /// + /// Optional shader code dumper /// Memory manager used to access the GPU memory where the shader is located - /// Current translator context to translate - /// Optional translator context of the shader that should be combined + /// Translator context of all available shader stages + /// Index on the stages array to translate /// Compiled graphics shader code - private ShaderCodeHolder TranslateShader( + private static ShaderCodeHolder TranslateShader( + ShaderDumper dumper, MemoryManager memoryManager, - TranslatorContext translatorContext, - TranslatorContext translatorContext2 = null) + TranslatorContext[] stages, + int stageIndex) { - if (translatorContext == null) + TranslatorContext currentStage = stages[stageIndex]; + TranslatorContext nextStage = GetNextStageContext(stages, stageIndex); + TranslatorContext vertexA = stageIndex == 1 ? stages[0] : null; + + return TranslateShader(dumper, memoryManager, currentStage, nextStage, vertexA); + } + + /// + /// Gets the next shader stage context, from an array of contexts and index of the current stage. + /// + /// Translator context of all available shader stages + /// Index on the stages array to translate + /// The translator context of the next stage, or null if inexistent + private static TranslatorContext GetNextStageContext(TranslatorContext[] stages, int stageIndex) + { + for (int nextStageIndex = stageIndex + 1; nextStageIndex < stages.Length; nextStageIndex++) + { + if (stages[nextStageIndex] != null) + { + return stages[nextStageIndex]; + } + } + + return null; + } + + /// + /// Translates a previously generated translator context to something that the host API accepts. + /// + /// Optional shader code dumper + /// Memory manager used to access the GPU memory where the shader is located + /// Translator context of the stage to be translated + /// Translator context of the next active stage, if existent + /// Optional translator context of the shader that should be combined + /// Compiled graphics shader code + private static ShaderCodeHolder TranslateShader( + ShaderDumper dumper, + MemoryManager memoryManager, + TranslatorContext currentStage, + TranslatorContext nextStage, + TranslatorContext vertexA) + { + if (currentStage == null) { return null; } - if (translatorContext2 != null) + if (vertexA != null) { - byte[] codeA = memoryManager.GetSpan(translatorContext2.Address, translatorContext2.Size).ToArray(); - byte[] codeB = memoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray(); + byte[] codeA = memoryManager.GetSpan(vertexA.Address, vertexA.Size).ToArray(); + byte[] codeB = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); - _dumper.Dump(codeA, compute: false, out string fullPathA, out string codePathA); - _dumper.Dump(codeB, compute: false, out string fullPathB, out string codePathB); + ShaderDumpPaths pathsA = default; + ShaderDumpPaths pathsB = default; - ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo, translatorContext2); - - if (fullPathA != null && fullPathB != null && codePathA != null && codePathB != null) + if (dumper != null) { - program.Prepend("// " + codePathB); - program.Prepend("// " + fullPathB); - program.Prepend("// " + codePathA); - program.Prepend("// " + fullPathA); + pathsA = dumper.Dump(codeA, compute: false); + pathsB = dumper.Dump(codeB, compute: false); } + ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage, vertexA); + + pathsB.Prepend(program); + pathsA.Prepend(program); + return new ShaderCodeHolder(program, shaderProgramInfo, codeB, codeA); } else { - byte[] code = memoryManager.GetSpan(translatorContext.Address, translatorContext.Size).ToArray(); + byte[] code = memoryManager.GetSpan(currentStage.Address, currentStage.Size).ToArray(); - _dumper.Dump(code, translatorContext.Stage == ShaderStage.Compute, out string fullPath, out string codePath); + ShaderDumpPaths paths = dumper?.Dump(code, currentStage.Stage == ShaderStage.Compute) ?? default; - ShaderProgram program = translatorContext.Translate(out ShaderProgramInfo shaderProgramInfo); + ShaderProgram program = currentStage.Translate(out ShaderProgramInfo shaderProgramInfo, nextStage); - if (fullPath != null && codePath != null) - { - program.Prepend("// " + codePath); - program.Prepend("// " + fullPath); - } + paths.Prepend(program); return new ShaderCodeHolder(program, shaderProgramInfo, code); } diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs new file mode 100644 index 00000000..f96a8ce1 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderDumpPaths.cs @@ -0,0 +1,49 @@ +using Ryujinx.Graphics.Shader; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + /// + /// Paths where shader code was dumped on disk. + /// + struct ShaderDumpPaths + { + /// + /// Path where the full shader code with header was dumped, or null if not dumped. + /// + public string FullPath { get; } + + /// + /// Path where the shader code without header was dumped, or null if not dumped. + /// + public string CodePath { get; } + + /// + /// True if the shader was dumped, false otherwise. + /// + public bool HasPath => FullPath != null && CodePath != null; + + /// + /// Creates a new shader dumps path structure. + /// + /// Path where the full shader code with header was dumped, or null if not dumped + /// Path where the shader code without header was dumped, or null if not dumped + public ShaderDumpPaths(string fullPath, string codePath) + { + FullPath = fullPath; + CodePath = codePath; + } + + /// + /// Prepends the shader paths on the program source, as a comment. + /// + /// Program to prepend into + public void Prepend(ShaderProgram program) + { + if (HasPath) + { + program.Prepend("// " + CodePath); + program.Prepend("// " + FullPath); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs index c170f9e2..8247bbfb 100644 --- a/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs +++ b/Ryujinx.Graphics.Gpu/Shader/ShaderDumper.cs @@ -1,4 +1,4 @@ -using System; +using Ryujinx.Graphics.Shader; using System.IO; namespace Ryujinx.Graphics.Gpu.Shader @@ -30,24 +30,20 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// Code to be dumped /// True for compute shader code, false for graphics shader code - /// Output path for the shader code with header included - /// Output path for the shader code without header - public void Dump(byte[] code, bool compute, out string fullPath, out string codePath) + /// Paths where the shader code was dumped + public ShaderDumpPaths Dump(byte[] code, bool compute) { _dumpPath = GraphicsConfig.ShadersDumpPath; if (string.IsNullOrWhiteSpace(_dumpPath)) { - fullPath = null; - codePath = null; - - return; + return default; } string fileName = "Shader" + CurrentDumpIndex.ToString("d4") + ".bin"; - fullPath = Path.Combine(FullDir(), fileName); - codePath = Path.Combine(CodeDir(), fileName); + string fullPath = Path.Combine(FullDir(), fileName); + string codePath = Path.Combine(CodeDir(), fileName); CurrentDumpIndex++; @@ -73,6 +69,8 @@ namespace Ryujinx.Graphics.Gpu.Shader { codeWriter.Write(0); } + + return new ShaderDumpPaths(fullPath, codePath); } /// diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 478ae497..2a93be32 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -3,14 +3,12 @@ using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.Translation; using System; using System.Linq; +using System.Numerics; namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { static class Declarations { - // At least 16 attributes are guaranteed by the spec. - public const int MaxAttributes = 16; - public static void Declare(CodeGenContext context, StructuredProgramInfo info) { context.AppendLine("#version 450 core"); @@ -129,14 +127,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl context.AppendLine(); } - if (info.IAttributes.Count != 0 || context.Config.GpPassthrough) + if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough) { DeclareInputAttributes(context, info); context.AppendLine(); } - if (info.OAttributes.Count != 0 || context.Config.Stage != ShaderStage.Fragment) + if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment) { DeclareOutputAttributes(context, info); @@ -404,24 +402,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info) { - if (context.Config.GpPassthrough) + int usedAttribtes = context.Config.UsedInputAttributes; + while (usedAttribtes != 0) { - for (int attr = 0; attr < MaxAttributes; attr++) - { - DeclareInputAttribute(context, info, attr); - } + int index = BitOperations.TrailingZeroCount(usedAttribtes); - foreach (int attr in info.IAttributes.OrderBy(x => x).Where(x => x >= MaxAttributes)) - { - DeclareInputAttribute(context, info, attr); - } - } - else - { - foreach (int attr in info.IAttributes.OrderBy(x => x)) - { - DeclareInputAttribute(context, info, attr); - } + DeclareInputAttribute(context, info, index); + + usedAttribtes &= ~(1 << index); } } @@ -440,8 +428,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl }; } - string pass = context.Config.GpPassthrough && !info.OAttributes.Contains(attr) ? "passthrough, " : string.Empty; - + string pass = (context.Config.PassthroughAttributes & (1 << attr)) != 0 ? "passthrough, " : string.Empty; string name = $"{DefaultNames.IAttributePrefix}{attr}"; if ((context.Config.Options.Flags & TranslationFlags.Feedback) != 0) @@ -461,34 +448,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info) { - if (context.Config.Stage == ShaderStage.Fragment || context.Config.GpPassthrough) + int usedAttribtes = context.Config.UsedOutputAttributes; + while (usedAttribtes != 0) { - DeclareUsedOutputAttributes(context, info); - } - else - { - DeclareAllOutputAttributes(context, info); - } - } + int index = BitOperations.TrailingZeroCount(usedAttribtes); - private static void DeclareUsedOutputAttributes(CodeGenContext context, StructuredProgramInfo info) - { - foreach (int attr in info.OAttributes.OrderBy(x => x)) - { - DeclareOutputAttribute(context, attr); - } - } + DeclareOutputAttribute(context, index); - private static void DeclareAllOutputAttributes(CodeGenContext context, StructuredProgramInfo info) - { - for (int attr = 0; attr < MaxAttributes; attr++) - { - DeclareOutputAttribute(context, attr); - } - - foreach (int attr in info.OAttributes.OrderBy(x => x).Where(x => x >= MaxAttributes)) - { - DeclareOutputAttribute(context, attr); + usedAttribtes &= ~(1 << index); } } diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs index e1da1a56..6ea700ac 100644 --- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs +++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs @@ -49,46 +49,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl Declarations.DeclareLocals(context, function); - if (funcName == MainFunctionName) - { - // Some games will leave some elements of gl_Position uninitialized, - // in those cases, the elements will contain undefined values according - // to the spec, but on NVIDIA they seems to be always initialized to (0, 0, 0, 1), - // so we do explicit initialization to avoid UB on non-NVIDIA gpus. - if (context.Config.Stage == ShaderStage.Vertex) - { - context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);"); - } - - // Ensure that unused attributes are set, otherwise the downstream - // compiler may eliminate them. - // (Not needed for fragment shader as it is the last stage). - if (context.Config.Stage != ShaderStage.Compute && - context.Config.Stage != ShaderStage.Fragment && - !context.Config.GpPassthrough) - { - for (int attr = 0; attr < Declarations.MaxAttributes; attr++) - { - if (info.OAttributes.Contains(attr)) - { - continue; - } - - if ((context.Config.Options.Flags & TranslationFlags.Feedback) != 0) - { - context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_x = 0.0;"); - context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_y = 0.0;"); - context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_z = 0.0;"); - context.AppendLine($"{DefaultNames.OAttributePrefix}{attr}_w = 1.0;"); - } - else - { - context.AppendLine($"{DefaultNames.OAttributePrefix}{attr} = vec4(0.0, 0.0, 0.0, 1.0);"); - } - } - } - } - PrintBlock(context, function.MainBlock); context.LeaveScope(); diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index c916935e..12b49d35 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Shader.Instructions; +using Ryujinx.Graphics.Shader.Translation; using System; using System.Collections.Generic; using System.Linq; @@ -9,10 +10,8 @@ namespace Ryujinx.Graphics.Shader.Decoders { static class Decoder { - public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless) + public static Block[][] Decode(ShaderConfig config, ulong startAddress) { - hasBindless = false; - List funcs = new List(); Queue funcQueue = new Queue(); @@ -90,8 +89,7 @@ namespace Ryujinx.Graphics.Shader.Decoders } } - FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless); - hasBindless |= blockHasBindless; + FillBlock(config, currBlock, limitAddress, startAddress); if (currBlock.OpCodes.Count != 0) { @@ -168,7 +166,7 @@ namespace Ryujinx.Graphics.Shader.Decoders for (int i = 0; i < cbOffsetsCount; i++) { - uint targetOffset = gpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4); + uint targetOffset = config.GpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4); Block target = GetBlock(baseOffset + targetOffset); opBrIndir.PossibleTargets.Add(target); target.Predecessors.Add(block); @@ -224,15 +222,11 @@ namespace Ryujinx.Graphics.Shader.Decoders return false; } - private static void FillBlock( - IGpuAccessor gpuAccessor, - Block block, - ulong limitAddress, - ulong startAddress, - out bool hasBindless) + private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress) { + IGpuAccessor gpuAccessor = config.GpuAccessor; + ulong address = block.Address; - hasBindless = false; do { @@ -274,13 +268,38 @@ namespace Ryujinx.Graphics.Shader.Decoders OpCode op = makeOp(emitter, opAddress, opCode); // We check these patterns to figure out the presence of bindless access - hasBindless |= (op is OpCodeImage image && image.IsBindless) || + if ((op is OpCodeImage image && image.IsBindless) || (op is OpCodeTxd txd && txd.IsBindless) || (op is OpCodeTld4B) || (emitter == InstEmit.TexB) || (emitter == InstEmit.TldB) || (emitter == InstEmit.TmmlB) || - (emitter == InstEmit.TxqB); + (emitter == InstEmit.TxqB)) + { + config.SetUsedFeature(FeatureFlags.Bindless); + } + + // Populate used attributes. + if (op is IOpCodeAttribute opAttr) + { + for (int elemIndex = 0; elemIndex < opAttr.Count; elemIndex++) + { + int attr = opAttr.AttributeOffset + elemIndex * 4; + if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd) + { + int index = (attr - AttributeConsts.UserAttributeBase) / 16; + + if (op.Emitter == InstEmit.Ast) + { + config.SetOutputUserAttribute(index); + } + else + { + config.SetInputUserAttribute(index); + } + } + } + } block.OpCodes.Add(op); } diff --git a/Ryujinx.Graphics.Shader/Decoders/IOpCodeAttribute.cs b/Ryujinx.Graphics.Shader/Decoders/IOpCodeAttribute.cs new file mode 100644 index 00000000..b5b16f1b --- /dev/null +++ b/Ryujinx.Graphics.Shader/Decoders/IOpCodeAttribute.cs @@ -0,0 +1,8 @@ +namespace Ryujinx.Graphics.Shader.Decoders +{ + interface IOpCodeAttribute + { + int AttributeOffset { get; } + int Count { get; } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeAttribute.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeAttribute.cs index 1457b602..f9119665 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeAttribute.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeAttribute.cs @@ -2,7 +2,7 @@ using Ryujinx.Graphics.Shader.Instructions; namespace Ryujinx.Graphics.Shader.Decoders { - class OpCodeAttribute : OpCodeAluReg + class OpCodeAttribute : OpCodeAluReg, IOpCodeAttribute { public int AttributeOffset { get; } public int Count { get; } diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs index 51138a1d..dc4e03e3 100644 --- a/Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs +++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeIpa.cs @@ -2,9 +2,10 @@ using Ryujinx.Graphics.Shader.Instructions; namespace Ryujinx.Graphics.Shader.Decoders { - class OpCodeIpa : OpCodeAluReg + class OpCodeIpa : OpCodeAluReg, IOpCodeAttribute { public int AttributeOffset { get; } + public int Count => 1; public InterpolationMode Mode { get; } diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs index 73fff1ec..a9e44175 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs @@ -277,21 +277,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr public AstOperand GetOperandDef(Operand operand) { - if (TryGetUserAttributeIndex(operand, out int attrIndex)) - { - Info.OAttributes.Add(attrIndex); - } - return GetOperand(operand); } public AstOperand GetOperandUse(Operand operand) { - if (TryGetUserAttributeIndex(operand, out int attrIndex)) - { - Info.IAttributes.Add(attrIndex); - } - return GetOperand(operand); } @@ -318,30 +308,5 @@ namespace Ryujinx.Graphics.Shader.StructuredIr return astOperand; } - - private static bool TryGetUserAttributeIndex(Operand operand, out int attrIndex) - { - if (operand.Type == OperandType.Attribute) - { - if (operand.Value >= AttributeConsts.UserAttributeBase && - operand.Value < AttributeConsts.UserAttributeEnd) - { - attrIndex = (operand.Value - AttributeConsts.UserAttributeBase) >> 4; - - return true; - } - else if (operand.Value >= AttributeConsts.FragmentOutputColorBase && - operand.Value < AttributeConsts.FragmentOutputColorEnd) - { - attrIndex = (operand.Value - AttributeConsts.FragmentOutputColorBase) >> 4; - - return true; - } - } - - attrIndex = 0; - - return false; - } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs index 9479d535..aeaa03ec 100644 --- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs +++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs @@ -6,17 +6,11 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { public List Functions { get; } - public HashSet IAttributes { get; } - public HashSet OAttributes { get; } - public HelperFunctionsMask HelperFunctionsMask { get; set; } public StructuredProgramInfo() { Functions = new List(); - - IAttributes = new HashSet(); - OAttributes = new HashSet(); } } } \ No newline at end of file diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 49a89374..5cdd5c0a 100644 --- a/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Shader.Translation public bool IsNonMain { get; } + public int OperationsCount => _operations.Count; + private readonly IReadOnlyDictionary _funcs; private readonly List _operations; private readonly Dictionary _labels; @@ -200,6 +202,7 @@ namespace Ryujinx.Graphics.Shader.Translation if (target.Enabled) { + Config.SetOutputUserAttribute(rtIndex); regIndexBase += 4; } } diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 3e7be582..c7704c2b 100644 --- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -41,6 +41,10 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly TranslationCounts _counts; + public int UsedInputAttributes { get; private set; } + public int UsedOutputAttributes { get; private set; } + public int PassthroughAttributes { get; private set; } + private int _usedConstantBuffers; private int _usedStorageBuffers; private int _usedStorageBuffersWrite; @@ -170,6 +174,8 @@ namespace Ryujinx.Graphics.Shader.Translation TextureHandlesForCache.UnionWith(other.TextureHandlesForCache); + UsedInputAttributes |= other.UsedInputAttributes; + UsedOutputAttributes |= other.UsedOutputAttributes; _usedConstantBuffers |= other._usedConstantBuffers; _usedStorageBuffers |= other._usedStorageBuffers; _usedStorageBuffersWrite |= other._usedStorageBuffersWrite; @@ -191,6 +197,28 @@ namespace Ryujinx.Graphics.Shader.Translation } } + public void SetInputUserAttribute(int index) + { + UsedInputAttributes |= 1 << index; + } + + public void SetOutputUserAttribute(int index) + { + UsedOutputAttributes |= 1 << index; + } + + public void MergeOutputUserAttributes(int mask) + { + if (GpPassthrough) + { + PassthroughAttributes = mask & ~UsedOutputAttributes; + } + else + { + UsedOutputAttributes |= mask; + } + } + public void SetClipDistanceWritten(int index) { ClipDistancesWritten |= (byte)(1 << index); diff --git a/Ryujinx.Graphics.Shader/Translation/Translator.cs b/Ryujinx.Graphics.Shader/Translation/Translator.cs index 685b6a20..f1e92d7c 100644 --- a/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -5,6 +5,7 @@ using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.Translation.Optimizations; using System; using System.Collections.Generic; +using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; @@ -120,24 +121,17 @@ namespace Ryujinx.Graphics.Shader.Translation Block[][] cfg; ulong maxEndAddress = 0; - bool hasBindless; - if ((options.Flags & TranslationFlags.Compute) != 0) { config = new ShaderConfig(gpuAccessor, options, counts); - cfg = Decoder.Decode(gpuAccessor, address, out hasBindless); + cfg = Decoder.Decode(config, address); } else { config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts); - cfg = Decoder.Decode(gpuAccessor, address + HeaderSize, out hasBindless); - } - - if (hasBindless) - { - config.SetUsedFeature(FeatureFlags.Bindless); + cfg = Decoder.Decode(config, address + HeaderSize); } for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++) @@ -151,7 +145,7 @@ namespace Ryujinx.Graphics.Shader.Translation maxEndAddress = block.EndAddress; } - if (!hasBindless) + if (!config.UsedFeatures.HasFlag(FeatureFlags.Bindless)) { for (int index = 0; index < block.OpCodes.Count; index++) { @@ -169,8 +163,10 @@ namespace Ryujinx.Graphics.Shader.Translation return cfg; } - internal static FunctionCode[] EmitShader(Block[][] cfg, ShaderConfig config) + internal static FunctionCode[] EmitShader(Block[][] cfg, ShaderConfig config, bool initializeOutputs, out int initializationOperations) { + initializationOperations = 0; + Dictionary funcIds = new Dictionary(); for (int funcIndex = 0; funcIndex < cfg.Length; funcIndex++) @@ -184,6 +180,12 @@ namespace Ryujinx.Graphics.Shader.Translation { EmitterContext context = new EmitterContext(config, funcIndex != 0, funcIds); + if (initializeOutputs && funcIndex == 0) + { + EmitOutputsInitialization(context, config); + initializationOperations = context.OperationsCount; + } + for (int blkIndex = 0; blkIndex < cfg[funcIndex].Length; blkIndex++) { Block block = cfg[funcIndex][blkIndex]; @@ -201,6 +203,39 @@ namespace Ryujinx.Graphics.Shader.Translation return funcs.ToArray(); } + private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config) + { + // Compute has no output attributes, and fragment is the last stage, so we + // don't need to initialize outputs on those stages. + if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment) + { + return; + } + + void InitializeOutput(int baseAttr) + { + for (int c = 0; c < 4; c++) + { + context.Copy(Attribute(baseAttr + c * 4), ConstF(c == 3 ? 1f : 0f)); + } + } + + if (config.Stage == ShaderStage.Vertex) + { + InitializeOutput(AttributeConsts.PositionX); + } + + int usedAttribtes = context.Config.UsedOutputAttributes; + while (usedAttribtes != 0) + { + int index = BitOperations.TrailingZeroCount(usedAttribtes); + + InitializeOutput(AttributeConsts.UserAttributeBase + index * 16); + + usedAttribtes &= ~(1 << index); + } + } + private static void EmitOps(EmitterContext context, Block block) { for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++) diff --git a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index ab74d039..47cf0ac8 100644 --- a/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation operand.Value < AttributeConsts.UserAttributeEnd; } - private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b) + private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart) { // Here we combine two shaders. // For shader A: @@ -57,7 +57,7 @@ namespace Ryujinx.Graphics.Shader.Translation Operand lblB = Label(); - for (int index = 0; index < a[0].Code.Length; index++) + for (int index = aStart; index < a[0].Code.Length; index++) { Operation operation = a[0].Code[index]; @@ -103,7 +103,17 @@ namespace Ryujinx.Graphics.Shader.Translation if (temp != null) { - operation.SetSource(srcIndex, temp); + // TODO: LoadAttribute should accept any integer value as first argument, + // then we don't need special case here. Right now it expects the first + // operand to be of type "attribute". + if ((operation.Inst & Instruction.Mask) == Instruction.LoadAttribute) + { + operation.TurnIntoCopy(temp); + } + else + { + operation.SetSource(srcIndex, temp); + } } } } @@ -126,13 +136,25 @@ namespace Ryujinx.Graphics.Shader.Translation return output; } - public ShaderProgram Translate(out ShaderProgramInfo shaderProgramInfo, TranslatorContext other = null) + public ShaderProgram Translate( + out ShaderProgramInfo shaderProgramInfo, + TranslatorContext nextStage = null, + TranslatorContext other = null) { - FunctionCode[] code = EmitShader(_cfg, _config); + if (nextStage != null) + { + _config.MergeOutputUserAttributes(nextStage._config.UsedInputAttributes); + } + + FunctionCode[] code = EmitShader(_cfg, _config, initializeOutputs: other == null, out _); if (other != null) { - code = Combine(EmitShader(other._cfg, other._config), code); + other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes); + + FunctionCode[] otherCode = EmitShader(other._cfg, other._config, initializeOutputs: true, out int aStart); + + code = Combine(otherCode, code, aStart); _config.InheritFrom(other._config); }