From fe30c03cac9d1f09270a4156aceab273dbac81fb Mon Sep 17 00:00:00 2001
From: gdkchan
Date: Thu, 8 Jun 2023 17:09:14 -0300
Subject: [PATCH] Implement soft float64 conversion on shaders when host has no support (#5159)

* Implement soft float64 conversion on shaders when host has no support

* Shader cache version bump

* Fix rebase error
---
 src/Ryujinx.Graphics.GAL/Capabilities.cs      |   3 +
 .../Shader/DiskCache/DiskCacheHostStorage.cs  |   2 +-
 .../Shader/GpuAccessorBase.cs                 |   2 +
 src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs |   1 +
 src/Ryujinx.Graphics.Shader/IGpuAccessor.cs   |   9 ++
 .../IntermediateRepresentation/Operation.cs   |  38 ++++++
 .../Translation/HelperFunctionManager.cs      | 116 +++++++++++++++++++
 .../Translation/HelperFunctionName.cs         |   5 +-
 .../Optimizations/DoubleToFloat.cs            |  85 ++++++++++++++
 .../Translation/Optimizations/Optimizer.cs    |  10 ++
 .../HardwareCapabilities.cs                   |   3 +
 src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs |   2 +
 12 files changed, 272 insertions(+), 4 deletions(-)
 create mode 100644 src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs

diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs
index 48b37d35..f2dd0963 100644
--- a/src/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
         public readonly bool SupportsCubemapView;
         public readonly bool SupportsNonConstantTextureOffset;
         public readonly bool SupportsShaderBallot;
+        public readonly bool SupportsShaderFloat64;
         public readonly bool SupportsTextureShadowLod;
         public readonly bool SupportsViewportIndexVertexTessellation;
         public readonly bool SupportsViewportMask;
@@ -81,6 +82,7 @@ namespace Ryujinx.Graphics.GAL
             bool supportsCubemapView,
             bool supportsNonConstantTextureOffset,
             bool supportsShaderBallot,
+            bool supportsShaderFloat64,
             bool supportsTextureShadowLod,
             bool supportsViewportIndexVertexTessellation,
             bool supportsViewportMask,
@@ -124,6 +126,7 @@ namespace Ryujinx.Graphics.GAL
             SupportsCubemapView = supportsCubemapView;
             SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
             SupportsShaderBallot = supportsShaderBallot;
+            SupportsShaderFloat64 = supportsShaderFloat64;
             SupportsTextureShadowLod = supportsTextureShadowLod;
             SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
             SupportsViewportMask = supportsViewportMask;
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
index 4b828080..9419ea92 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
         private const ushort FileFormatVersionMajor = 1;
         private const ushort FileFormatVersionMinor = 2;
         private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 4992;
+        private const uint CodeGenVersion = 5159;
 
         private const string SharedTocFileName = "shared.toc";
         private const string SharedDataFileName = "shared.data";
diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
index 0001243d..a60564e0 100644
--- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
 
         public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
 
+        public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
+
         public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
 
         public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 161191b8..234340e5 100644
--- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -158,6 +158,7 @@ namespace Ryujinx.Graphics.OpenGL
                 supportsCubemapView: true,
                 supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
                 supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
+                supportsShaderFloat64: true,
                 supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
                 supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
                 supportsViewportMask: HwCapabilities.SupportsViewportArray2,
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 473964de..d4f99e11 100644
--- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
             return true;
         }
 
+        /// <summary>
+        /// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
+        /// </summary>
+        /// <returns>True if the GPU and driver support double operations, false otherwise</returns>
+        bool QueryHostSupportsShaderFloat64()
+        {
+            return true;
+        }
+
         /// <summary>
         /// Queries host GPU support for signed normalized buffer texture formats.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
index d502a9b6..425cfd90 100644
--- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
@@ -255,5 +255,43 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
 
             _sources = new Operand[] { source };
         }
+
+        /// <summary>
+        /// Rewrites this operation so that it works on 32-bit floats instead of 64-bit floats.
+        /// </summary>
+        /// <remarks>
+        /// Instructions flagged as FP64 are re-flagged as FP32. Conversions between FP32 and FP64
+        /// become plain copies, and conversions between FP64 and integers become their FP32 equivalents.
+        /// All other instructions are left unchanged.
+        /// </remarks>
+        public void TurnDoubleIntoFloat()
+        {
+            if ((Inst & ~Instruction.Mask) == Instruction.FP64)
+            {
+                Inst = (Inst & Instruction.Mask) | Instruction.FP32;
+            }
+            else
+            {
+                switch (Inst)
+                {
+                    case Instruction.ConvertFP32ToFP64:
+                    case Instruction.ConvertFP64ToFP32:
+                        Inst = Instruction.Copy;
+                        break;
+                    case Instruction.ConvertFP64ToS32:
+                        Inst = Instruction.ConvertFP32ToS32;
+                        break;
+                    case Instruction.ConvertFP64ToU32:
+                        Inst = Instruction.ConvertFP32ToU32;
+                        break;
+                    case Instruction.ConvertS32ToFP64:
+                        Inst = Instruction.ConvertS32ToFP32;
+                        break;
+                    case Instruction.ConvertU32ToFP64:
+                        Inst = Instruction.ConvertU32ToFP32;
+                        break;
+                }
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
index 7dd267f3..6958b86f 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs
@@ -45,12 +45,128 @@ namespace Ryujinx.Graphics.Shader.Translation
         {
             return functionName switch
             {
+                HelperFunctionName.ConvertDoubleToFloat => GenerateConvertDoubleToFloatFunction(),
+                HelperFunctionName.ConvertFloatToDouble => GenerateConvertFloatToDoubleFunction(),
                 HelperFunctionName.TexelFetchScale => GenerateTexelFetchScaleFunction(),
                 HelperFunctionName.TextureSizeUnscale => GenerateTextureSizeUnscaleFunction(),
                 _ => throw new ArgumentException($"Invalid function name {functionName}")
             };
         }
 
+        /// <summary>
+        /// Generates a helper function that converts the bits of a 64-bit float into the bits of a 32-bit float.
+        /// </summary>
+        /// <remarks>
+        /// The function takes the low and high 32-bit halves of the double as arguments 0 and 1,
+        /// and returns the float bits. Only integer operations are used.
+        /// </remarks>
+        /// <returns>The generated function</returns>
+        private Function GenerateConvertDoubleToFloatFunction()
+        {
+            EmitterContext context = new EmitterContext();
+
+            Operand valueLow = Argument(0);
+            Operand valueHigh = Argument(1);
+
+            // Keep the top 30 bits of the 52-bit mantissa, the discarded low 22 bits are folded into a sticky bit.
+            Operand mantissaLow = context.BitwiseAnd(valueLow, Const(((1 << 22) - 1)));
+            Operand mantissa = context.ShiftRightU32(valueLow, Const(22));
+
+            mantissa = context.BitwiseOr(mantissa, context.ShiftLeft(context.BitwiseAnd(valueHigh, Const(0xfffff)), Const(10)));
+            mantissa = context.BitwiseOr(mantissa, context.ConditionalSelect(mantissaLow, Const(1), Const(0)));
+
+            Operand exp = context.BitwiseAnd(context.ShiftRightU32(valueHigh, Const(20)), Const(0x7ff));
+            Operand sign = context.ShiftRightS32(valueHigh, Const(31));
+
+            Operand resultSign = context.ShiftLeft(sign, Const(31));
+
+            Operand notZero = context.BitwiseOr(mantissa, exp);
+
+            Operand lblNotZero = Label();
+
+            // A value with no exponent and no mantissa bits is zero, only the sign is returned for it.
+            context.BranchIfTrue(lblNotZero, notZero);
+
+            context.Return(resultSign);
+
+            context.MarkLabel(lblNotZero);
+
+            Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0x7ff));
+
+            // NaN/Inf result: exponent all ones, with the quiet bit set when the input mantissa is not zero (NaN).
+            Operand nanOrInf = context.BitwiseOr(Const(0x7f800000), context.ConditionalSelect(mantissa, Const(0x400000), Const(0)));
+
+            mantissa = context.BitwiseOr(mantissa, Const(0x40000000));
+            exp = context.ISubtract(exp, Const(0x381));
+
+            // Note: Overflow cases are not handled here and might produce incorrect results.
+
+            // Round to nearest (ties to even) while dropping the low 7 bits.
+            Operand roundBits = context.BitwiseAnd(mantissa, Const(0x7f));
+            Operand roundBitsXor64 = context.BitwiseExclusiveOr(roundBits, Const(0x40));
+            mantissa = context.ShiftRightU32(context.IAdd(mantissa, Const(0x40)), Const(7));
+            mantissa = context.BitwiseAnd(mantissa, context.ConditionalSelect(roundBitsXor64, Const(~0), Const(~1)));
+
+            exp = context.ConditionalSelect(mantissa, exp, Const(0));
+
+            // The mantissa still has the implicit bit on bit 23, it carries into the exponent field on the add below.
+            Operand finite = context.IAdd(mantissa, context.ShiftLeft(exp, Const(23)));
+            Operand result = context.IAdd(context.ConditionalSelect(notNaNOrInf, finite, nanOrInf), resultSign);
+
+            context.Return(result);
+
+            return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertDoubleToFloat", true, 2, 0);
+        }
+
+        /// <summary>
+        /// Generates a helper function that converts the bits of a 32-bit float into the bits of a 64-bit float.
+        /// </summary>
+        /// <remarks>
+        /// The function takes the float bits as argument 0, and writes the low and high 32-bit halves
+        /// of the double to arguments 1 and 2.
+        /// </remarks>
+        /// <returns>The generated function</returns>
+        private Function GenerateConvertFloatToDoubleFunction()
+        {
+            EmitterContext context = new EmitterContext();
+
+            Operand value = Argument(0);
+
+            Operand mantissa = context.BitwiseAnd(value, Const(0x7fffff));
+            Operand exp = context.BitwiseAnd(context.ShiftRightU32(value, Const(23)), Const(0xff));
+            Operand sign = context.ShiftRightS32(value, Const(31));
+
+            Operand notNaNOrInf = context.ICompareNotEqual(exp, Const(0xff));
+            Operand expNotZero = context.ICompareNotEqual(exp, Const(0));
+
+            exp = context.IAdd(exp, Const(0x380));
+
+            // Denormal input: shift the mantissa until its MSB sits on bit 23 (the implicit bit position).
+            // That bit carries into the exponent field below, so this exponent is one less than the final one.
+            Operand shiftDist = context.ISubtract(Const(23), context.FindMSBU32(mantissa));
+            Operand normExp = context.ISubtract(Const(0x380), shiftDist);
+            Operand normMant = context.ShiftLeft(mantissa, shiftDist);
+
+            // Zero has no exponent and no mantissa bits, and must remain zero (only the sign is kept).
+            Operand zeroOrDenormExp = context.ConditionalSelect(mantissa, normExp, Const(0));
+
+            exp = context.ConditionalSelect(notNaNOrInf, exp, Const(0x7ff));
+            exp = context.ConditionalSelect(expNotZero, exp, zeroOrDenormExp);
+            mantissa = context.ConditionalSelect(expNotZero, mantissa, normMant);
+
+            Operand resultLow = context.ShiftLeft(mantissa, Const(29));
+            Operand resultHigh = context.ShiftRightU32(mantissa, Const(3));
+
+            resultHigh = context.IAdd(resultHigh, context.ShiftLeft(exp, Const(20)));
+            resultHigh = context.IAdd(resultHigh, context.ShiftLeft(sign, Const(31)));
+
+            context.Copy(Argument(1), resultLow);
+            context.Copy(Argument(2), resultHigh);
+            context.Return();
+
+            return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2);
+        }
+
         private Function GenerateTexelFetchScaleFunction()
         {
             EmitterContext context = new EmitterContext();
diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
index 5accdf65..8c37c34c 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs
@@ -1,10 +1,9 @@
-using Ryujinx.Graphics.Shader.IntermediateRepresentation;
-using System.Collections.Generic;
-
 namespace Ryujinx.Graphics.Shader.Translation
 {
     enum HelperFunctionName
     {
+        ConvertDoubleToFloat,
+        ConvertFloatToDouble,
         TexelFetchScale,
         TextureSizeUnscale
     }
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs
new file mode 100644
index 00000000..42bce5cc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/DoubleToFloat.cs
@@ -0,0 +1,85 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+    /// <summary>
+    /// Pass that replaces 64-bit float operations with 32-bit float operations.
+    /// </summary>
+    static class DoubleToFloat
+    {
+        /// <summary>
+        /// Replaces all double precision operations on the block with single precision ones.
+        /// </summary>
+        /// <param name="hfm">Helper function manager used to get the conversion functions</param>
+        /// <param name="block">Block to be modified</param>
+        public static void RunPass(HelperFunctionManager hfm, BasicBlock block)
+        {
+            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+            {
+                if (node.Value is not Operation)
+                {
+                    continue;
+                }
+
+                node = InsertSoftFloat64(hfm, node);
+            }
+        }
+
+        /// <summary>
+        /// Replaces a double pack or unpack operation with a call to the matching soft float64 conversion function.
+        /// Any other operation is turned into its single precision equivalent in place.
+        /// </summary>
+        /// <param name="hfm">Helper function manager used to get the conversion functions</param>
+        /// <param name="node">Node of the operation to be processed</param>
+        /// <returns>Node of the inserted call, or <paramref name="node"/> if the operation was modified in place</returns>
+        private static LinkedListNode<INode> InsertSoftFloat64(HelperFunctionManager hfm, LinkedListNode<INode> node)
+        {
+            Operation operation = (Operation)node.Value;
+
+            if (operation.Inst == Instruction.PackDouble2x32)
+            {
+                int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertDoubleToFloat);
+
+                Operand[] callArgs = new Operand[] { Const(functionId), operation.GetSource(0), operation.GetSource(1) };
+
+                Operand floatValue = operation.Dest;
+
+                operation.Dest = null;
+
+                LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, floatValue, callArgs));
+
+                Utils.DeleteNode(node, operation);
+
+                return newNode;
+            }
+            else if (operation.Inst == Instruction.UnpackDouble2x32)
+            {
+                int functionId = hfm.GetOrCreateFunctionId(HelperFunctionName.ConvertFloatToDouble);
+
+                // TODO: Allow UnpackDouble2x32 to produce two outputs and get rid of "operation.Index".
+
+                Operand resultLow = operation.Index == 0 ? operation.Dest : Local();
+                Operand resultHigh = operation.Index == 1 ? operation.Dest : Local();
+
+                operation.Dest = null;
+
+                Operand[] callArgs = new Operand[] { Const(functionId), operation.GetSource(0), resultLow, resultHigh };
+
+                LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs));
+
+                Utils.DeleteNode(node, operation);
+
+                return newNode;
+            }
+            else
+            {
+                operation.TurnDoubleIntoFloat();
+
+                return node;
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index bdb3a62e..8d2669c0 100644
--- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -11,8 +11,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
         {
             RunOptimizationPasses(blocks, config);
 
+            // TODO: Some of those are not optimizations and shouldn't be here.
+
             GlobalToStorage.RunPass(hfm, blocks, config);
 
+            bool hostSupportsShaderFloat64 = config.GpuAccessor.QueryHostSupportsShaderFloat64();
+
             // Those passes are looking for specific patterns and only needs to run once.
             for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
             {
@@ -24,6 +28,12 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
                 {
                     EliminateMultiplyByFragmentCoordW(blocks[blkIndex]);
                 }
+
+                // If the host does not support double operations, we need to turn them into float operations.
+                if (!hostSupportsShaderFloat64)
+                {
+                    DoubleToFloat.RunPass(hfm, blocks[blkIndex]);
+                }
             }
 
             // Run optimizations one last time to remove any code that is now optimizable after above passes.
diff --git a/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs b/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
index f600d93f..393bcf1a 100644
--- a/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
+++ b/src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
@@ -26,6 +26,7 @@ namespace Ryujinx.Graphics.Vulkan
         public readonly bool SupportsFragmentShaderInterlock;
         public readonly bool SupportsGeometryShaderPassthrough;
         public readonly bool SupportsSubgroupSizeControl;
+        public readonly bool SupportsShaderFloat64;
         public readonly bool SupportsShaderInt8;
         public readonly bool SupportsShaderStencilExport;
         public readonly bool SupportsShaderStorageImageMultisample;
@@ -63,6 +64,7 @@ namespace Ryujinx.Graphics.Vulkan
             bool supportsFragmentShaderInterlock,
             bool supportsGeometryShaderPassthrough,
             bool supportsSubgroupSizeControl,
+            bool supportsShaderFloat64,
             bool supportsShaderInt8,
             bool supportsShaderStencilExport,
             bool supportsShaderStorageImageMultisample,
@@ -99,6 +101,7 @@ namespace Ryujinx.Graphics.Vulkan
             SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
             SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
             SupportsSubgroupSizeControl = supportsSubgroupSizeControl;
+            SupportsShaderFloat64 = supportsShaderFloat64;
             SupportsShaderInt8 = supportsShaderInt8;
             SupportsShaderStencilExport = supportsShaderStencilExport;
             SupportsShaderStorageImageMultisample = supportsShaderStorageImageMultisample;
diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index 3987be9b..0daec00c 100644
--- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -306,6 +306,7 @@ namespace Ryujinx.Graphics.Vulkan
                 _physicalDevice.IsDeviceExtensionPresent("VK_EXT_fragment_shader_interlock"),
                 _physicalDevice.IsDeviceExtensionPresent("VK_NV_geometry_shader_passthrough"),
                 supportsSubgroupSizeControl,
+                features2.Features.ShaderFloat64,
                 featuresShaderInt8.ShaderInt8,
                 _physicalDevice.IsDeviceExtensionPresent("VK_EXT_shader_stencil_export"),
                 features2.Features.ShaderStorageImageMultisample,
@@ -594,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
                 supportsCubemapView: !IsAmdGcn,
                 supportsNonConstantTextureOffset: false,
                 supportsShaderBallot: false,
+                supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
                 supportsTextureShadowLod: false,
                 supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
                 supportsViewportMask: Capabilities.SupportsViewportArray2,