From 2be8b6ea4527239fff1b2fdf9dc7ce1346b173e6 Mon Sep 17 00:00:00 2001 From: Domenico V <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 31 Jul 2023 01:57:37 +0200 Subject: [PATCH] CPU (A64): Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. (#5502) * Add Fmaxp & Fminp Scalar Inst.s, Fast & Slow Paths; with Tests. * Ptc.InternalVersion = 5502 --- src/ARMeilleure/Decoders/OpCodeTable.cs | 2 + .../Instructions/InstEmitSimdArithmetic.cs | 50 +++++++++++++++++++ src/ARMeilleure/Instructions/InstName.cs | 2 + src/ARMeilleure/Translation/PTC/Ptc.cs | 2 +- src/Ryujinx.Tests/Cpu/CpuTestSimd.cs | 4 ++ 5 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs index 5cfd0bb8..9e13bd9b 100644 --- a/src/ARMeilleure/Decoders/OpCodeTable.cs +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -330,6 +330,7 @@ namespace ARMeilleure.Decoders SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create); SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create); SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create); + SetA64("011111100x110000111110xxxxxxxxxx", InstName.Fmaxp_S, InstEmit.Fmaxp_S, OpCodeSimd.Create); SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create); SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create); SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create); @@ -339,6 +340,7 @@ namespace ARMeilleure.Decoders SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create); SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create); SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create); + SetA64("011111101x110000111110xxxxxxxxxx", InstName.Fminp_S, InstEmit.Fminp_S, OpCodeSimd.Create); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create); SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create); SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create); diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs index 7b308fa9..543aab02 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -883,6 +883,31 @@ namespace ARMeilleure.Instructions } } + public static void Fmaxp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + public static void Fmaxp_V(ArmEmitterContext context) { if (Optimizations.UseAdvSimd) @@ -1081,6 +1106,31 @@ namespace ARMeilleure.Instructions } } + public static void Fminp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + public static void Fminp_V(ArmEmitterContext context) { if (Optimizations.UseAdvSimd) diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs index fd71d92e..32ae38da 100644 --- a/src/ARMeilleure/Instructions/InstName.cs +++ b/src/ARMeilleure/Instructions/InstName.cs @@ -228,6 +228,7 @@ namespace ARMeilleure.Instructions Fmaxnmp_S, Fmaxnmp_V, Fmaxnmv_V, + Fmaxp_S, Fmaxp_V, Fmaxv_V, Fmin_S, @@ -237,6 +238,7 @@ namespace ARMeilleure.Instructions Fminnmp_S, Fminnmp_V, Fminnmv_V, + Fminp_S, Fminp_V, Fminv_V, Fmla_Se, diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index 14d4e471..ce653383 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -29,7 +29,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 5343; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 5502; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; diff --git a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 4c568a8f..eb763618 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -764,7 +764,9 @@ namespace Ryujinx.Tests.Cpu { 0x7E30D820u, // FADDP S0, V1.2S 0x7E30C820u, // FMAXNMP S0, V1.2S + 0x7E30F820u, // FMAXP S0, V1.2S 0x7EB0C820u, // FMINNMP S0, V1.2S + 0x7EB0F820u, // FMINP S0, V1.2S }; } @@ -774,7 +776,9 @@ namespace Ryujinx.Tests.Cpu { 0x7E70D820u, // FADDP D0, V1.2D 0x7E70C820u, // FMAXNMP D0, V1.2D + 0x7E70F820u, // FMAXP D0, V1.2D 0x7EF0C820u, // FMINNMP D0, V1.2D + 0x7EF0F820u, // FMINP D0, V1.2D }; }