From bba9bf97d03596b89972cc77390311b9e9472688 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sat, 6 Oct 2018 03:45:59 +0200 Subject: [PATCH] Add 9+7 fast/slow FP inst. impls.; add 14 FP Tests. (#437) * Update CpuTest.cs * Delete CpuTestSimdCmp.cs Obsolete. * Update CpuTestSimdArithmetic.cs Superseded. * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update ASoftFloat.cs * Nit. * Update AOpCodeTable.cs * Update AOptimizations.cs * Update AInstEmitSimdArithmetic.cs * Update ASoftFloat.cs * Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs * Update ASoftFloat.cs * Update CpuTestSimdReg.cs * Update AOpCodeTable.cs * Update AInstEmitSimdArithmetic.cs * Update ASoftFloat.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs --- ChocolArm64/AOpCodeTable.cs | 4 + ChocolArm64/AOptimizations.cs | 4 +- .../Instruction/AInstEmitSimdArithmetic.cs | 530 +++-- .../Instruction/AInstEmitSimdHelper.cs | 33 +- ChocolArm64/Instruction/ASoftFloat.cs | 1732 ++++++++++++++--- Ryujinx.Tests/Cpu/CpuTest.cs | 189 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 188 +- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 152 -- Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs | 407 ---- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 394 +++- 10 files changed, 2471 insertions(+), 1162 deletions(-) delete mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 6404e14f..30025712 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -301,6 +301,8 @@ namespace ChocolArm64 SetA64("010111111<1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg)); SetA64("0x0011111<0011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg)); SetA64("000111100x100001010000xxxxxxxxxx", AInstEmit.Fneg_S, typeof(AOpCodeSimd)); SetA64("0>1011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd)); SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg)); @@ -310,6 +312,7 @@ namespace ChocolArm64 SetA64("0>0011101<100001110110xxxxxxxxxx", AInstEmit.Frecpe_V, typeof(AOpCodeSimd)); SetA64("010111100x1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_S, typeof(AOpCodeSimdReg)); SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", AInstEmit.Frecps_V, typeof(AOpCodeSimdReg)); + SetA64("010111101x100001111110xxxxxxxxxx", AInstEmit.Frecpx_S, typeof(AOpCodeSimd)); SetA64("000111100x100110010000xxxxxxxxxx", AInstEmit.Frinta_S, typeof(AOpCodeSimd)); SetA64("0>1011100<100001100010xxxxxxxxxx", AInstEmit.Frinta_V, typeof(AOpCodeSimd)); SetA64("000111100x100111110000xxxxxxxxxx", AInstEmit.Frinti_S, typeof(AOpCodeSimd)); @@ -327,6 +330,7 @@ namespace ChocolArm64 SetA64("010111101x1xxxxx111111xxxxxxxxxx", AInstEmit.Frsqrts_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", AInstEmit.Frsqrts_V, typeof(AOpCodeSimdReg)); SetA64("000111100x100001110000xxxxxxxxxx", AInstEmit.Fsqrt_S, typeof(AOpCodeSimd)); + SetA64("0>1011101<100001111110xxxxxxxxxx", AInstEmit.Fsqrt_V, typeof(AOpCodeSimd)); SetA64("000111100x1xxxxx001110xxxxxxxxxx", AInstEmit.Fsub_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", AInstEmit.Fsub_V, typeof(AOpCodeSimdReg)); SetA64("01001110000xxxxx000111xxxxxxxxxx", AInstEmit.Ins_Gp, typeof(AOpCodeSimdIns)); diff --git a/ChocolArm64/AOptimizations.cs b/ChocolArm64/AOptimizations.cs index 40e1674a..17205489 100644 --- a/ChocolArm64/AOptimizations.cs +++ b/ChocolArm64/AOptimizations.cs @@ -2,6 +2,8 @@ using System.Runtime.Intrinsics.X86; public static class AOptimizations { + internal static bool FastFP = true; + private static bool UseAllSseIfAvailable = true; private static bool UseSseIfAvailable = true; @@ -13,4 +15,4 @@ public static class AOptimizations internal static bool UseSse2 = (UseAllSseIfAvailable && UseSse2IfAvailable) && Sse2.IsSupported; internal static bool UseSse41 = (UseAllSseIfAvailable && UseSse41IfAvailable) && Sse41.IsSupported; internal static bool UseSse42 = (UseAllSseIfAvailable && UseSse42IfAvailable) && Sse42.IsSupported; -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 811730fc..d11a0b84 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -174,25 +174,33 @@ namespace ChocolArm64.Instruction public static void Fadd_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitScalarSseOrSse2OpF(Context, nameof(Sse.AddScalar)); } else { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPAdd)); + }); } } public static void Fadd_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitVectorSseOrSse2OpF(Context, nameof(Sse.Add)); } else { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPAdd)); + }); } } @@ -217,42 +225,50 @@ namespace ChocolArm64.Instruction public static void Fdiv_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitScalarSseOrSse2OpF(Context, nameof(Sse.DivideScalar)); } else { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPDiv)); + }); } } public static void Fdiv_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitVectorSseOrSse2OpF(Context, nameof(Sse.Divide)); } else { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPDiv)); + }); } } public static void Fmadd_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse2) { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; if (Op.Size == 0) { + Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + Context.EmitLdvec(Op.Ra); Context.EmitLdvec(Op.Rn); Context.EmitLdvec(Op.Rm); - Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; - Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types)); Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AddScalar), Types)); @@ -262,12 +278,12 @@ namespace ChocolArm64.Instruction } else /* if (Op.Size == 1) */ { + Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + EmitLdvecWithCastToDouble(Context, Op.Ra); EmitLdvecWithCastToDouble(Context, Op.Rn); EmitLdvecWithCastToDouble(Context, Op.Rm); - Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; - Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types)); Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), Types)); @@ -280,33 +296,48 @@ namespace ChocolArm64.Instruction { EmitScalarTernaryRaOpF(Context, () => { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Add); + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulAdd)); }); } } public static void Fmax_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max)); - }); + EmitScalarSseOrSse2OpF(Context, nameof(Sse.MaxScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax)); + }); + } } public static void Fmax_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max)); - }); + EmitVectorSseOrSse2OpF(Context, nameof(Sse.Max)); + } + else + { + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax)); + }); + } } public static void Fmaxnm_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum)); + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMaxNum)); }); } @@ -314,36 +345,55 @@ namespace ChocolArm64.Instruction { EmitVectorBinaryOpF(Context, () => { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MaxNum)); + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMaxNum)); }); } public static void Fmaxp_V(AILEmitterCtx Context) { - EmitVectorPairwiseOpF(Context, () => EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Max))); + EmitVectorPairwiseOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMax)); + }); } public static void Fmin_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min)); - }); + EmitScalarSseOrSse2OpF(Context, nameof(Sse.MinScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)); + }); + } } public static void Fmin_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min)); - }); + EmitVectorSseOrSse2OpF(Context, nameof(Sse.Min)); + } + else + { + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)); + }); + } } public static void Fminnm_S(AILEmitterCtx Context) { EmitScalarBinaryOpF(Context, () => { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum)); + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMinNum)); }); } @@ -351,13 +401,16 @@ namespace ChocolArm64.Instruction { EmitVectorBinaryOpF(Context, () => { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.MinNum)); + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMinNum)); }); } public static void Fminp_V(AILEmitterCtx Context) { - EmitVectorPairwiseOpF(Context, () => EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.Min))); + EmitVectorPairwiseOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMin)); + }); } public static void Fmla_Se(AILEmitterCtx Context) @@ -407,22 +460,63 @@ namespace ChocolArm64.Instruction public static void Fmsub_S(AILEmitterCtx Context) { - EmitScalarTernaryRaOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse2) { - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - }); + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + if (Op.Size == 0) + { + Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + Context.EmitLdvec(Op.Ra); + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), Types)); + + Context.EmitStvec(Op.Rd); + + EmitVectorZero32_128(Context, Op.Rd); + } + else /* if (Op.Size == 1) */ + { + Type[] Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(Context, Op.Ra); + EmitLdvecWithCastToDouble(Context, Op.Rn); + EmitLdvecWithCastToDouble(Context, Op.Rm); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), Types)); + + EmitStvecWithCastFromDouble(Context, Op.Rd); + + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitScalarTernaryRaOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulSub)); + }); + } } public static void Fmul_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitScalarSseOrSse2OpF(Context, nameof(Sse.MultiplyScalar)); } else { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMul)); + }); } } @@ -433,13 +527,17 @@ namespace ChocolArm64.Instruction public static void Fmul_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitVectorSseOrSse2OpF(Context, nameof(Sse.Multiply)); } else { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMul)); + }); } } @@ -448,6 +546,22 @@ namespace ChocolArm64.Instruction EmitVectorBinaryOpByElemF(Context, () => Context.Emit(OpCodes.Mul)); } + public static void Fmulx_S(AILEmitterCtx Context) + { + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + + public static void Fmulx_V(AILEmitterCtx Context) + { + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fneg_S(AILEmitterCtx Context) { EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg)); @@ -524,17 +638,122 @@ namespace ChocolArm64.Instruction public static void Frecps_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); - }); + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Type[] Types = new Type[] { typeof(float) }; + + Context.EmitLdc_R4(2f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), Types)); + + Context.EmitStvec(Op.Rd); + + EmitVectorZero32_128(Context, Op.Rd); + } + else /* if (SizeF == 1) */ + { + Type[] Types = new Type[] { typeof(double) }; + + Context.EmitLdc_R8(2d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(Context, Op.Rn); + EmitLdvecWithCastToDouble(Context, Op.Rm); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), Types)); + + EmitStvecWithCastFromDouble(Context, Op.Rd); + + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecipStepFused)); + }); + } } public static void Frecps_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse2) { - EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Type[] Types = new Type[] { typeof(float) }; + + Context.EmitLdc_R4(2f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), Types)); + + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else /* if (SizeF == 1) */ + { + Type[] Types = new Type[] { typeof(double) }; + + Context.EmitLdc_R8(2d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(Context, Op.Rn); + EmitLdvecWithCastToDouble(Context, Op.Rm); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), Types)); + + EmitStvecWithCastFromDouble(Context, Op.Rd); + } + } + else + { + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecipStepFused)); + }); + } + } + + public static void Frecpx_S(AILEmitterCtx Context) + { + EmitScalarUnaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRecpX)); }); } @@ -728,97 +947,194 @@ namespace ChocolArm64.Instruction public static void Frsqrts_S(AILEmitterCtx Context) { - EmitFrsqrts(Context, 0, Scalar: true); + if (AOptimizations.FastFP && AOptimizations.UseSse2) + { + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; + + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Type[] Types = new Type[] { typeof(float) }; + + Context.EmitLdc_R4(0.5f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), Types)); + + Context.EmitLdc_R4(3f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types)); + + Context.EmitStvec(Op.Rd); + + EmitVectorZero32_128(Context, Op.Rd); + } + else /* if (SizeF == 1) */ + { + Type[] Types = new Type[] { typeof(double) }; + + Context.EmitLdc_R8(0.5d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), Types)); + + Context.EmitLdc_R8(3d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(Context, Op.Rn); + EmitLdvecWithCastToDouble(Context, Op.Rm); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types)); + + EmitStvecWithCastFromDouble(Context, Op.Rd); + + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRSqrtStepFused)); + }); + } } public static void Frsqrts_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Op.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) + if (AOptimizations.FastFP && AOptimizations.UseSse2) { - EmitFrsqrts(Context, Index, Scalar: false); - } + AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - if (Op.RegisterSize == ARegisterSize.SIMD64) + int SizeF = Op.Size & 1; + + if (SizeF == 0) + { + Type[] Types = new Type[] { typeof(float) }; + + Context.EmitLdc_R4(0.5f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), Types)); + + Context.EmitLdc_R4(3f); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + Context.EmitLdvec(Op.Rn); + Context.EmitLdvec(Op.Rm); + + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), Types)); + Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), Types)); + + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else /* if (SizeF == 1) */ + { + Type[] Types = new Type[] { typeof(double) }; + + Context.EmitLdc_R8(0.5d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), Types)); + + Context.EmitLdc_R8(3d); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), Types)); + + Types = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(Context, Op.Rn); + EmitLdvecWithCastToDouble(Context, Op.Rm); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), Types)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), Types)); + + EmitStvecWithCastFromDouble(Context, Op.Rd); + } + } + else { - EmitVectorZeroUpper(Context, Op.Rd); + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPRSqrtStepFused)); + }); } } - private static void EmitFrsqrts(AILEmitterCtx Context, int Index, bool Scalar) - { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(3); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(3); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - - if (SizeF == 0) - { - Context.EmitLdc_R4(0.5f); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(0.5); - } - - Context.Emit(OpCodes.Mul); - - if (Scalar) - { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); - } - public static void Fsqrt_S(AILEmitterCtx Context) { - EmitScalarUnaryOpF(Context, () => + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitUnaryMathCall(Context, nameof(Math.Sqrt)); - }); + EmitScalarSseOrSse2OpF(Context, nameof(Sse.SqrtScalar)); + } + else + { + EmitScalarUnaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSqrt)); + }); + } + } + + public static void Fsqrt_V(AILEmitterCtx Context) + { + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) + { + EmitVectorSseOrSse2OpF(Context, nameof(Sse.Sqrt)); + } + else + { + EmitVectorUnaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSqrt)); + }); + } } public static void Fsub_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitScalarSseOrSse2OpF(Context, nameof(Sse.SubtractScalar)); } else { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + EmitScalarBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSub)); + }); } } public static void Fsub_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse && AOptimizations.UseSse2) + if (AOptimizations.FastFP && AOptimizations.UseSse + && AOptimizations.UseSse2) { EmitVectorSseOrSse2OpF(Context, nameof(Sse.Subtract)); } else { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + EmitVectorBinaryOpF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPSub)); + }); } } @@ -1170,7 +1486,6 @@ namespace ChocolArm64.Instruction EmitVectorTernaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); - EmitAbs(Context); Context.Emit(OpCodes.Add); @@ -1182,7 +1497,6 @@ namespace ChocolArm64.Instruction EmitVectorWidenRnRmTernaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); - EmitAbs(Context); Context.Emit(OpCodes.Add); @@ -1194,7 +1508,6 @@ namespace ChocolArm64.Instruction EmitVectorBinaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); - EmitAbs(Context); }); } @@ -1204,7 +1517,6 @@ namespace ChocolArm64.Instruction EmitVectorWidenRnRmBinaryOpZx(Context, () => { Context.Emit(OpCodes.Sub); - EmitAbs(Context); }); } diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 381fc46a..dd39f52d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -306,25 +306,19 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; - Context.EmitLdc_I4((int)RoundMode); - MethodInfo MthdInfo; - Type[] Types = new Type[] { null, typeof(MidpointRounding) }; - - Types[0] = SizeF == 0 - ? typeof(float) - : typeof(double); - if (SizeF == 0) { - MthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), Types); + MthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); } else /* if (SizeF == 1) */ { - MthdInfo = typeof(Math).GetMethod(nameof(Math.Round), Types); + MthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); } + Context.EmitLdc_I4((int)RoundMode); + Context.EmitCall(MthdInfo); } @@ -348,24 +342,17 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } - public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name) + public static void EmitSoftFloatCall(AILEmitterCtx Context, string Name) { IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; - int SizeF = Op.Size & 1; + Type Type = (Op.Size & 1) == 0 + ? typeof(ASoftFloat_32) + : typeof(ASoftFloat_64); - MethodInfo MthdInfo; + Context.EmitLdarg(ATranslatedSub.StateArgIdx); - if (SizeF == 0) - { - MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) }); - } - else /* if (SizeF == 1) */ - { - MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) }); - } - - Context.EmitCall(MthdInfo); + Context.EmitCall(Type, Name); } public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index e3f067ed..7412c976 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -1,4 +1,7 @@ +using ChocolArm64.State; using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; namespace ChocolArm64.Instruction { @@ -6,13 +9,29 @@ namespace ChocolArm64.Instruction { static ASoftFloat() { + RecipEstimateTable = BuildRecipEstimateTable(); InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); - RecipEstimateTable = BuildRecipEstimateTable(); } private static readonly byte[] RecipEstimateTable; private static readonly byte[] InvSqrtEstimateTable; + private static byte[] BuildRecipEstimateTable() + { + byte[] Table = new byte[256]; + for (ulong index = 0; index < 256; index++) + { + ulong a = index | 0x100; + + a = (a << 1) + 1; + ulong b = 0x80000 / a; + b = (b + 1) >> 1; + + Table[index] = (byte)(b & 0xFF); + } + return Table; + } + private static byte[] BuildInvSqrtEstimateTable() { byte[] Table = new byte[512]; @@ -40,22 +59,75 @@ namespace ChocolArm64.Instruction return Table; } - private static byte[] BuildRecipEstimateTable() + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float RecipEstimate(float x) { - byte[] Table = new byte[256]; - for (ulong index = 0; index < 256; index++) - { - ulong a = index | 0x100; - - a = (a << 1) + 1; - ulong b = 0x80000 / a; - b = (b + 1) >> 1; - - Table[index] = (byte)(b & 0xFF); - } - return Table; + return (float)RecipEstimate((double)x); } + public static double RecipEstimate(double x) + { + ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong x_sign = x_bits & 0x8000000000000000; + ulong x_exp = (x_bits >> 52) & 0x7FF; + ulong scaled = x_bits & ((1ul << 52) - 1); + + if (x_exp >= 2045) + { + if (x_exp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + + if (x_exp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + x_exp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong result_exp = (2045 - x_exp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (result_exp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (result_exp == 0x7FF) + { + result_exp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = x_sign | (result_exp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float InvSqrtEstimate(float x) { return (float)InvSqrtEstimate((double)x); @@ -124,108 +196,6 @@ namespace ChocolArm64.Instruction return BitConverter.Int64BitsToDouble((long)result); } - public static float RecipEstimate(float x) - { - return (float)RecipEstimate((double)x); - } - - public static double RecipEstimate(double x) - { - ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); - ulong x_sign = x_bits & 0x8000000000000000; - ulong x_exp = (x_bits >> 52) & 0x7FF; - ulong scaled = x_bits & ((1ul << 52) - 1); - - if (x_exp >= 2045) - { - if (x_exp == 0x7ff && scaled != 0) - { - // NaN - return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); - } - - // Infinity, or Out of range -> Zero - return BitConverter.Int64BitsToDouble((long)x_sign); - } - - if (x_exp == 0) - { - if (scaled == 0) - { - // Zero -> Infinity - return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7FF0000000000000)); - } - - // Denormal - if ((scaled & (1ul << 51)) == 0) - { - x_exp = ~0ul; - scaled <<= 2; - } - else - { - scaled <<= 1; - } - } - - scaled >>= 44; - scaled &= 0xFF; - - ulong result_exp = (2045 - x_exp) & 0x7FF; - ulong estimate = (ulong)RecipEstimateTable[scaled]; - ulong fraction = estimate << 44; - - if (result_exp == 0) - { - fraction >>= 1; - fraction |= 1ul << 51; - } - else if (result_exp == 0x7FF) - { - result_exp = 0; - fraction >>= 2; - fraction |= 1ul << 50; - } - - ulong result = x_sign | (result_exp << 52) | fraction; - return BitConverter.Int64BitsToDouble((long)result); - } - - public static float RecipStep(float op1, float op2) - { - return (float)RecipStep((double)op1, (double)op2); - } - - public static double RecipStep(double op1, double op2) - { - op1 = -op1; - - ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); - ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); - - ulong op1_sign = op1_bits & 0x8000000000000000; - ulong op2_sign = op2_bits & 0x8000000000000000; - ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; - ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; - - bool inf1 = op1_other == 0x7FF0000000000000; - bool inf2 = op2_other == 0x7FF0000000000000; - bool zero1 = op1_other == 0; - bool zero2 = op2_other == 0; - - if ((inf1 && zero2) || (zero1 && inf2)) - { - return 2.0; - } - else if (inf1 || inf2) - { - // Infinity - return BitConverter.Int64BitsToDouble((long)(0x7FF0000000000000 | (op1_sign ^ op2_sign))); - } - - return 2.0 + op1 * op2; - } - public static float ConvertHalfToSingle(ushort x) { uint x_sign = (uint)(x >> 15) & 0x0001; @@ -261,277 +231,1457 @@ namespace ChocolArm64.Instruction uint new_exp = (uint)((exponent + 127) & 0xFF) << 23; return BitConverter.Int32BitsToSingle((int)((x_sign << 31) | new_exp | (x_mantissa << 13))); } + } - public static float MaxNum(float op1, float op2) + static class ASoftFloat_32 + { + public static float FPAdd(float Value1, float Value2, AThreadState State) { - uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); - uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPAdd: State.Fpcr = 0x{State.Fpcr:X8}"); - if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op1 = float.NegativeInfinity; - } - else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) - { - op2 = float.NegativeInfinity; + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if (Inf1 && Inf2 && Sign1 == !Sign2) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((Inf1 && !Sign1) || (Inf2 && !Sign2)) + { + Result = FPInfinity(false); + } + else if ((Inf1 && Sign1) || (Inf2 && Sign2)) + { + Result = FPInfinity(true); + } + else if (Zero1 && Zero2 && Sign1 == Sign2) + { + Result = FPZero(Sign1); + } + else + { + Result = Value1 + Value2; + } } - return Max(op1, op2); + return Result; } - public static double MaxNum(double op1, double op2) + public static float FPDiv(float Value1, float Value2, AThreadState State) { - ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); - ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPDiv: State.Fpcr = 0x{State.Fpcr:X8}"); - if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op1 = double.NegativeInfinity; - } - else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) - { - op2 = double.NegativeInfinity; + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Inf2) || (Zero1 && Zero2)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if (Inf1 || Zero2) + { + Result = FPInfinity(Sign1 ^ Sign2); + + if (!Inf1) FPProcessException(FPExc.DivideByZero, State); + } + else if (Zero1 || Inf2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 / Value2; + } } - return Max(op1, op2); + return Result; } - public static float Max(float op1, float op2) + public static float FPMax(float Value1, float Value2, AThreadState State) { - // Fast path - if (op1 > op2) + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPMax: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - return op1; + if (Value1 > Value2) + { + if (Type1 == FPType.Infinity) + { + Result = FPInfinity(Sign1); + } + else if (Type1 == FPType.Zero) + { + Result = FPZero(Sign1 && Sign2); + } + else + { + Result = Value1; + } + } + else + { + if (Type2 == FPType.Infinity) + { + Result = FPInfinity(Sign2); + } + else if (Type2 == FPType.Zero) + { + Result = FPZero(Sign1 && Sign2); + } + else + { + Result = Value2; + } + } } - if (op1 < op2 || (op1 == op2 && op2 != 0)) - { - return op2; - } - - uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); - uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); - - // Handle NaN cases - if (ProcessNaNs(op1_bits, op2_bits, out uint op_bits)) - { - return BitConverter.Int32BitsToSingle((int)op_bits); - } - - // Return the most positive zero - if ((op1_bits & op2_bits) == 0x80000000u) - { - return BitConverter.Int32BitsToSingle(int.MinValue); - } - - return 0; + return Result; } - public static double Max(double op1, double op2) + public static float FPMaxNum(float Value1, float Value2, AThreadState State) { - // Fast path - if (op1 > op2) + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); + + Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { - return op1; + Value1 = FPInfinity(true); + } + else if (Type1 != FPType.QNaN && Type2 == FPType.QNaN) + { + Value2 = FPInfinity(true); } - if (op1 < op2 || (op1 == op2 && op2 != 0)) - { - return op2; - } - - ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); - ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); - - // Handle NaN cases - if (ProcessNaNs(op1_bits, op2_bits, out ulong op_bits)) - { - return BitConverter.Int64BitsToDouble((long)op_bits); - } - - // Return the most positive zero - if ((op1_bits & op2_bits) == 0x8000000000000000ul) - { - return BitConverter.Int64BitsToDouble(long.MinValue); - } - - return 0; + return FPMax(Value1, Value2, State); } - public static float MinNum(float op1, float op2) + public static float FPMin(float Value1, float Value2, AThreadState State) { - uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); - uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPMin: State.Fpcr = 0x{State.Fpcr:X8}"); - if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op1 = float.PositiveInfinity; - } - else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) - { - op2 = float.PositiveInfinity; + if (Value1 < Value2) + { + if (Type1 == FPType.Infinity) + { + Result = FPInfinity(Sign1); + } + else if (Type1 == FPType.Zero) + { + Result = FPZero(Sign1 || Sign2); + } + else + { + Result = Value1; + } + } + else + { + if (Type2 == FPType.Infinity) + { + Result = FPInfinity(Sign2); + } + else if (Type2 == FPType.Zero) + { + Result = FPZero(Sign1 || Sign2); + } + else + { + Result = Value2; + } + } } - return Min(op1, op2); + return Result; } - public static double MinNum(double op1, double op2) + public static float FPMinNum(float Value1, float Value2, AThreadState State) { - ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); - ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); - if (IsQNaN(op1_bits) && !IsQNaN(op2_bits)) + Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { - op1 = double.PositiveInfinity; + Value1 = FPInfinity(false); } - else if (!IsQNaN(op1_bits) && IsQNaN(op2_bits)) + else if (Type1 != FPType.QNaN && Type2 == FPType.QNaN) { - op2 = double.PositiveInfinity; + Value2 = FPInfinity(false); } - return Min(op1, op2); + return FPMin(Value1, Value2, State); } - public static float Min(float op1, float op2) + public static float FPMul(float Value1, float Value2, AThreadState State) { - // Fast path - if (op1 < op2) + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPMul: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - return op1; + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else if (Zero1 || Zero2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 * Value2; + } } - if (op1 > op2 || (op1 == op2 && op2 != 0)) - { - return op2; - } - - uint op1_bits = (uint)BitConverter.SingleToInt32Bits(op1); - uint op2_bits = (uint)BitConverter.SingleToInt32Bits(op2); - - // Handle NaN cases - if (ProcessNaNs(op1_bits, op2_bits, out uint op_bits)) - { - return BitConverter.Int32BitsToSingle((int)op_bits); - } - - // Return the most negative zero - if ((op1_bits | op2_bits) == 0x80000000u) - { - return BitConverter.Int32BitsToSingle(int.MinValue); - } - - return 0; + return Result; } - public static double Min(double op1, double op2) + public static float FPMulAdd(float ValueA, float Value1, float Value2, AThreadState State) { - // Fast path - if (op1 < op2) + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPMulAdd: State.Fpcr = 0x{State.Fpcr:X8}"); + + ValueA = ValueA.FPUnpack(out FPType TypeA, out bool SignA, out uint Addend); + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + float Result = FPProcessNaNs3(TypeA, Type1, Type2, Addend, Op1, Op2, State, out bool Done); + + if (TypeA == FPType.QNaN && ((Inf1 && Zero2) || (Zero1 && Inf2))) { - return op1; + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); } - if (op1 > op2 || (op1 == op2 && op2 != 0)) + if (!Done) { - return op2; + bool InfA = TypeA == FPType.Infinity; bool ZeroA = TypeA == FPType.Zero; + + bool SignP = Sign1 ^ Sign2; + bool InfP = Inf1 || Inf2; + bool ZeroP = Zero1 || Zero2; + + if ((Inf1 && Zero2) || (Zero1 && Inf2) || (InfA && InfP && SignA != SignP)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((InfA && !SignA) || (InfP && !SignP)) + { + Result = FPInfinity(false); + } + else if ((InfA && SignA) || (InfP && SignP)) + { + Result = FPInfinity(true); + } + else if (ZeroA && ZeroP && SignA == SignP) + { + Result = FPZero(SignA); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = ValueA + (Value1 * Value2); + } } - ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); - ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); - - // Handle NaN cases - if (ProcessNaNs(op1_bits, op2_bits, out ulong op_bits)) - { - return BitConverter.Int64BitsToDouble((long)op_bits); - } - - // Return the most negative zero - if ((op1_bits | op2_bits) == 0x8000000000000000ul) - { - return BitConverter.Int64BitsToDouble(long.MinValue); - } - - return 0; + return Result; } - private static bool ProcessNaNs(uint op1_bits, uint op2_bits, out uint op_bits) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float FPMulSub(float ValueA, float Value1, float Value2, AThreadState State) { - if (IsSNaN(op1_bits)) + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMulSub: "); + + Value1 = Value1.FPNeg(); + + return FPMulAdd(ValueA, Value1, Value2, State); + } + + public static float FPMulX(float Value1, float Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPMulX: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op_bits = op1_bits | (1u << 22); // op1 is SNaN, return QNaN op1 + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPTwo(Sign1 ^ Sign2); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else if (Zero1 || Zero2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 * Value2; + } } - else if (IsSNaN(op2_bits)) + + return Result; + } + + public static float FPRecipStepFused(float Value1, float Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPRecipStepFused: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPNeg(); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op_bits = op2_bits | (1u << 22); // op2 is SNaN, return QNaN op2 + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPTwo(false); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = 2f + (Value1 * Value2); + } } - else if (IsQNaN(op1_bits)) + + return Result; + } + + public static float FPRecpX(float Value, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPRecpX: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value.FPUnpack(out FPType Type, out bool Sign, out uint Op); + + float Result; + + if (Type == FPType.SNaN || Type == FPType.QNaN) { - op_bits = op1_bits; // op1 is QNaN, return QNaN op1 - } - else if (IsQNaN(op2_bits)) - { - op_bits = op2_bits; // op2 is QNaN, return QNaN op2 + Result = FPProcessNaN(Type, Op, State); } else { - op_bits = 0; + uint NotExp = (~Op >> 23) & 0xFFu; + uint MaxExp = 0xFEu; - return false; + Result = BitConverter.Int32BitsToSingle( + (int)((Sign ? 1u : 0u) << 31 | (NotExp == 0xFFu ? MaxExp : NotExp) << 23)); } - return true; + return Result; } - private static bool ProcessNaNs(ulong op1_bits, ulong op2_bits, out ulong op_bits) + public static float FPRSqrtStepFused(float Value1, float Value2, AThreadState State) { - if (IsSNaN(op1_bits)) + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPRSqrtStepFused: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPNeg(); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) { - op_bits = op1_bits | (1ul << 51); // op1 is SNaN, return QNaN op1 + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPOnePointFive(false); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else + { + // TODO: When available, use: T MathF.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = (3f + (Value1 * Value2)) / 2f; + } } - else if (IsSNaN(op2_bits)) + + return Result; + } + + public static float FPSqrt(float Value, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPSqrt: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value = Value.FPUnpack(out FPType Type, out bool Sign, out uint Op); + + float Result; + + if (Type == FPType.SNaN || Type == FPType.QNaN) { - op_bits = op2_bits | (1ul << 51); // op2 is SNaN, return QNaN op2 + Result = FPProcessNaN(Type, Op, State); } - else if (IsQNaN(op1_bits)) + else if (Type == FPType.Zero) { - op_bits = op1_bits; // op1 is QNaN, return QNaN op1 + Result = FPZero(Sign); } - else if (IsQNaN(op2_bits)) + else if (Type == FPType.Infinity && !Sign) { - op_bits = op2_bits; // op2 is QNaN, return QNaN op2 + Result = FPInfinity(Sign); + } + else if (Sign) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); } else { - op_bits = 0; - - return false; + Result = MathF.Sqrt(Value); } - return true; + return Result; } - private static bool IsQNaN(uint op_bits) + public static float FPSub(float Value1, float Value2, AThreadState State) { - return (op_bits & 0x007FFFFF) != 0 && - (op_bits & 0x7FC00000) == 0x7FC00000; + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_32.FPSub: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + + float Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if (Inf1 && Inf2 && Sign1 == Sign2) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((Inf1 && !Sign1) || (Inf2 && Sign2)) + { + Result = FPInfinity(false); + } + else if ((Inf1 && Sign1) || (Inf2 && !Sign2)) + { + Result = FPInfinity(true); + } + else if (Zero1 && Zero2 && Sign1 == !Sign2) + { + Result = FPZero(Sign1); + } + else + { + Result = Value1 - Value2; + } + } + + return Result; } - private static bool IsQNaN(ulong op_bits) + private enum FPType { - return (op_bits & 0x000FFFFFFFFFFFFF) != 0 && - (op_bits & 0x7FF8000000000000) == 0x7FF8000000000000; + Nonzero, + Zero, + Infinity, + QNaN, + SNaN } - private static bool IsSNaN(uint op_bits) + private enum FPExc { - return (op_bits & 0x007FFFFF) != 0 && - (op_bits & 0x7FC00000) == 0x7F800000; + InvalidOp, + DivideByZero, + Overflow, + Underflow, + Inexact, + InputDenorm = 7 } - private static bool IsSNaN(ulong op_bits) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPDefaultNaN() { - return (op_bits & 0x000FFFFFFFFFFFFF) != 0 && - (op_bits & 0x7FF8000000000000) == 0x7FF0000000000000; + return -float.NaN; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPInfinity(bool Sign) + { + return Sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPZero(bool Sign) + { + return Sign ? -0f : +0f; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPTwo(bool Sign) + { + return Sign ? -2f : +2f; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPOnePointFive(bool Sign) + { + return Sign ? -1.5f : +1.5f; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static float FPNeg(this float Value) + { + return -Value; + } + + private static float FPUnpack(this float Value, out FPType Type, out bool Sign, out uint ValueBits) + { + ValueBits = (uint)BitConverter.SingleToInt32Bits(Value); + + Sign = (~ValueBits & 0x80000000u) == 0u; + + if ((ValueBits & 0x7F800000u) == 0u) + { + if ((ValueBits & 0x007FFFFFu) == 0u) + { + Type = FPType.Zero; + } + else + { + Type = FPType.Nonzero; + } + } + else if ((~ValueBits & 0x7F800000u) == 0u) + { + if ((ValueBits & 0x007FFFFFu) == 0u) + { + Type = FPType.Infinity; + } + else + { + Type = (~ValueBits & 0x00400000u) == 0u + ? FPType.QNaN + : FPType.SNaN; + + return FPZero(Sign); + } + } + else + { + Type = FPType.Nonzero; + } + + return Value; + } + + private static float FPProcessNaNs( + FPType Type1, + FPType Type2, + uint Op1, + uint Op2, + AThreadState State, + out bool Done) + { + Done = true; + + if (Type1 == FPType.SNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.SNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type1 == FPType.QNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.QNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + + Done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FPType Type1, + FPType Type2, + FPType Type3, + uint Op1, + uint Op2, + uint Op3, + AThreadState State, + out bool Done) + { + Done = true; + + if (Type1 == FPType.SNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.SNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type3 == FPType.SNaN) + { + return FPProcessNaN(Type3, Op3, State); + } + else if (Type1 == FPType.QNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.QNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type3 == FPType.QNaN) + { + return FPProcessNaN(Type3, Op3, State); + } + + Done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FPType Type, uint Op, AThreadState State) + { + const int DNBit = 25; // Default NaN mode control bit. + + if (Type == FPType.SNaN) + { + Op |= 1u << 22; + + FPProcessException(FPExc.InvalidOp, State); + } + + if ((State.Fpcr & (1 << DNBit)) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)Op); + } + + private static void FPProcessException(FPExc Exc, AThreadState State) + { + int Enable = (int)Exc + 8; + + if ((State.Fpcr & (1 << Enable)) != 0) + { + throw new NotImplementedException("floating-point trap handling"); + } + else + { + State.Fpsr |= 1 << (int)Exc; + } } } -} \ No newline at end of file + + static class ASoftFloat_64 + { + public static double FPAdd(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPAdd: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if (Inf1 && Inf2 && Sign1 == !Sign2) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((Inf1 && !Sign1) || (Inf2 && !Sign2)) + { + Result = FPInfinity(false); + } + else if ((Inf1 && Sign1) || (Inf2 && Sign2)) + { + Result = FPInfinity(true); + } + else if (Zero1 && Zero2 && Sign1 == Sign2) + { + Result = FPZero(Sign1); + } + else + { + Result = Value1 + Value2; + } + } + + return Result; + } + + public static double FPDiv(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPDiv: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Inf2) || (Zero1 && Zero2)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if (Inf1 || Zero2) + { + Result = FPInfinity(Sign1 ^ Sign2); + + if (!Inf1) FPProcessException(FPExc.DivideByZero, State); + } + else if (Zero1 || Inf2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 / Value2; + } + } + + return Result; + } + + public static double FPMax(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPMax: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + if (Value1 > Value2) + { + if (Type1 == FPType.Infinity) + { + Result = FPInfinity(Sign1); + } + else if (Type1 == FPType.Zero) + { + Result = FPZero(Sign1 && Sign2); + } + else + { + Result = Value1; + } + } + else + { + if (Type2 == FPType.Infinity) + { + Result = FPInfinity(Sign2); + } + else if (Type2 == FPType.Zero) + { + Result = FPZero(Sign1 && Sign2); + } + else + { + Result = Value2; + } + } + } + + return Result; + } + + public static double FPMaxNum(double Value1, double Value2, AThreadState State) + { + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); + + Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) + { + Value1 = FPInfinity(true); + } + else if (Type1 != FPType.QNaN && Type2 == FPType.QNaN) + { + Value2 = FPInfinity(true); + } + + return FPMax(Value1, Value2, State); + } + + public static double FPMin(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPMin: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + if (Value1 < Value2) + { + if (Type1 == FPType.Infinity) + { + Result = FPInfinity(Sign1); + } + else if (Type1 == FPType.Zero) + { + Result = FPZero(Sign1 || Sign2); + } + else + { + Result = Value1; + } + } + else + { + if (Type2 == FPType.Infinity) + { + Result = FPInfinity(Sign2); + } + else if (Type2 == FPType.Zero) + { + Result = FPZero(Sign1 || Sign2); + } + else + { + Result = Value2; + } + } + } + + return Result; + } + + public static double FPMinNum(double Value1, double Value2, AThreadState State) + { + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); + + Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) + { + Value1 = FPInfinity(false); + } + else if (Type1 != FPType.QNaN && Type2 == FPType.QNaN) + { + Value2 = FPInfinity(false); + } + + return FPMin(Value1, Value2, State); + } + + public static double FPMul(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPMul: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else if (Zero1 || Zero2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 * Value2; + } + } + + return Result; + } + + public static double FPMulAdd(double ValueA, double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPMulAdd: State.Fpcr = 0x{State.Fpcr:X8}"); + + ValueA = ValueA.FPUnpack(out FPType TypeA, out bool SignA, out ulong Addend); + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + double Result = FPProcessNaNs3(TypeA, Type1, Type2, Addend, Op1, Op2, State, out bool Done); + + if (TypeA == FPType.QNaN && ((Inf1 && Zero2) || (Zero1 && Inf2))) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + + if (!Done) + { + bool InfA = TypeA == FPType.Infinity; bool ZeroA = TypeA == FPType.Zero; + + bool SignP = Sign1 ^ Sign2; + bool InfP = Inf1 || Inf2; + bool ZeroP = Zero1 || Zero2; + + if ((Inf1 && Zero2) || (Zero1 && Inf2) || (InfA && InfP && SignA != SignP)) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((InfA && !SignA) || (InfP && !SignP)) + { + Result = FPInfinity(false); + } + else if ((InfA && SignA) || (InfP && SignP)) + { + Result = FPInfinity(true); + } + else if (ZeroA && ZeroP && SignA == SignP) + { + Result = FPZero(SignA); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = ValueA + (Value1 * Value2); + } + } + + return Result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static double FPMulSub(double ValueA, double Value1, double Value2, AThreadState State) + { + Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMulSub: "); + + Value1 = Value1.FPNeg(); + + return FPMulAdd(ValueA, Value1, Value2, State); + } + + public static double FPMulX(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPMulX: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPTwo(Sign1 ^ Sign2); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else if (Zero1 || Zero2) + { + Result = FPZero(Sign1 ^ Sign2); + } + else + { + Result = Value1 * Value2; + } + } + + return Result; + } + + public static double FPRecipStepFused(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPRecipStepFused: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPNeg(); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPTwo(false); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = 2d + (Value1 * Value2); + } + } + + return Result; + } + + public static double FPRecpX(double Value, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPRecpX: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value.FPUnpack(out FPType Type, out bool Sign, out ulong Op); + + double Result; + + if (Type == FPType.SNaN || Type == FPType.QNaN) + { + Result = FPProcessNaN(Type, Op, State); + } + else + { + ulong NotExp = (~Op >> 52) & 0x7FFul; + ulong MaxExp = 0x7FEul; + + Result = BitConverter.Int64BitsToDouble( + (long)((Sign ? 1ul : 0ul) << 63 | (NotExp == 0x7FFul ? MaxExp : NotExp) << 52)); + } + + return Result; + } + + public static double FPRSqrtStepFused(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPRSqrtStepFused: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPNeg(); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if ((Inf1 && Zero2) || (Zero1 && Inf2)) + { + Result = FPOnePointFive(false); + } + else if (Inf1 || Inf2) + { + Result = FPInfinity(Sign1 ^ Sign2); + } + else + { + // TODO: When available, use: T Math.FusedMultiplyAdd(T, T, T); + // https://github.com/dotnet/corefx/issues/31903 + + Result = (3d + (Value1 * Value2)) / 2d; + } + } + + return Result; + } + + public static double FPSqrt(double Value, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPSqrt: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value = Value.FPUnpack(out FPType Type, out bool Sign, out ulong Op); + + double Result; + + if (Type == FPType.SNaN || Type == FPType.QNaN) + { + Result = FPProcessNaN(Type, Op, State); + } + else if (Type == FPType.Zero) + { + Result = FPZero(Sign); + } + else if (Type == FPType.Infinity && !Sign) + { + Result = FPInfinity(Sign); + } + else if (Sign) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else + { + Result = Math.Sqrt(Value); + } + + return Result; + } + + public static double FPSub(double Value1, double Value2, AThreadState State) + { + Debug.WriteLineIf(State.Fpcr != 0, $"ASoftFloat_64.FPSub: State.Fpcr = 0x{State.Fpcr:X8}"); + + Value1 = Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); + Value2 = Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + + double Result = FPProcessNaNs(Type1, Type2, Op1, Op2, State, out bool Done); + + if (!Done) + { + bool Inf1 = Type1 == FPType.Infinity; bool Zero1 = Type1 == FPType.Zero; + bool Inf2 = Type2 == FPType.Infinity; bool Zero2 = Type2 == FPType.Zero; + + if (Inf1 && Inf2 && Sign1 == Sign2) + { + Result = FPDefaultNaN(); + + FPProcessException(FPExc.InvalidOp, State); + } + else if ((Inf1 && !Sign1) || (Inf2 && Sign2)) + { + Result = FPInfinity(false); + } + else if ((Inf1 && Sign1) || (Inf2 && !Sign2)) + { + Result = FPInfinity(true); + } + else if (Zero1 && Zero2 && Sign1 == !Sign2) + { + Result = FPZero(Sign1); + } + else + { + Result = Value1 - Value2; + } + } + + return Result; + } + + private enum FPType + { + Nonzero, + Zero, + Infinity, + QNaN, + SNaN + } + + private enum FPExc + { + InvalidOp, + DivideByZero, + Overflow, + Underflow, + Inexact, + InputDenorm = 7 + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPDefaultNaN() + { + return -double.NaN; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPInfinity(bool Sign) + { + return Sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPZero(bool Sign) + { + return Sign ? -0d : +0d; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPTwo(bool Sign) + { + return Sign ? -2d : +2d; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPOnePointFive(bool Sign) + { + return Sign ? -1.5d : +1.5d; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double FPNeg(this double Value) + { + return -Value; + } + + private static double FPUnpack(this double Value, out FPType Type, out bool Sign, out ulong ValueBits) + { + ValueBits = (ulong)BitConverter.DoubleToInt64Bits(Value); + + Sign = (~ValueBits & 0x8000000000000000ul) == 0ul; + + if ((ValueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((ValueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + Type = FPType.Zero; + } + else + { + Type = FPType.Nonzero; + } + } + else if ((~ValueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((ValueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + Type = FPType.Infinity; + } + else + { + Type = (~ValueBits & 0x0008000000000000ul) == 0ul + ? FPType.QNaN + : FPType.SNaN; + + return FPZero(Sign); + } + } + else + { + Type = FPType.Nonzero; + } + + return Value; + } + + private static double FPProcessNaNs( + FPType Type1, + FPType Type2, + ulong Op1, + ulong Op2, + AThreadState State, + out bool Done) + { + Done = true; + + if (Type1 == FPType.SNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.SNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type1 == FPType.QNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.QNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + + Done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FPType Type1, + FPType Type2, + FPType Type3, + ulong Op1, + ulong Op2, + ulong Op3, + AThreadState State, + out bool Done) + { + Done = true; + + if (Type1 == FPType.SNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.SNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type3 == FPType.SNaN) + { + return FPProcessNaN(Type3, Op3, State); + } + else if (Type1 == FPType.QNaN) + { + return FPProcessNaN(Type1, Op1, State); + } + else if (Type2 == FPType.QNaN) + { + return FPProcessNaN(Type2, Op2, State); + } + else if (Type3 == FPType.QNaN) + { + return FPProcessNaN(Type3, Op3, State); + } + + Done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FPType Type, ulong Op, AThreadState State) + { + const int DNBit = 25; // Default NaN mode control bit. + + if (Type == FPType.SNaN) + { + Op |= 1ul << 51; + + FPProcessException(FPExc.InvalidOp, State); + } + + if ((State.Fpcr & (1 << DNBit)) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)Op); + } + + private static void FPProcessException(FPExc Exc, AThreadState State) + { + int Enable = (int)Exc + 8; + + if ((State.Fpcr & (1 << Enable)) != 0) + { + throw new NotImplementedException("floating-point trap handling"); + } + else + { + State.Fpsr |= 1 << (int)Exc; + } + } + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index c69c7a02..24585fe7 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -178,8 +178,15 @@ namespace Ryujinx.Tests.Cpu return GetThreadState(); } - [Flags] - protected enum FPSR + /// Floating-point Control Register. + protected enum FPCR + { + /// Default NaN mode control bit. + DN = 25 + } + + /// Floating-point Status Register. + [Flags] protected enum FPSR { None = 0, @@ -195,32 +202,43 @@ namespace Ryujinx.Tests.Cpu IXC = 1 << 4, /// Input Denormal cumulative floating-point exception bit. IDC = 1 << 7, + /// Cumulative saturation bit. - QC = 1 << 27 + QC = 1 << 27 } - protected enum FpSkips { None, IfNaN_S, IfNaN_D }; + [Flags] protected enum FpSkips + { + None = 0, - protected enum FpUseTolerance { None, OneUlps_S, OneUlps_D }; + IfNaN_S = 1, + IfNaN_D = 2, + + IfUnderflow = 4, + IfOverflow = 8 + } + + protected enum FpTolerances + { + None, + + UpToOneUlps_S, + UpToOneUlps_D + } protected void CompareAgainstUnicorn( - FPSR FpsrMask = FPSR.None, - FpSkips FpSkips = FpSkips.None, - FpUseTolerance FpUseTolerance = FpUseTolerance.None) + FPSR FpsrMask = FPSR.None, + FpSkips FpSkips = FpSkips.None, + FpTolerances FpTolerances = FpTolerances.None) { if (!UnicornAvailable) { return; } - if (FpSkips == FpSkips.IfNaN_S && float.IsNaN(VectorExtractSingle(UnicornEmu.Q[0], (byte)0))) + if (FpSkips != FpSkips.None) { - Assert.Ignore("NaN test."); - } - - if (FpSkips == FpSkips.IfNaN_D && double.IsNaN(VectorExtractDouble(UnicornEmu.Q[0], (byte)0))) - { - Assert.Ignore("NaN test."); + ManageFpSkips(FpSkips); } Assert.That(Thread.ThreadState.X0, Is.EqualTo(UnicornEmu.X[0])); @@ -257,50 +275,13 @@ namespace Ryujinx.Tests.Cpu Assert.That(Thread.ThreadState.X31, Is.EqualTo(UnicornEmu.SP)); - if (FpUseTolerance == FpUseTolerance.None) + if (FpTolerances == FpTolerances.None) { Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); } else { - if (!Is.EqualTo(UnicornEmu.Q[0]).ApplyTo(Thread.ThreadState.V0).IsSuccess) - { - if (FpUseTolerance == FpUseTolerance.OneUlps_S) - { - if (float.IsNormal (VectorExtractSingle(UnicornEmu.Q[0], (byte)0)) || - float.IsSubnormal(VectorExtractSingle(UnicornEmu.Q[0], (byte)0))) - { - Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)0), - Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); - Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)1), - Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); - Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)2), - Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)2)).Within(1).Ulps); - Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)3), - Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)3)).Within(1).Ulps); - } - else - { - Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); - } - } - - if (FpUseTolerance == FpUseTolerance.OneUlps_D) - { - if (double.IsNormal (VectorExtractDouble(UnicornEmu.Q[0], (byte)0)) || - double.IsSubnormal(VectorExtractDouble(UnicornEmu.Q[0], (byte)0))) - { - Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)0), - Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); - Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)1), - Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); - } - else - { - Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); - } - } - } + ManageFpTolerances(FpTolerances); } Assert.That(Thread.ThreadState.V1, Is.EqualTo(UnicornEmu.Q[1])); Assert.That(Thread.ThreadState.V2, Is.EqualTo(UnicornEmu.Q[2])); @@ -344,6 +325,90 @@ namespace Ryujinx.Tests.Cpu Assert.That(Thread.ThreadState.Negative, Is.EqualTo(UnicornEmu.NegativeFlag)); } + private void ManageFpSkips(FpSkips FpSkips) + { + if (FpSkips.HasFlag(FpSkips.IfNaN_S)) + { + if (float.IsNaN(VectorExtractSingle(UnicornEmu.Q[0], (byte)0))) + { + Assert.Ignore("NaN test."); + } + } + else if (FpSkips.HasFlag(FpSkips.IfNaN_D)) + { + if (double.IsNaN(VectorExtractDouble(UnicornEmu.Q[0], (byte)0))) + { + Assert.Ignore("NaN test."); + } + } + + if (FpSkips.HasFlag(FpSkips.IfUnderflow)) + { + if ((UnicornEmu.Fpsr & (int)FPSR.UFC) != 0) + { + Assert.Ignore("Underflow test."); + } + } + + if (FpSkips.HasFlag(FpSkips.IfOverflow)) + { + if ((UnicornEmu.Fpsr & (int)FPSR.OFC) != 0) + { + Assert.Ignore("Overflow test."); + } + } + } + + private void ManageFpTolerances(FpTolerances FpTolerances) + { + if (!Is.EqualTo(UnicornEmu.Q[0]).ApplyTo(Thread.ThreadState.V0).IsSuccess) + { + if (FpTolerances == FpTolerances.UpToOneUlps_S) + { + if (IsNormalOrSubnormal_S(VectorExtractSingle(UnicornEmu.Q[0], (byte)0)) && + IsNormalOrSubnormal_S(VectorExtractSingle(Thread.ThreadState.V0, (byte)0))) + { + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)0), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)1), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)2), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)2)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)3), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)3)).Within(1).Ulps); + + Console.WriteLine(FpTolerances); + } + else + { + Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + } + } + + if (FpTolerances == FpTolerances.UpToOneUlps_D) + { + if (IsNormalOrSubnormal_D(VectorExtractDouble(UnicornEmu.Q[0], (byte)0)) && + IsNormalOrSubnormal_D(VectorExtractDouble(Thread.ThreadState.V0, (byte)0))) + { + Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)0), + Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); + Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)1), + Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); + + Console.WriteLine(FpTolerances); + } + else + { + Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + } + } + } + + bool IsNormalOrSubnormal_S(float f) => float.IsNormal(f) || float.IsSubnormal(f); + + bool IsNormalOrSubnormal_D(double d) => double.IsNormal(d) || double.IsSubnormal(d); + } + protected static Vector128 MakeVectorE0(double E0) { if (!Sse2.IsSupported) @@ -453,14 +518,14 @@ namespace Ryujinx.Tests.Cpu { uint Rnd; - do Rnd = TestContext.CurrentContext.Random.NextUInt(); - while ((Rnd & 0x7F800000u) == 0u || - (Rnd & 0x7F800000u) == 0x7F800000u); + do Rnd = TestContext.CurrentContext.Random.NextUInt(); + while (( Rnd & 0x7F800000u) == 0u || + (~Rnd & 0x7F800000u) == 0u); return Rnd; } - protected static uint GenSubNormal_S() + protected static uint GenSubnormal_S() { uint Rnd; @@ -474,14 +539,14 @@ namespace Ryujinx.Tests.Cpu { ulong Rnd; - do Rnd = TestContext.CurrentContext.Random.NextULong(); - while ((Rnd & 0x7FF0000000000000ul) == 0ul || - (Rnd & 0x7FF0000000000000ul) == 0x7FF0000000000000ul); + do Rnd = TestContext.CurrentContext.Random.NextULong(); + while (( Rnd & 0x7FF0000000000000ul) == 0ul || + (~Rnd & 0x7FF0000000000000ul) == 0ul); return Rnd; } - protected static ulong GenSubNormal_D() + protected static ulong GenSubnormal_D() { ulong Rnd; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index b423b4de..2075ccf2 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -81,14 +81,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _1S_F_() { - yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) yield return 0x0000000080800000ul; // -Min Normal - yield return 0x00000000807FFFFFul; // -Max SubNormal - yield return 0x0000000080000001ul; // -Min SubNormal - yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) yield return 0x0000000000800000ul; // +Min Normal - yield return 0x00000000007FFFFFul; // +Max SubNormal - yield return 0x0000000000000001ul; // +Min SubNormal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) if (!NoZeros) { @@ -104,17 +104,17 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0x00000000FFFFFFFFul; // -QNaN (all ones payload) - yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) - yield return 0x000000007FFFFFFFul; // +QNaN (all ones payload) - yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); ulong Rnd1 = GenNormal_S(); - ulong Rnd2 = GenSubNormal_S(); + ulong Rnd2 = GenSubnormal_S(); yield return (Grbg << 32) | Rnd1; yield return (Grbg << 32) | Rnd2; @@ -123,14 +123,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _2S_F_() { - yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) yield return 0x8080000080800000ul; // -Min Normal - yield return 0x807FFFFF807FFFFFul; // -Max SubNormal - yield return 0x8000000180000001ul; // -Min SubNormal - yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) yield return 0x0080000000800000ul; // +Min Normal - yield return 0x007FFFFF007FFFFFul; // +Max SubNormal - yield return 0x0000000100000001ul; // +Min SubNormal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) if (!NoZeros) { @@ -146,16 +146,16 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) - yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) - yield return 0x7FFFFFFF7FFFFFFFul; // +QNaN (all ones payload) - yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Rnd1 = GenNormal_S(); - ulong Rnd2 = GenSubNormal_S(); + ulong Rnd2 = GenSubnormal_S(); yield return (Rnd1 << 32) | Rnd1; yield return (Rnd2 << 32) | Rnd2; @@ -164,14 +164,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _1D_F_() { - yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) yield return 0x8010000000000000ul; // -Min Normal - yield return 0x800FFFFFFFFFFFFFul; // -Max SubNormal - yield return 0x8000000000000001ul; // -Min SubNormal - yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) yield return 0x0010000000000000ul; // +Min Normal - yield return 0x000FFFFFFFFFFFFFul; // +Max SubNormal - yield return 0x0000000000000001ul; // +Min SubNormal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) if (!NoZeros) { @@ -187,16 +187,16 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) - yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) - yield return 0x7FFFFFFFFFFFFFFFul; // +QNaN (all ones payload) - yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Rnd1 = GenNormal_D(); - ulong Rnd2 = GenSubNormal_D(); + ulong Rnd2 = GenSubnormal_D(); yield return Rnd1; yield return Rnd2; @@ -248,6 +248,40 @@ namespace Ryujinx.Tests.Cpu 0x6EE1B800u // FCVTZU V0.2D, V0.2D }; } + + private static uint[] _F_RecpX_Sqrt_S_S_() + { + return new uint[] + { + 0x5EA1F820u, // FRECPX S0, S1 + 0x1E21C020u // FSQRT S0, S1 + }; + } + + private static uint[] _F_RecpX_Sqrt_S_D_() + { + return new uint[] + { + 0x5EE1F820u, // FRECPX D0, D1 + 0x1E61C020u // FSQRT D0, D1 + }; + } + + private static uint[] _F_Sqrt_V_2S_4S_() + { + return new uint[] + { + 0x2EA1F800u // FSQRT V0.2S, V0.2S + }; + } + + private static uint[] _F_Sqrt_V_2D_() + { + return new uint[] + { + 0x6EE1F800u // FSQRT V0.2D, V0.2D + }; + } #endregion private const int RndCnt = 2; @@ -754,21 +788,15 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("FCVT
, ")] public void Fcvt_S_SD([ValueSource("_1S_F_")] ulong A) { - //const int DNFlagBit = 25; // Default NaN mode control bit. - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - uint Opcode = 0x1E22C020; // FCVT D0, S1 ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE1(Z); Vector128 V1 = MakeVectorE0(A); - //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. - //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + CompareAgainstUnicorn(); } [Test, Pairwise, Description("FCVT , ")] @@ -789,17 +817,13 @@ namespace Ryujinx.Tests.Cpu public void F_Cvt_NZ_SU_S_S([ValueSource("_F_Cvt_NZ_SU_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A) { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); - AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); + CompareAgainstUnicorn(); } [Test, Pairwise] @@ -851,6 +875,76 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void F_RecpX_Sqrt_S_S([ValueSource("_F_RecpX_Sqrt_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] + public void F_RecpX_Sqrt_S_D([ValueSource("_F_RecpX_Sqrt_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] + public void F_Sqrt_V_2S_4S([ValueSource("_F_Sqrt_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] + public void F_Sqrt_V_2D([ValueSource("_F_Sqrt_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A) + { + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + [Test, Pairwise, Description("NEG , ")] public void Neg_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index 7a67d53b..a5ae1a5f 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -9,126 +9,6 @@ namespace Ryujinx.Tests.Cpu { public class CpuTestSimdArithmetic : CpuTest { - [TestCase(0x1E224820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000000000000ul)] // FMAX S0, S1, S2 - [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x1E224820u, 0x0000000080000000ul, 0x0000000080000000ul, 0x0000000080000000ul)] - [TestCase(0x1E224820u, 0x0000000080000000ul, 0x000000003DCCCCCDul, 0x000000003DCCCCCDul)] - [TestCase(0x1E224820u, 0x000000003DCCCCCDul, 0x000000003C9623B1ul, 0x000000003DCCCCCDul)] - [TestCase(0x1E224820u, 0x000000008BA98D27ul, 0x0000000000000076ul, 0x0000000000000076ul)] - [TestCase(0x1E224820u, 0x00000000807FFFFFul, 0x000000007F7FFFFFul, 0x000000007F7FFFFFul)] - [TestCase(0x1E224820u, 0x000000007F7FFFFFul, 0x00000000807FFFFFul, 0x000000007F7FFFFFul)] - [TestCase(0x1E224820u, 0x000000007FC00000ul, 0x000000003F800000ul, 0x000000007FC00000ul)] - [TestCase(0x1E224820u, 0x000000003F800000ul, 0x000000007FC00000ul, 0x000000007FC00000ul)] - [TestCase(0x1E224820u, 0x000000007F800001ul, 0x000000007FC00042ul, 0x000000007FC00001ul)] - [TestCase(0x1E224820u, 0x000000007FC00042ul, 0x000000007F800001ul, 0x000000007FC00001ul)] - [TestCase(0x1E224820u, 0x000000007FC0000Aul, 0x000000007FC0000Bul, 0x000000007FC0000Aul)] - [TestCase(0x1E624820u, 0x0000000000000000ul, 0x8000000000000000ul, 0x0000000000000000ul)] // FMAX D0, D1, D2 - [TestCase(0x1E624820u, 0x8000000000000000ul, 0x0000000000000000ul, 0x0000000000000000ul)] - [TestCase(0x1E624820u, 0x8000000000000000ul, 0x8000000000000000ul, 0x8000000000000000ul)] - [TestCase(0x1E624820u, 0x8000000000000000ul, 0x3FF3333333333333ul, 0x3FF3333333333333ul)] - public void Fmax_S(uint Opcode, ulong A, ulong B, ulong Result) - { - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(Result)); - - CompareAgainstUnicorn(); - } - - [TestCase(0x80000000u, 0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u)] - [TestCase(0x00000000u, 0x00000000u, 0x80000000u, 0x80000000u, 0x00000000u, 0x00000000u)] - [TestCase(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u)] - [TestCase(0x80000000u, 0x80000000u, 0x3DCCCCCDu, 0x3DCCCCCDu, 0x3DCCCCCDu, 0x3DCCCCCDu)] - [TestCase(0x3DCCCCCDu, 0x3DCCCCCDu, 0x3C9623B1u, 0x3C9623B1u, 0x3DCCCCCDu, 0x3DCCCCCDu)] - [TestCase(0x8BA98D27u, 0x8BA98D27u, 0x00000076u, 0x00000076u, 0x00000076u, 0x00000076u)] - [TestCase(0x807FFFFFu, 0x807FFFFFu, 0x7F7FFFFFu, 0x7F7FFFFFu, 0x7F7FFFFFu, 0x7F7FFFFFu)] - [TestCase(0x7F7FFFFFu, 0x7F7FFFFFu, 0x807FFFFFu, 0x807FFFFFu, 0x7F7FFFFFu, 0x7F7FFFFFu)] - [TestCase(0x7FC00000u, 0x7FC00000u, 0x3F800000u, 0x3F800000u, 0x7FC00000u, 0x7FC00000u)] - [TestCase(0x3F800000u, 0x3F800000u, 0x7FC00000u, 0x7FC00000u, 0x7FC00000u, 0x7FC00000u)] - [TestCase(0x7F800001u, 0x7F800001u, 0x7FC00042u, 0x7FC00042u, 0x7FC00001u, 0x7FC00001u)] - [TestCase(0x7FC00042u, 0x7FC00042u, 0x7F800001u, 0x7F800001u, 0x7FC00001u, 0x7FC00001u)] - [TestCase(0x7FC0000Au, 0x7FC0000Au, 0x7FC0000Bu, 0x7FC0000Bu, 0x7FC0000Au, 0x7FC0000Au)] - public void Fmax_V(uint A, uint B, uint C, uint D, uint Result0, uint Result1) - { - uint Opcode = 0x4E22F420; // FMAX V0.4S, V1.4S, V2.4S - - Vector128 V1 = MakeVectorE0E1(A, B); - Vector128 V2 = MakeVectorE0E1(C, D); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(Result0)); - Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(Result1)); - }); - - CompareAgainstUnicorn(); - } - - [TestCase(0x1E225820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000080000000ul)] // FMIN S0, S1, S2 - [TestCase(0x1E225820u, 0x0000000080000000ul, 0x0000000000000000ul, 0x0000000080000000ul)] - [TestCase(0x1E225820u, 0x0000000080000000ul, 0x0000000080000000ul, 0x0000000080000000ul)] - [TestCase(0x1E225820u, 0x0000000080000000ul, 0x000000003DCCCCCDul, 0x0000000080000000ul)] - [TestCase(0x1E225820u, 0x000000003DCCCCCDul, 0x000000003C9623B1ul, 0x000000003C9623B1ul)] - [TestCase(0x1E225820u, 0x000000008BA98D27ul, 0x0000000000000076ul, 0x000000008BA98D27ul)] - [TestCase(0x1E225820u, 0x00000000807FFFFFul, 0x000000007F7FFFFFul, 0x00000000807FFFFFul)] - [TestCase(0x1E225820u, 0x000000007F7FFFFFul, 0x00000000807FFFFFul, 0x00000000807FFFFFul)] - [TestCase(0x1E225820u, 0x000000007FC00000ul, 0x000000003F800000ul, 0x000000007FC00000ul)] - [TestCase(0x1E225820u, 0x000000003F800000ul, 0x000000007FC00000ul, 0x000000007FC00000ul)] - [TestCase(0x1E225820u, 0x000000007F800001ul, 0x000000007FC00042ul, 0x000000007FC00001ul)] - [TestCase(0x1E225820u, 0x000000007FC00042ul, 0x000000007F800001ul, 0x000000007FC00001ul)] - [TestCase(0x1E225820u, 0x000000007FC0000Aul, 0x000000007FC0000Bul, 0x000000007FC0000Aul)] - [TestCase(0x1E625820u, 0x0000000000000000ul, 0x8000000000000000ul, 0x8000000000000000ul)] // FMIN D0, D1, D2 - [TestCase(0x1E625820u, 0x8000000000000000ul, 0x0000000000000000ul, 0x8000000000000000ul)] - [TestCase(0x1E625820u, 0x8000000000000000ul, 0x8000000000000000ul, 0x8000000000000000ul)] - [TestCase(0x1E625820u, 0x8000000000000000ul, 0x3FF3333333333333ul, 0x8000000000000000ul)] - public void Fmin_S(uint Opcode, ulong A, ulong B, ulong Result) - { - Vector128 V1 = MakeVectorE0(A); - Vector128 V2 = MakeVectorE0(B); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(Result)); - - CompareAgainstUnicorn(); - } - - [TestCase(0x80000000u, 0x80000000u, 0x00000000u, 0x00000000u, 0x80000000u, 0x80000000u)] - [TestCase(0x00000000u, 0x00000000u, 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u)] - [TestCase(0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u)] - [TestCase(0x80000000u, 0x80000000u, 0x3DCCCCCDu, 0x3DCCCCCDu, 0x80000000u, 0x80000000u)] - [TestCase(0x3DCCCCCDu, 0x3DCCCCCDu, 0x3C9623B1u, 0x3C9623B1u, 0x3C9623B1u, 0x3C9623B1u)] - [TestCase(0x8BA98D27u, 0x8BA98D27u, 0x00000076u, 0x00000076u, 0x8BA98D27u, 0x8BA98D27u)] - [TestCase(0x807FFFFFu, 0x807FFFFFu, 0x7F7FFFFFu, 0x7F7FFFFFu, 0x807FFFFFu, 0x807FFFFFu)] - [TestCase(0x7F7FFFFFu, 0x7F7FFFFFu, 0x807FFFFFu, 0x807FFFFFu, 0x807FFFFFu, 0x807FFFFFu)] - [TestCase(0x7FC00000u, 0x7FC00000u, 0x3F800000u, 0x3F800000u, 0x7FC00000u, 0x7FC00000u)] - [TestCase(0x3F800000u, 0x3F800000u, 0x7FC00000u, 0x7FC00000u, 0x7FC00000u, 0x7FC00000u)] - [TestCase(0x7F800001u, 0x7F800001u, 0x7FC00042u, 0x7FC00042u, 0x7FC00001u, 0x7FC00001u)] - [TestCase(0x7FC00042u, 0x7FC00042u, 0x7F800001u, 0x7F800001u, 0x7FC00001u, 0x7FC00001u)] - [TestCase(0x7FC0000Au, 0x7FC0000Au, 0x7FC0000Bu, 0x7FC0000Bu, 0x7FC0000Au, 0x7FC0000Au)] - public void Fmin_V(uint A, uint B, uint C, uint D, uint Result0, uint Result1) - { - uint Opcode = 0x4EA2F420; // FMIN V0.4S, V1.4S, V2.4S - - Vector128 V1 = MakeVectorE0E1(A, B); - Vector128 V2 = MakeVectorE0E1(C, D); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - Assert.Multiple(() => - { - Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(Result0)); - Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(Result1)); - }); - - CompareAgainstUnicorn(); - } - [Test, Description("FMUL S6, S1, V0.S[2]")] public void Fmul_Se([Random(10)] float A, [Random(10)] float B) { @@ -161,38 +41,6 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Description("FRECPS D0, D1, D2"), Ignore("Not accurate enough.")] - public void Frecps_S([Random(10)] double A, [Random(10)] double B) - { - AThreadState ThreadState = SingleOpcode(0x5E62FC20, - V1: MakeVectorE0(A), - V2: MakeVectorE0(B)); - - Assert.That(VectorExtractDouble(ThreadState.V0, (byte)0), Is.EqualTo(2 - (A * B))); - - CompareAgainstUnicorn(); - } - - [Test, Description("FRECPS V4.4S, V2.4S, V0.4S")] - public void Frecps_V([Random(10)] float A, [Random(10)] float B) - { - AThreadState ThreadState = SingleOpcode(0x4E20FC44, - V2: Sse.SetAllVector128(A), - V0: Sse.SetAllVector128(B)); - - float Result = (float)(2 - ((double)A * (double)B)); - - Assert.Multiple(() => - { - Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result)); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result)); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result)); - Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result)); - }); - - CompareAgainstUnicorn(); - } - [TestCase(0x3FE66666u, false, 0x40000000u)] [TestCase(0x3F99999Au, false, 0x3F800000u)] [TestCase(0x404CCCCDu, false, 0x40400000u)] diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs deleted file mode 100644 index a1558b05..00000000 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCmp.cs +++ /dev/null @@ -1,407 +0,0 @@ -using ChocolArm64.State; - -using NUnit.Framework; - -using System; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; - -namespace Ryujinx.Tests.Cpu -{ - public class CpuTestSimdCmp : CpuTest - { -#region "ValueSource" - private static float[] _floats_() - { - return new float[] { float.NegativeInfinity, float.MinValue, -1f, -0f, - +0f, +1f, float.MaxValue, float.PositiveInfinity }; - } - - private static double[] _doubles_() - { - return new double[] { double.NegativeInfinity, double.MinValue, -1d, -0d, - +0d, +1d, double.MaxValue, double.PositiveInfinity }; - } -#endregion - - private const int RndCnt = 2; - - [Test, Description("FCMEQ D0, D1, D2 | FCMGE D0, D1, D2 | FCMGT D0, D1, D2")] - public void Fcmeq_Fcmge_Fcmgt_Reg_S_D([ValueSource("_doubles_")] [Random(RndCnt)] double A, - [ValueSource("_doubles_")] [Random(RndCnt)] double B, - [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT - { - uint Opcode = 0x5E62E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); - - Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(TestContext.CurrentContext.Random.NextDouble())); - Vector128 V1 = Sse.StaticCast(Sse2.SetScalarVector128(A)); - Vector128 V2 = Sse.StaticCast(Sse2.SetScalarVector128(B)); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - - switch (EU) - { - case 0: Exp = (A == B ? Ones : Zeros); break; - case 1: Exp = (A >= B ? Ones : Zeros); break; - case 3: Exp = (A > B ? Ones : Zeros); break; - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(VectorExtractDouble(ThreadState.V0, (byte)1), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMEQ S0, S1, S2 | FCMGE S0, S1, S2 | FCMGT S0, S1, S2")] - public void Fcmeq_Fcmge_Fcmgt_Reg_S_S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [ValueSource("_floats_")] [Random(RndCnt)] float B, - [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT - { - uint Opcode = 0x5E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); - - Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); - Vector128 V1 = Sse.SetScalarVector128(A); - Vector128 V2 = Sse.SetScalarVector128(B); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - switch (EU) - { - case 0: Exp = (A == B ? Ones : Zeros); break; - case 1: Exp = (A >= B ? Ones : Zeros); break; - case 3: Exp = (A > B ? Ones : Zeros); break; - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)1), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMEQ V0.2D, V1.2D, V2.2D | FCMGE V0.2D, V1.2D, V2.2D | FCMGT V0.2D, V1.2D, V2.2D")] - public void Fcmeq_Fcmge_Fcmgt_Reg_V_2D([ValueSource("_doubles_")] [Random(RndCnt)] double A, - [ValueSource("_doubles_")] [Random(RndCnt)] double B, - [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT - { - uint Opcode = 0x4E62E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); - - Vector128 V1 = Sse.StaticCast(Sse2.SetAllVector128(A)); - Vector128 V2 = Sse.StaticCast(Sse2.SetAllVector128(B)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - - switch (EU) - { - case 0: Exp = (A == B ? Ones : Zeros); break; - case 1: Exp = (A >= B ? Ones : Zeros); break; - case 3: Exp = (A > B ? Ones : Zeros); break; - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMEQ V0.2S, V1.2S, V2.2S | FCMGE V0.2S, V1.2S, V2.2S | FCMGT V0.2S, V1.2S, V2.2S")] - public void Fcmeq_Fcmge_Fcmgt_Reg_V_2S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [ValueSource("_floats_")] [Random(RndCnt)] float B, - [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT - { - uint Opcode = 0x0E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); - - Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); - Vector128 V1 = Sse.SetVector128(0, 0, A, A); - Vector128 V2 = Sse.SetVector128(0, 0, B, B); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - switch (EU) - { - case 0: Exp = (A == B ? Ones : Zeros); break; - case 1: Exp = (A >= B ? Ones : Zeros); break; - case 3: Exp = (A > B ? Ones : Zeros); break; - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMEQ V0.4S, V1.4S, V2.4S | FCMGE V0.4S, V1.4S, V2.4S | FCMGT V0.4S, V1.4S, V2.4S")] - public void Fcmeq_Fcmge_Fcmgt_Reg_V_4S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [ValueSource("_floats_")] [Random(RndCnt)] float B, - [Values(0u, 1u, 3u)] uint EU) // EQ, GE, GT - { - uint Opcode = 0x4E22E420 | ((EU & 1) << 29) | ((EU >> 1) << 23); - - Vector128 V1 = Sse.SetAllVector128(A); - Vector128 V2 = Sse.SetAllVector128(B); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2); - - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - switch (EU) - { - case 0: Exp = (A == B ? Ones : Zeros); break; - case 1: Exp = (A >= B ? Ones : Zeros); break; - case 3: Exp = (A > B ? Ones : Zeros); break; - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)2)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)3)), Is.EquivalentTo(Exp)); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMGT D0, D1, #0.0 | FCMGE D0, D1, #0.0 | FCMEQ D0, D1, #0.0 | FCMLE D0, D1, #0.0 | FCMLT D0, D1, #0.0")] - public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_S_D([ValueSource("_doubles_")] [Random(RndCnt)] double A, - [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE - [Values(0u, 1u)] uint bit13) // "LT" - { - uint Opcode = 0x5EE0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); - - Vector128 V0 = Sse.StaticCast(Sse2.SetAllVector128(TestContext.CurrentContext.Random.NextDouble())); - Vector128 V1 = Sse.StaticCast(Sse2.SetScalarVector128(A)); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - - double Zero = +0d; - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - - if (bit13 == 0) - { - switch (opU) - { - case 0: Exp = (A > Zero ? Ones : Zeros); break; - case 1: Exp = (A >= Zero ? Ones : Zeros); break; - case 2: Exp = (A == Zero ? Ones : Zeros); break; - case 3: Exp = (Zero >= A ? Ones : Zeros); break; - } - } - else - { - Exp = (Zero > A ? Ones : Zeros); - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(VectorExtractDouble(ThreadState.V0, (byte)1), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMGT S0, S1, #0.0 | FCMGE S0, S1, #0.0 | FCMEQ S0, S1, #0.0 | FCMLE S0, S1, #0.0 | FCMLT S0, S1, #0.0")] - public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_S_S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE - [Values(0u, 1u)] uint bit13) // "LT" - { - uint Opcode = 0x5EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); - - Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); - Vector128 V1 = Sse.SetScalarVector128(A); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - - float Zero = +0f; - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - if (bit13 == 0) - { - switch (opU) - { - case 0: Exp = (A > Zero ? Ones : Zeros); break; - case 1: Exp = (A >= Zero ? Ones : Zeros); break; - case 2: Exp = (A == Zero ? Ones : Zeros); break; - case 3: Exp = (Zero >= A ? Ones : Zeros); break; - } - } - else - { - Exp = (Zero > A ? Ones : Zeros); - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)1), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMGT V0.2D, V1.2D, #0.0 | FCMGE V0.2D, V1.2D, #0.0 | FCMEQ V0.2D, V1.2D, #0.0 | FCMLE V0.2D, V1.2D, #0.0 | FCMLT V0.2D, V1.2D, #0.0")] - public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_2D([ValueSource("_doubles_")] [Random(RndCnt)] double A, - [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE - [Values(0u, 1u)] uint bit13) // "LT" - { - uint Opcode = 0x4EE0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); - - Vector128 V1 = Sse.StaticCast(Sse2.SetAllVector128(A)); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); - - double Zero = +0d; - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; - - if (bit13 == 0) - { - switch (opU) - { - case 0: Exp = (A > Zero ? Ones : Zeros); break; - case 1: Exp = (A >= Zero ? Ones : Zeros); break; - case 2: Exp = (A == Zero ? Ones : Zeros); break; - case 3: Exp = (Zero >= A ? Ones : Zeros); break; - } - } - else - { - Exp = (Zero > A ? Ones : Zeros); - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(VectorExtractDouble(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMGT V0.2S, V1.2S, #0.0 | FCMGE V0.2S, V1.2S, #0.0 | FCMEQ V0.2S, V1.2S, #0.0 | FCMLE V0.2S, V1.2S, #0.0 | FCMLT V0.2S, V1.2S, #0.0")] - public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_2S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE - [Values(0u, 1u)] uint bit13) // "LT" - { - uint Opcode = 0x0EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); - - Vector128 V0 = Sse.SetAllVector128(TestContext.CurrentContext.Random.NextFloat()); - Vector128 V1 = Sse.SetVector128(0, 0, A, A); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); - - float Zero = +0f; - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - if (bit13 == 0) - { - switch (opU) - { - case 0: Exp = (A > Zero ? Ones : Zeros); break; - case 1: Exp = (A >= Zero ? Ones : Zeros); break; - case 2: Exp = (A == Zero ? Ones : Zeros); break; - case 3: Exp = (Zero >= A ? Ones : Zeros); break; - } - } - else - { - Exp = (Zero > A ? Ones : Zeros); - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)2), Is.Zero); - Assert.That(Sse41.Extract(ThreadState.V0, (byte)3), Is.Zero); - }); - - CompareAgainstUnicorn(); - } - - [Test, Description("FCMGT V0.4S, V1.4S, #0.0 | FCMGE V0.4S, V1.4S, #0.0 | FCMEQ V0.4S, V1.4S, #0.0 | FCMLE V0.4S, V1.4S, #0.0 | FCMLT V0.4S, V1.4S, #0.0")] - public void Fcmgt_Fcmge_Fcmeq_Fcmle_Fcmlt_Zero_V_4S([ValueSource("_floats_")] [Random(RndCnt)] float A, - [Values(0u, 1u, 2u, 3u)] uint opU, // GT, GE, EQ, LE - [Values(0u, 1u)] uint bit13) // "LT" - { - uint Opcode = 0x4EA0C820 | (((opU & 1) & ~bit13) << 29) | (bit13 << 13) | (((opU >> 1) & ~bit13) << 12); - - Vector128 V1 = Sse.SetAllVector128(A); - - AThreadState ThreadState = SingleOpcode(Opcode, V1: V1); - - float Zero = +0f; - byte[] Exp = default(byte[]); - byte[] Ones = new byte[] {0xFF, 0xFF, 0xFF, 0xFF}; - byte[] Zeros = new byte[] {0x00, 0x00, 0x00, 0x00}; - - if (bit13 == 0) - { - switch (opU) - { - case 0: Exp = (A > Zero ? Ones : Zeros); break; - case 1: Exp = (A >= Zero ? Ones : Zeros); break; - case 2: Exp = (A == Zero ? Ones : Zeros); break; - case 3: Exp = (Zero >= A ? Ones : Zeros); break; - } - } - else - { - Exp = (Zero > A ? Ones : Zeros); - } - - Assert.Multiple(() => - { - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)0)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)1)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)2)), Is.EquivalentTo(Exp)); - Assert.That(BitConverter.GetBytes(Sse41.Extract(ThreadState.V0, (byte)3)), Is.EquivalentTo(Exp)); - }); - - CompareAgainstUnicorn(); - } - } -} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index ae409a6d..7d47416f 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -80,14 +80,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _1S_F_() { - yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) yield return 0x0000000080800000ul; // -Min Normal - yield return 0x00000000807FFFFFul; // -Max SubNormal - yield return 0x0000000080000001ul; // -Min SubNormal - yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) yield return 0x0000000000800000ul; // +Min Normal - yield return 0x00000000007FFFFFul; // +Max SubNormal - yield return 0x0000000000000001ul; // +Min SubNormal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) if (!NoZeros) { @@ -103,17 +103,17 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0x00000000FFFFFFFFul; // -QNaN (all ones payload) - yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) - yield return 0x000000007FFFFFFFul; // +QNaN (all ones payload) - yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); ulong Rnd1 = GenNormal_S(); - ulong Rnd2 = GenSubNormal_S(); + ulong Rnd2 = GenSubnormal_S(); yield return (Grbg << 32) | Rnd1; yield return (Grbg << 32) | Rnd2; @@ -122,14 +122,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _2S_F_() { - yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) yield return 0x8080000080800000ul; // -Min Normal - yield return 0x807FFFFF807FFFFFul; // -Max SubNormal - yield return 0x8000000180000001ul; // -Min SubNormal - yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) yield return 0x0080000000800000ul; // +Min Normal - yield return 0x007FFFFF007FFFFFul; // +Max SubNormal - yield return 0x0000000100000001ul; // +Min SubNormal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) if (!NoZeros) { @@ -145,16 +145,16 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) - yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) - yield return 0x7FFFFFFF7FFFFFFFul; // +QNaN (all ones payload) - yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Rnd1 = GenNormal_S(); - ulong Rnd2 = GenSubNormal_S(); + ulong Rnd2 = GenSubnormal_S(); yield return (Rnd1 << 32) | Rnd1; yield return (Rnd2 << 32) | Rnd2; @@ -163,14 +163,14 @@ namespace Ryujinx.Tests.Cpu private static IEnumerable _1D_F_() { - yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) yield return 0x8010000000000000ul; // -Min Normal - yield return 0x800FFFFFFFFFFFFFul; // -Max SubNormal - yield return 0x8000000000000001ul; // -Min SubNormal - yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) yield return 0x0010000000000000ul; // +Min Normal - yield return 0x000FFFFFFFFFFFFFul; // +Max SubNormal - yield return 0x0000000000000001ul; // +Min SubNormal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) if (!NoZeros) { @@ -186,16 +186,16 @@ namespace Ryujinx.Tests.Cpu if (!NoNaNs) { - yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) - yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) - yield return 0x7FFFFFFFFFFFFFFFul; // +QNaN (all ones payload) - yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) } for (int Cnt = 1; Cnt <= RndCnt; Cnt++) { ulong Rnd1 = GenNormal_D(); - ulong Rnd2 = GenSubNormal_D(); + ulong Rnd2 = GenSubnormal_D(); yield return Rnd1; yield return Rnd2; @@ -204,6 +204,72 @@ namespace Ryujinx.Tests.Cpu #endregion #region "ValueSource (Opcodes)" + private static uint[] _F_Add_Div_Mul_MulX_Sub_S_S_() + { + return new uint[] + { + 0x1E222820u, // FADD S0, S1, S2 + 0x1E221820u, // FDIV S0, S1, S2 + 0x1E220820u, // FMUL S0, S1, S2 + 0x5E22DC20u, // FMULX S0, S1, S2 + 0x1E223820u // FSUB S0, S1, S2 + }; + } + + private static uint[] _F_Add_Div_Mul_MulX_Sub_S_D_() + { + return new uint[] + { + 0x1E622820u, // FADD D0, D1, D2 + 0x1E621820u, // FDIV D0, D1, D2 + 0x1E620820u, // FMUL D0, D1, D2 + 0x5E62DC20u, // FMULX D0, D1, D2 + 0x1E623820u // FSUB D0, D1, D2 + }; + } + + private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2S_4S_() + { + return new uint[] + { + 0x0E20D400u, // FADD V0.2S, V0.2S, V0.2S + 0x2E20FC00u, // FDIV V0.2S, V0.2S, V0.2S + 0x2E20DC00u, // FMUL V0.2S, V0.2S, V0.2S + 0x0E20DC00u, // FMULX V0.2S, V0.2S, V0.2S + 0x0EA0D400u // FSUB V0.2S, V0.2S, V0.2S + }; + } + + private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2D_() + { + return new uint[] + { + 0x4E60D400u, // FADD V0.2D, V0.2D, V0.2D + 0x6E60FC00u, // FDIV V0.2D, V0.2D, V0.2D + 0x6E60DC00u, // FMUL V0.2D, V0.2D, V0.2D + 0x4E60DC00u, // FMULX V0.2D, V0.2D, V0.2D + 0x4EE0D400u // FSUB V0.2D, V0.2D, V0.2D + }; + } + + private static uint[] _Fmadd_Fmsub_S_S_() + { + return new uint[] + { + 0x1F020C20u, // FMADD S0, S1, S2, S3 + 0x1F028C20u // FMSUB S0, S1, S2, S3 + }; + } + + private static uint[] _Fmadd_Fmsub_S_D_() + { + return new uint[] + { + 0x1F420C20u, // FMADD D0, D1, D2, D3 + 0x1F428C20u // FMSUB D0, D1, D2, D3 + }; + } + private static uint[] _F_Max_Min_Nm_S_S_() { return new uint[] @@ -251,6 +317,42 @@ namespace Ryujinx.Tests.Cpu 0x6EE0F400u // FMINP V0.2D, V0.2D, V0.2D }; } + + private static uint[] _Frecps_Frsqrts_S_S_() + { + return new uint[] + { + 0x5E22FC20u, // FRECPS S0, S1, S2 + 0x5EA2FC20u // FRSQRTS S0, S1, S2 + }; + } + + private static uint[] _Frecps_Frsqrts_S_D_() + { + return new uint[] + { + 0x5E62FC20u, // FRECPS D0, D1, D2 + 0x5EE2FC20u // FRSQRTS D0, D1, D2 + }; + } + + private static uint[] _Frecps_Frsqrts_V_2S_4S_() + { + return new uint[] + { + 0x0E20FC00u, // FRECPS V0.2S, V0.2S, V0.2S + 0x0EA0FC00u // FRSQRTS V0.2S, V0.2S, V0.2S + }; + } + + private static uint[] _Frecps_Frsqrts_V_2D_() + { + return new uint[] + { + 0x4E60FC00u, // FRECPS V0.2D, V0.2D, V0.2D + 0x4EE0FC00u // FRSQRTS V0.2D, V0.2D, V0.2D + }; + } #endregion private const int RndCnt = 2; @@ -1035,46 +1137,122 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FMADD , , , ")] - public void Fmadd_S_S([ValueSource("_1S_F_")] ulong A, - [ValueSource("_1S_F_")] ulong B, - [ValueSource("_1S_F_")] ulong C) + [Test, Pairwise] + public void F_Add_Div_Mul_MulX_Sub_S_S([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_1S_F_")] ulong B) { - //const int DNFlagBit = 25; // Default NaN mode control bit. - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); - uint Opcode = 0x1F020C20; // FMADD S0, S1, S2, S3 + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); + } + + [Test, Pairwise] + public void F_Add_Div_Mul_MulX_Sub_S_D([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); + } + + [Test, Pairwise] + public void F_Add_Div_Mul_MulX_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * Q); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); + } + + [Test, Pairwise] + public void F_Add_Div_Mul_MulX_Sub_V_2D([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); + } + + [Test, Pairwise] // Fused. + public void Fmadd_Fmsub_S_S([ValueSource("_Fmadd_Fmsub_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_1S_F_")] ulong B, + [ValueSource("_1S_F_")] ulong C) + { ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); Vector128 V3 = MakeVectorE0(C); - //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. - //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, V3: V3/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, V3: V3, Fpcr: Fpcr); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC, */FpSkips: FpSkips.IfNaN_S/*, FpUseTolerance: FpUseTolerance.OneUlps_S*/); + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); } - [Test, Pairwise, Description("FMADD
, , , ")] - public void Fmadd_S_D([ValueSource("_1D_F_")] ulong A, - [ValueSource("_1D_F_")] ulong B, - [ValueSource("_1D_F_")] ulong C) + [Test, Pairwise] // Fused. + public void Fmadd_Fmsub_S_D([ValueSource("_Fmadd_Fmsub_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [ValueSource("_1D_F_")] ulong C) { - uint Opcode = 0x1F420C20; // FMADD D0, D1, D2, D3 - ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE1(Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); Vector128 V3 = MakeVectorE0(C); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, V3: V3); + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - CompareAgainstUnicorn(FpSkips: FpSkips.IfNaN_D/*, FpUseTolerance: FpUseTolerance.OneUlps_D*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, V3: V3, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); } [Test, Pairwise] @@ -1082,20 +1260,16 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_1S_F_")] ulong A, [ValueSource("_1S_F_")] ulong B) { - //const int DNFlagBit = 25; // Default NaN mode control bit. - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); - //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. - //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } [Test, Pairwise] @@ -1108,9 +1282,11 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0(A); Vector128 V2 = MakeVectorE0(B); - AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - CompareAgainstUnicorn(); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } [Test, Pairwise] @@ -1123,9 +1299,6 @@ namespace Ryujinx.Tests.Cpu [ValueSource("_2S_F_")] ulong B, [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> { - //const int DNFlagBit = 25; // Default NaN mode control bit. - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); Opcodes |= ((Q & 1) << 30); @@ -1133,12 +1306,11 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, A * Q); Vector128 V2 = MakeVectorE0E1(B, B * Q); - //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. - //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } [Test, Pairwise] @@ -1156,9 +1328,91 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, A); Vector128 V2 = MakeVectorE0E1(B, B); - AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); - CompareAgainstUnicorn(); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] // Fused. + public void Frecps_Frsqrts_S_S([ValueSource("_Frecps_Frsqrts_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_1S_F_")] ulong B) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); + } + + [Test, Pairwise] // Fused. + public void Frecps_Frsqrts_S_D([ValueSource("_Frecps_Frsqrts_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); + } + + [Test, Pairwise] // Fused. + public void Frecps_Frsqrts_V_2S_4S([ValueSource("_Frecps_Frsqrts_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * Q); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); + } + + [Test, Pairwise] // Fused. + public void Frecps_Frsqrts_V_2D([ValueSource("_Frecps_Frsqrts_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); } [Test, Pairwise, Description("ORN ., ., .")]