Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. (#1471)

* Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. * Address PR feedback & Nit.
2020-08-13 07:34:02 +02:00 · 2020-08-13 07:34:02 +02:00 · 6938988427
commit 6938988427
parent 1ad9045c6b
15 changed files with 309 additions and 31 deletions
--- a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
@ -2,12 +2,20 @@
 {
    class OpCode32SimdCvtFI : OpCode32SimdS
    {
-        public int Opc2 { get; private set; }
-
        public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
-            Opc2 = (opCode >> 16) & 0x7;
            Opc = (opCode >> 7) & 0x1;
+
+            bool toInteger = (Opc2 & 0b100) != 0;
+
+            if (toInteger)
+            {
+                Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+            }
+            else
+            {
+                Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+            }
        }
    }
 }
--- a/ARMeilleure/Decoders/OpCode32SimdS.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdS.cs
@ -2,14 +2,17 @@
 {
    class OpCode32SimdS : OpCode32, IOpCode32Simd
    {
-        public int Vd { get; private set; }
-        public int Vm { get; private set; }
-        public int Opc { get; protected set; }
+        public int Vd { get; protected set; }
+        public int Vm { get; protected set; }
+        public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe].
+        public int Opc2 { get; private set; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint].
        public int Size { get; protected set; }

        public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
            Opc = (opCode >> 15) & 0x3;
+            Opc2 = (opCode >> 16) & 0x7;
+
            Size = (opCode >> 8) & 0x3;

            bool single = Size != 3;
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@ -825,15 +825,17 @@ namespace ARMeilleure.Decoders
            SetA32("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp,     InstEmit32.Vcmp,     typeof(OpCode32SimdS));
            SetA32("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe,    InstEmit32.Vcmpe,    typeof(OpCode32SimdS));
            SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_FD,  typeof(OpCode32SimdS)); // FP 32 and 64, scalar.
-            SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_FI,  typeof(OpCode32SimdCvtFI)); // FP32 to int.
-            SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_FI,  typeof(OpCode32SimdCvtFI)); // Int to FP32.
-            SetA32("111111101x1111xxxxxx10>>x1x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_R,   typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp).
+            SetA32("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_FI,  typeof(OpCode32SimdCvtFI)); // FP32 to int.
+            SetA32("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_FI,  typeof(OpCode32SimdCvtFI)); // Int to FP32.
+            SetA32("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_RM,  typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp).
            SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt,     InstEmit32.Vcvt_V,   typeof(OpCode32SimdCmpZ)); // FP and integer, vector.
            SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv,     InstEmit32.Vdiv_S,   typeof(OpCode32SimdRegS));
            SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup,     InstEmit32.Vdup,     typeof(OpCode32SimdDupGP));
            SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup,     InstEmit32.Vdup_1,   typeof(OpCode32SimdDupElem));
            SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor,     InstEmit32.Veor_I,   typeof(OpCode32SimdBinary));
            SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext,     InstEmit32.Vext,     typeof(OpCode32SimdExt));
+            SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma,     InstEmit32.Vfma_S,   typeof(OpCode32SimdRegS));
+            SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms,     InstEmit32.Vfms_S,   typeof(OpCode32SimdRegS));
            SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1,     InstEmit32.Vld1,     typeof(OpCode32SimdMemSingle));
            SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1,     InstEmit32.Vld1,     typeof(OpCode32SimdMemPair)); // Regs = 1.
            SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1,     InstEmit32.Vld1,     typeof(OpCode32SimdMemPair)); // Regs = 2.
@ -918,8 +920,8 @@ namespace ARMeilleure.Decoders
            SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe,   InstEmit32.Vrecpe,   typeof(OpCode32SimdSqrte));
            SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps,   InstEmit32.Vrecps,   typeof(OpCode32SimdReg));
            SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev,     InstEmit32.Vrev,     typeof(OpCode32SimdRev));
-            SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint,    InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI));
-            SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint,    InstEmit32.Vrint_Z,  typeof(OpCode32SimdCvtFI));
+            SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint,    InstEmit32.Vrint_RM, typeof(OpCode32SimdS));
+            SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint,    InstEmit32.Vrint_Z,  typeof(OpCode32SimdS));
            SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr,    InstEmit32.Vrshr,    typeof(OpCode32SimdShImm));
            SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte,  InstEmit32.Vrsqrte,  typeof(OpCode32SimdSqrte));
            SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts,  InstEmit32.Vrsqrts,  typeof(OpCode32SimdReg));
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@ -231,6 +231,38 @@ namespace ARMeilleure.Instructions
            }
        }

+        public static void Vfma_S(ArmEmitterContext context) // Fused.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                // TODO: Use FMA instruction set.
+                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+            }
+            else
+            {
+                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+                });
+            }
+        }
+
+        public static void Vfms_S(ArmEmitterContext context) // Fused.
+        {
+            if (Optimizations.FastFP && Optimizations.UseSse2)
+            {
+                // TODO: Use FMA instruction set.
+                EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+            }
+            else
+            {
+                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+                {
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+                });
+            }
+        }
+
        public static void Vmov_S(ArmEmitterContext context)
        {
            if (Optimizations.FastFP && Optimizations.UseSse2)
@ -586,7 +618,8 @@ namespace ARMeilleure.Instructions
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
-                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
                });
            }
        }
@ -657,7 +690,8 @@ namespace ARMeilleure.Instructions
            {
                EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
                {
-                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+                    Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+                    return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
                });
            }
        }
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@ -139,6 +139,7 @@ namespace ARMeilleure.Instructions
            }
        }

+        // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
        public static void Vcvt_FI(ArmEmitterContext context)
        {
            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
@ -236,13 +237,14 @@ namespace ARMeilleure.Instructions
            return roundMode;
        }

-        public static void Vcvt_R(ArmEmitterContext context)
+        // VCVTA/M/N/P (floating-point).
+        public static void Vcvt_RM(ArmEmitterContext context)
        {
-            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

-            bool unsigned = (op.Opc & 1) == 0;
+            bool unsigned = op.Opc == 0;
            int rm = op.Opc2 & 3;

            if (Optimizations.UseSse41 && rm != 0b00)
@ -277,9 +279,10 @@ namespace ARMeilleure.Instructions
            }
        }

+        // VRINTA/M/N/P (floating-point).
        public static void Vrint_RM(ArmEmitterContext context)
        {
-            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;

@ -320,9 +323,10 @@ namespace ARMeilleure.Instructions
            }
        }

+        // VRINTZ (floating-point).
        public static void Vrint_Z(ArmEmitterContext context)
        {
-            IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.UseSse2)
            {
@ -355,7 +359,7 @@ namespace ARMeilleure.Instructions
        private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
        {
            // A port of the similar round function in InstEmitSimdCvt.
-            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+            OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;
            int shift = doubleSize ? 1 : 2;
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@ -906,7 +906,7 @@ namespace ARMeilleure.Instructions
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;
-            int shift = doubleSize ? 1 : 2;
+
            Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
            Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;

--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@ -559,7 +559,7 @@ namespace ARMeilleure.Instructions
            }
        }

-        public static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+        private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@ -563,6 +563,8 @@ namespace ARMeilleure.Instructions
        Vdup,
        Veor,
        Vext,
+        Vfma,
+        Vfms,
        Vld1,
        Vld2,
        Vld3,
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
    {
        private const string HeaderMagic = "PTChd";

-        private const int InternalVersion = 20; //! To be incremented manually for each change to the ARMeilleure project.
+        private const int InternalVersion = 1471; //! To be incremented manually for each change to the ARMeilleure project.

        private const string BaseDir = "Ryujinx";