Add intrinsics support (#121)

* Initial intrinsics support

* Update tests to work with the new Vector128 type and intrinsics

* Drop SSE4.1 requirement

* Fix copy-paste mistake
This commit is contained in:
gdkchan 2018-05-11 20:10:27 -03:00 committed by GitHub
parent 8e306b3ac1
commit f9f111bc85
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
36 changed files with 1658 additions and 1111 deletions

View file

@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -41,7 +42,14 @@ namespace ChocolArm64.Instruction
public static void Add_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Add));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Addhn_V(AILEmitterCtx Context)
@ -158,7 +166,7 @@ namespace ChocolArm64.Instruction
Context.Emit(OpCodes.Conv_U1);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8));
Context.Emit(OpCodes.Conv_U8);
@ -303,12 +311,26 @@ namespace ChocolArm64.Instruction
public static void Fadd_S(AILEmitterCtx Context)
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.AddScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Fadd_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Add));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Faddp_V(AILEmitterCtx Context)
@ -345,12 +367,26 @@ namespace ChocolArm64.Instruction
public static void Fdiv_S(AILEmitterCtx Context)
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.DivideScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
}
}
public static void Fdiv_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Divide));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
}
}
public static void Fmadd_S(AILEmitterCtx Context)
@ -370,11 +406,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -391,11 +427,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -412,11 +448,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -435,11 +471,11 @@ namespace ChocolArm64.Instruction
{
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -505,7 +541,14 @@ namespace ChocolArm64.Instruction
public static void Fmul_S(AILEmitterCtx Context)
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
}
}
public static void Fmul_Se(AILEmitterCtx Context)
@ -515,7 +558,14 @@ namespace ChocolArm64.Instruction
public static void Fmul_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Multiply));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
}
}
public static void Fmul_Ve(AILEmitterCtx Context)
@ -716,11 +766,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -743,11 +793,11 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -819,11 +869,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -844,11 +894,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -947,12 +997,26 @@ namespace ChocolArm64.Instruction
public static void Fsub_S(AILEmitterCtx Context)
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.SubtractScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Fsub_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Subtract));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Mla_V(AILEmitterCtx Context)
@ -1066,7 +1130,14 @@ namespace ChocolArm64.Instruction
public static void Sub_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Subtract));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Subhn_V(AILEmitterCtx Context)

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitAluHelper;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -13,17 +14,38 @@ namespace ChocolArm64.Instruction
{
public static void Cmeq_V(AILEmitterCtx Context)
{
EmitVectorCmp(Context, OpCodes.Beq_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareEqual));
}
else
{
EmitVectorCmp(Context, OpCodes.Beq_S);
}
}
public static void Cmge_V(AILEmitterCtx Context)
{
EmitVectorCmp(Context, OpCodes.Bge_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareGreaterThanOrEqual));
}
else
{
EmitVectorCmp(Context, OpCodes.Bge_S);
}
}
public static void Cmgt_V(AILEmitterCtx Context)
{
EmitVectorCmp(Context, OpCodes.Bgt_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan));
}
else
{
EmitVectorCmp(Context, OpCodes.Bgt_S);
}
}
public static void Cmhi_V(AILEmitterCtx Context)
@ -112,32 +134,74 @@ namespace ChocolArm64.Instruction
public static void Fcmeq_S(AILEmitterCtx Context)
{
EmitScalarFcmp(Context, OpCodes.Beq_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Beq_S);
}
}
public static void Fcmeq_V(AILEmitterCtx Context)
{
EmitVectorFcmp(Context, OpCodes.Beq_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareEqual));
}
else
{
EmitVectorFcmp(Context, OpCodes.Beq_S);
}
}
public static void Fcmge_S(AILEmitterCtx Context)
{
EmitScalarFcmp(Context, OpCodes.Bge_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Bge_S);
}
}
public static void Fcmge_V(AILEmitterCtx Context)
{
EmitVectorFcmp(Context, OpCodes.Bge_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual));
}
else
{
EmitVectorFcmp(Context, OpCodes.Bge_S);
}
}
public static void Fcmgt_S(AILEmitterCtx Context)
{
EmitScalarFcmp(Context, OpCodes.Bgt_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Bgt_S);
}
}
public static void Fcmgt_V(AILEmitterCtx Context)
{
EmitVectorFcmp(Context, OpCodes.Bgt_S);
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan));
}
else
{
EmitVectorFcmp(Context, OpCodes.Bgt_S);
}
}
public static void Fcmle_S(AILEmitterCtx Context)

View file

@ -382,15 +382,15 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF32ToS32)
: nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF32ToS32)
: nameof(AVectorHelper.SatF32ToU32));
}
else /* if (SizeF == 1) */
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF64ToS64)
: nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF64ToS64)
: nameof(AVectorHelper.SatF64ToU64));
}
if (SizeF == 0)
@ -420,22 +420,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS64));
}
}
}
@ -453,22 +453,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU64));
}
}
}

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
@ -32,6 +34,129 @@ namespace ChocolArm64.Instruction
return (8 << (Op.Size + 1)) - Op.Imm;
}
public static void EmitSse2Call(AILEmitterCtx Context, string Name)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
void Ldvec(int Reg)
{
Context.EmitLdvec(Reg);
switch (Op.Size)
{
case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;
}
}
Ldvec(Op.Rn);
Type BaseType = null;
Type[] Types;
switch (Op.Size)
{
case 0: BaseType = typeof(Vector128<sbyte>); break;
case 1: BaseType = typeof(Vector128<short>); break;
case 2: BaseType = typeof(Vector128<int>); break;
case 3: BaseType = typeof(Vector128<long>); break;
}
if (Op is AOpCodeSimdReg BinOp)
{
Ldvec(BinOp.Rm);
Types = new Type[] { BaseType, BaseType };
}
else
{
Types = new Type[] { BaseType };
}
Context.EmitCall(typeof(Sse2).GetMethod(Name, Types));
switch (Op.Size)
{
case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break;
case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break;
case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break;
case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break;
}
Context.EmitStvec(Op.Rd);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
public static void EmitSse2CallF(AILEmitterCtx Context, string Name)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
void Ldvec(int Reg)
{
Context.EmitLdvec(Reg);
if (SizeF == 1)
{
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
}
}
Ldvec(Op.Rn);
Type BaseType = SizeF == 0
? typeof(Vector128<float>)
: typeof(Vector128<double>);
Type[] Types;
if (Op is AOpCodeSimdReg BinOp)
{
Ldvec(BinOp.Rm);
Types = new Type[] { BaseType, BaseType };
}
else
{
Types = new Type[] { BaseType };
}
MethodInfo MthdInfo;
if (SizeF == 0)
{
MthdInfo = typeof(Sse).GetMethod(Name, Types);
}
else /* if (SizeF == 1) */
{
MthdInfo = typeof(Sse2).GetMethod(Name, Types);
}
Context.EmitCall(MthdInfo);
if (SizeF == 1)
{
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
}
Context.EmitStvec(Op.Rd);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name)
{
IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
@ -596,9 +721,9 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.VectorExtractIntSx)
: nameof(ASoftFallback.VectorExtractIntZx));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.VectorExtractIntSx)
: nameof(AVectorHelper.VectorExtractIntZx));
}
public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
@ -610,11 +735,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble));
}
else
{
@ -646,7 +771,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -659,7 +784,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvectmp();
}
@ -673,7 +798,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -687,11 +812,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{
@ -710,11 +835,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{

View file

@ -2,6 +2,7 @@ using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -11,7 +12,14 @@ namespace ChocolArm64.Instruction
{
public static void And_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.And));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
}
}
public static void Bic_V(AILEmitterCtx Context)
@ -95,7 +103,14 @@ namespace ChocolArm64.Instruction
public static void Eor_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Xor));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
}
}
public static void Not_V(AILEmitterCtx Context)
@ -114,7 +129,14 @@ namespace ChocolArm64.Instruction
public static void Orr_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Or));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
}
}
public static void Orr_Vi(AILEmitterCtx Context)

View file

@ -234,21 +234,21 @@ namespace ChocolArm64.Instruction
switch (Op.Size)
{
case 1: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl1_V64),
nameof(ASoftFallback.Tbl1_V128)); break;
case 1: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl1_V64),
nameof(AVectorHelper.Tbl1_V128)); break;
case 2: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl2_V64),
nameof(ASoftFallback.Tbl2_V128)); break;
case 2: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl2_V64),
nameof(AVectorHelper.Tbl2_V128)); break;
case 3: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl3_V64),
nameof(ASoftFallback.Tbl3_V128)); break;
case 3: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl3_V64),
nameof(AVectorHelper.Tbl3_V128)); break;
case 4: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl4_V64),
nameof(ASoftFallback.Tbl4_V128)); break;
case 4: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl4_V64),
nameof(AVectorHelper.Tbl4_V128)); break;
default: throw new InvalidOperationException();
}

View file

@ -1,20 +1,11 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace ChocolArm64.Instruction
{
static class ASoftFallback
{
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
Context.EmitCall(typeof(ASoftFallback), IsSimd64 ? Name64 : Name128);
}
public static void EmitCall(AILEmitterCtx Context, string MthdName)
{
Context.EmitCall(typeof(ASoftFallback), MthdName);
@ -160,78 +151,6 @@ namespace ChocolArm64.Instruction
throw new ArgumentException(nameof(Size));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
public static long SMulHi128(long LHS, long RHS)
{
return (long)(BigInteger.Multiply(LHS, RHS) >> 64);
@ -241,239 +160,5 @@ namespace ChocolArm64.Instruction
{
return (ulong)(BigInteger.Multiply(LHS, RHS) >> 64);
}
public static int CountSetBits8(byte Value)
{
return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
((Value >> 2) & 1) + ((Value >> 3) & 1) +
((Value >> 4) & 1) + ((Value >> 5) & 1) +
((Value >> 6) & 1) + (Value >> 7);
}
public static float MaxF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 && BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 > val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Max(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 && BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 > val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float MinF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 || BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 < val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Min(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 || BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 < val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float RoundF(float Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return MathF.Round (Value);
case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value);
case ARoundMode.TowardsZero: return MathF.Truncate(Value);
}
throw new InvalidOperationException();
}
public static double Round(double Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return Math.Round (Value);
case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value);
case ARoundMode.TowardsZero: return Math.Truncate(Value);
}
throw new InvalidOperationException();
}
public static AVec Tbl1_V64(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 8, Tb0);
}
public static AVec Tbl1_V128(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 16, Tb0);
}
public static AVec Tbl2_V64(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 8, Tb0, Tb1);
}
public static AVec Tbl2_V128(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 16, Tb0, Tb1);
}
public static AVec Tbl3_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2);
}
public static AVec Tbl3_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2);
}
public static AVec Tbl4_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
}
public static AVec Tbl4_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
}
private static AVec Tbl(AVec Vector, int Bytes, params AVec[] Tb)
{
AVec Res = new AVec();
byte[] Table = new byte[Tb.Length * 16];
for (int Index = 0; Index < Tb.Length; Index++)
for (int Index2 = 0; Index2 < 16; Index2++)
{
Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
}
for (int Index = 0; Index < Bytes; Index++)
{
byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
if (TblIdx < Table.Length)
{
Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
}
}
return Res;
}
public static ulong VectorExtractIntZx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return Vector.ExtractByte (Index);
case 1: return Vector.ExtractUInt16(Index);
case 2: return Vector.ExtractUInt32(Index);
case 3: return Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static long VectorExtractIntSx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return (sbyte)Vector.ExtractByte (Index);
case 1: return (short)Vector.ExtractUInt16(Index);
case 2: return (int)Vector.ExtractUInt32(Index);
case 3: return (long)Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static float VectorExtractSingle(AVec Vector, int Index)
{
return Vector.ExtractSingle(Index);
}
public static double VectorExtractDouble(AVec Vector, int Index)
{
return Vector.ExtractDouble(Index);
}
public static AVec VectorInsertSingle(float Value, AVec Vector, int Index)
{
return AVec.InsertSingle(Vector, Index, Value);
}
public static AVec VectorInsertDouble(double Value, AVec Vector, int Index)
{
return AVec.InsertDouble(Vector, Index, Value);
}
public static AVec VectorInsertInt(ulong Value, AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return AVec.InsertByte (Vector, Index, (byte)Value);
case 1: return AVec.InsertUInt16(Vector, Index, (ushort)Value);
case 2: return AVec.InsertUInt32(Vector, Index, (uint)Value);
case 3: return AVec.InsertUInt64(Vector, Index, (ulong)Value);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
}
}

View file

@ -0,0 +1,626 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
static class AVectorHelper
{
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
Context.EmitCall(typeof(AVectorHelper), IsSimd64 ? Name64 : Name128);
}
public static void EmitCall(AILEmitterCtx Context, string MthdName)
{
Context.EmitCall(typeof(AVectorHelper), MthdName);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
public static int CountSetBits8(byte Value)
{
return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
((Value >> 2) & 1) + ((Value >> 3) & 1) +
((Value >> 4) & 1) + ((Value >> 5) & 1) +
((Value >> 6) & 1) + (Value >> 7);
}
public static double Max(double LHS, double RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.DoubleToInt64Bits(LHS) < 0 &&
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
}
if (LHS > RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
}
public static float MaxF(float LHS, float RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.SingleToInt32Bits(LHS) < 0 &&
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
}
if (LHS > RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
}
public static double Min(double LHS, double RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.DoubleToInt64Bits(LHS) < 0 ||
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
}
if (LHS < RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
}
public static float MinF(float LHS, float RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.SingleToInt32Bits(LHS) < 0 ||
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
}
if (LHS < RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
}
public static double Round(double Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return Math.Round (Value);
case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value);
case ARoundMode.TowardsZero: return Math.Truncate(Value);
}
throw new InvalidOperationException();
}
public static float RoundF(float Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return MathF.Round (Value);
case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value);
case ARoundMode.TowardsZero: return MathF.Truncate(Value);
}
throw new InvalidOperationException();
}
public static Vector128<float> Tbl1_V64(
Vector128<float> Vector,
Vector128<float> Tb0)
{
return Tbl(Vector, 8, Tb0);
}
public static Vector128<float> Tbl1_V128(
Vector128<float> Vector,
Vector128<float> Tb0)
{
return Tbl(Vector, 16, Tb0);
}
public static Vector128<float> Tbl2_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1)
{
return Tbl(Vector, 8, Tb0, Tb1);
}
public static Vector128<float> Tbl2_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1)
{
return Tbl(Vector, 16, Tb0, Tb1);
}
public static Vector128<float> Tbl3_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2);
}
public static Vector128<float> Tbl3_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2);
}
public static Vector128<float> Tbl4_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2,
Vector128<float> Tb3)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
}
public static Vector128<float> Tbl4_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2,
Vector128<float> Tb3)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
}
private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb)
{
Vector128<float> Res = new Vector128<float>();
byte[] Table = new byte[Tb.Length * 16];
for (byte Index = 0; Index < Tb.Length; Index++)
for (byte Index2 = 0; Index2 < 16; Index2++)
{
Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
}
for (byte Index = 0; Index < Bytes; Index++)
{
byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
if (TblIdx < Table.Length)
{
Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
}
}
return Res;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
case 1:
return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
case 2:
return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
case 3:
return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
switch (Size)
{
case 0:
return (sbyte)VectorExtractIntZx(Vector, Index, Size);
case 1:
return (short)VectorExtractIntZx(Vector, Index, Size);
case 2:
return (int)VectorExtractIntZx(Vector, Index, Size);
case 3:
return (long)VectorExtractIntZx(Vector, Index, Size);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
case 1:
return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
case 2:
return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
case 3:
return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
switch (Size)
{
case 0:
return (byte)(Value >> (Index & 1) * 8);
case 1:
return Value;
case 2:
case 3:
{
ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
if (Size == 2)
{
return (uint)(Value | (Value1 << 16));
}
ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
return ((ulong)Value << 0) |
((ulong)Value1 << 16) |
((ulong)Value2 << 32) |
((ulong)Value3 << 48);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
{
if (Sse41.IsSupported)
{
return Sse41.Extract(Vector, Index);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index)
{
return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(Value), Vector, Index, 3);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
case 3:
return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
switch (Size)
{
case 0:
{
ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
int Shift = (Index & 1) * 8;
ShortVal &= (ushort)(0xff00 >> Shift);
ShortVal |= (ushort)((byte)Value << Shift);
return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
}
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
case 3:
{
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
if (Size == 3)
{
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
}
return Sse.StaticCast<ushort, float>(ShortVector);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index)
{
if (Sse41.IsSupported)
{
return Sse41.Insert(Vector, Value, (byte)(Index << 4));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, sbyte>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, short>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, int>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, long>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, double>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<sbyte, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<short, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<int, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<long, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<double, float>(Vector);
}
throw new PlatformNotSupportedException();
}
}
}