Implement Fast Paths for most A32 SIMD instructions (#952)
* Begin work on A32 SIMD Intrinsics
* More instructions, some cleanup.
* Intrinsics for Move instructions (zip etc.). These pass the existing tests.
* Intrinsics for some of Cvt. While doing this I noticed that the conversion for int/fp was incorrect in the slow path. I'll fix this in the original repo.
* Intrinsics for more Arithmetic instructions.
* Intrinsics for Vext
* Fix VEXT Intrinsic for double words.
* Use InsertPs to move scalar values.
* Cleanup, fix VPADD.f32 and VMIN signed integer.
* Cleanup, add SSE2 support for scalar insert. Works similarly to the IR scalar insert, but obviously this one works directly on V128.
* Minor cleanup.
* Enable intrinsic for FP64 to integer conversion.
* Address feedback apart from splitting out intrinsic float abs. Also: treat bad VREV encodings as undefined rather than throwing in translation.
* Move float abs to helper, fix bug with cvt
* Rename opc2 & opc3 to match A32 docs, use ArgumentOutOfRangeException appropriately.
* Get name of variable at compilation rather than string literal.
* Use correct double sign mask.
This commit is contained in:
parent
d9ed827696
commit
68e15c1a74
12 changed files with 2077 additions and 400 deletions
|
@ -1,4 +1,5 @@
|
|||
using ARMeilleure.Decoders;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
|
||||
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
|
||||
|
@ -9,7 +10,14 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
public static void Vand_I(ArmEmitterContext context)
|
||||
{
|
||||
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
EmitVectorBinaryOpF32(context, Intrinsic.X86Pand, Intrinsic.X86Pand);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2));
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vbif(ArmEmitterContext context)
|
||||
|
@ -24,33 +32,64 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
public static void Vbsl(ArmEmitterContext context)
|
||||
{
|
||||
EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
return context.BitwiseExclusiveOr(
|
||||
context.BitwiseAnd(op1,
|
||||
context.BitwiseExclusiveOr(op2, op3)), op3);
|
||||
});
|
||||
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);
|
||||
res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
|
||||
return context.AddIntrinsic(Intrinsic.X86Pxor, res, m);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx32(context, (op1, op2, op3) =>
|
||||
{
|
||||
return context.BitwiseExclusiveOr(
|
||||
context.BitwiseAnd(op1,
|
||||
context.BitwiseExclusiveOr(op2, op3)), op3);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vorr_I(ArmEmitterContext context)
|
||||
{
|
||||
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
EmitVectorBinaryOpF32(context, Intrinsic.X86Por, Intrinsic.X86Por);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2));
|
||||
}
|
||||
}
|
||||
|
||||
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
||||
EmitVectorTernaryOpZx32(context, (d, n, m) =>
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
if (notRm)
|
||||
EmitVectorTernaryOpSimd32(context, (d, n, m) =>
|
||||
{
|
||||
m = context.BitwiseNot(m);
|
||||
}
|
||||
return context.BitwiseExclusiveOr(
|
||||
context.BitwiseAnd(m,
|
||||
context.BitwiseExclusiveOr(d, n)), d);
|
||||
});
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
|
||||
res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
|
||||
return context.AddIntrinsic(Intrinsic.X86Pxor, d, res);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorTernaryOpZx32(context, (d, n, m) =>
|
||||
{
|
||||
if (notRm)
|
||||
{
|
||||
m = context.BitwiseNot(m);
|
||||
}
|
||||
return context.BitwiseExclusiveOr(
|
||||
context.BitwiseAnd(m,
|
||||
context.BitwiseExclusiveOr(d, n)), d);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue