CPU (A64): Add Fmaxnmp & Fminnmp Scalar Inst.s, Fast & Slow Paths; with Tests. (#1894)
This commit is contained in:
parent
b8353f5639
commit
c3e0c41da3
7 changed files with 111 additions and 25 deletions
|
@ -347,19 +347,17 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
public static void Faddp_S(ArmEmitterContext context)
|
||||
{
|
||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseSse3)
|
||||
{
|
||||
if (sizeF == 0)
|
||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||
|
||||
if ((op.Size & 1) == 0)
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn));
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
}
|
||||
else /* if (sizeF == 1) */
|
||||
else /* if ((op.Size & 1) == 1) */
|
||||
{
|
||||
Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn));
|
||||
|
||||
|
@ -368,14 +366,10 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
else
|
||||
{
|
||||
OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
|
||||
|
||||
Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0);
|
||||
Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1);
|
||||
|
||||
Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), ne0, ne1);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
||||
EmitScalarPairwiseOpF(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -552,6 +546,24 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits the scalar pairwise FMAXNMP operation for the current opcode.
/// </summary>
/// <remarks>
/// When both <c>Optimizations.FastFP</c> and <c>Optimizations.UseSse41</c> are set, the
/// pair is combined through <c>EmitSse41MaxMinNumOpF</c> (max-num, scalar form).
/// Otherwise the pair is combined through a soft-float call named after
/// <c>SoftFloat32.FPMaxNum</c>.
/// </remarks>
/// <param name="context">Emitter context holding the opcode being translated.</param>
public static void Fmaxnmp_S(ArmEmitterContext context)
{
    bool canUseFastPath = Optimizations.FastFP && Optimizations.UseSse41;

    if (!canUseFastPath)
    {
        // Slow path: extract both elements and hand them to the soft-float routine.
        EmitScalarPairwiseOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), lhs, rhs));

        return;
    }

    // Fast path: split the pair into two vectors and combine them with intrinsics.
    EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
        EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, lhs, rhs));
}
|
||||
|
||||
public static void Fmaxnmp_V(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41)
|
||||
|
@ -708,6 +720,24 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Emits the scalar pairwise FMINNMP operation for the current opcode.
/// </summary>
/// <remarks>
/// When both <c>Optimizations.FastFP</c> and <c>Optimizations.UseSse41</c> are set, the
/// pair is combined through <c>EmitSse41MaxMinNumOpF</c> (min-num, scalar form).
/// Otherwise the pair is combined through a soft-float call named after
/// <c>SoftFloat32.FPMinNum</c>.
/// </remarks>
/// <param name="context">Emitter context holding the opcode being translated.</param>
public static void Fminnmp_S(ArmEmitterContext context)
{
    bool canUseFastPath = Optimizations.FastFP && Optimizations.UseSse41;

    if (!canUseFastPath)
    {
        // Slow path: extract both elements and hand them to the soft-float routine.
        EmitScalarPairwiseOpF(context, (lhs, rhs) =>
            EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), lhs, rhs));

        return;
    }

    // Fast path: split the pair into two vectors and combine them with intrinsics.
    EmitSse2ScalarPairwiseOpF(context, (lhs, rhs) =>
        EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, lhs, rhs));
}
|
||||
|
||||
public static void Fminnmp_V(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41)
|
||||
|
|
|
@ -1118,6 +1118,49 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
}
|
||||
|
||||
/// <summary>
/// Generic scalar pairwise floating-point helper: reads elements 0 and 1 of the
/// source vector register, combines them with <paramref name="emit"/>, and writes the
/// result to element 0 of the destination register with every other element zero.
/// </summary>
/// <param name="context">Emitter context holding the opcode being translated.</param>
/// <param name="emit">Callback that produces the combined value from the two elements.</param>
public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    // Bit 0 of the size field selects between single and double precision elements.
    OperandType elemType;

    if ((op.Size & 1) == 0)
    {
        elemType = OperandType.FP32;
    }
    else
    {
        elemType = OperandType.FP64;
    }

    Operand first  = context.VectorExtract(elemType, GetVec(op.Rn), 0);
    Operand second = context.VectorExtract(elemType, GetVec(op.Rn), 1);

    // The zero vector is created before the callback runs, matching the order in
    // which the operations are emitted by a single nested call expression.
    Operand zeroVec  = context.VectorZero();
    Operand combined = emit(first, second);

    Operand result = context.VectorInsert(zeroVec, combined, 0);

    context.Copy(GetVec(op.Rd), result);
}
|
||||
|
||||
/// <summary>
/// Intrinsic-based scalar pairwise floating-point helper: builds two vectors from the
/// source register, one per element of the pair, passes them to
/// <paramref name="emit"/>, and copies the callback's result to the destination register.
/// </summary>
/// <param name="context">Emitter context holding the opcode being translated.</param>
/// <param name="emit">Callback that combines the two prepared vectors.</param>
public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand source = GetVec(op.Rn);

    Operand firstElem;
    Operand secondElem;

    bool isDouble = (op.Size & 1) != 0;

    if (isDouble)
    {
        Operand zero = context.VectorZero();

        // NOTE(review): per the x86 MOVLHPS/MOVHLPS definitions this should leave the
        // low / high 64-bit element of the source in lane 0 with a zero lane 1 --
        // confirm against AddIntrinsic's operand convention.
        firstElem  = context.AddIntrinsic(Intrinsic.X86Movlhps, source, zero);
        secondElem = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, source);
    }
    else
    {
        // PSHUFD immediates, two bits per destination lane (lane 3 .. lane 0):
        //   0xA8 == 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0  -> lanes 3..1 take element 2, lane 0 takes element 0.
        //   0xA9 == 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0  -> lanes 3..1 take element 2, lane 0 takes element 1.
        // Element 2 sits in the upper 64 bits, which VectorZeroUpper64 (going by its
        // name) has cleared beforehand.
        const int pickElem0 = 0xA8;
        const int pickElem1 = 0xA9;

        Operand lowerHalf = context.VectorZeroUpper64(source);

        firstElem  = context.AddIntrinsic(Intrinsic.X86Pshufd, lowerHalf, Const(pickElem0));
        secondElem = context.AddIntrinsic(Intrinsic.X86Pshufd, lowerHalf, Const(pickElem1));
    }

    context.Copy(GetVec(op.Rd), emit(firstElem, secondElem));
}
|
||||
|
||||
public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
|
||||
{
|
||||
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||
|
|
|
@ -212,6 +212,7 @@ namespace ARMeilleure.Instructions
|
|||
Fmax_V,
|
||||
Fmaxnm_S,
|
||||
Fmaxnm_V,
|
||||
Fmaxnmp_S,
|
||||
Fmaxnmp_V,
|
||||
Fmaxnmv_V,
|
||||
Fmaxp_V,
|
||||
|
@ -220,6 +221,7 @@ namespace ARMeilleure.Instructions
|
|||
Fmin_V,
|
||||
Fminnm_S,
|
||||
Fminnm_V,
|
||||
Fminnmp_S,
|
||||
Fminnmp_V,
|
||||
Fminnmv_V,
|
||||
Fminp_V,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue