CPU (A64): Add Pmull_V Inst. with Clmul fast path for the "1/2D -> 1Q" variant & Sse fast path and slow path for both the "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test. (#1817)

* Add Pmull_V Sse fast path only, both "8/16B -> 8H" and "1/2D -> 1Q" variants; with Test.

* Add Clmul fast path for the 128 bits variant.

* Small optimisation (save 60 instructions) for the Sse fast path about the 128 bits variant.

* Add slow path, both variants. Fix V128 Shl/Shr when shift = 0.

* A32: Add Vmull_I P64 variant (slow path); not tested.

* A32: Add Vmull_I_P8_P64 Test and fix P64 variant.
This commit is contained in:
LDj3SNuD 2021-01-04 23:45:54 +01:00 committed by GitHub
parent a03ab0c4a0
commit 430ba6da65
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 264 additions and 25 deletions

View file

@ -920,7 +920,19 @@ namespace ARMeilleure.Instructions
if (op.Polynomial)
{
EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
if (op.Size == 0) // P8
{
EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
}
else /* if (op.Size == 2) // P64 */
{
Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1);
Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me);
context.Copy(GetVecA32(op.Qd), res);
}
}
else
{
@ -1366,27 +1378,5 @@ namespace ARMeilleure.Instructions
EmitVectorBinaryOpSimd32(context, genericEmit);
}
}
private static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
{
Debug.Assert(eSize <= 32);
Operand result = eSize == 32 ? Const(0L) : Const(0);
if (eSize == 32)
{
op1 = context.ZeroExtend32(OperandType.I64, op1);
op2 = context.ZeroExtend32(OperandType.I64, op2);
}
for (int i = 0; i < eSize; i++)
{
Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
}
return result;
}
}
}