Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268)

* Update CpuTestSimdArithmetic.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Update Instructions.cs

* Update AInstEmitSimdArithmetic.cs

* Update AInstEmitSimdHelper.cs

* Update AInstEmitSimdMove.cs

* Delete CpuTestSimdMove.cs
This commit is contained in:
LDj3SNuD 2018-07-15 05:53:26 +02:00 committed by gdkchan
parent 50b706e2ba
commit 063fae50fe
8 changed files with 2385 additions and 1516 deletions

View file

@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Op.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
{
int Elem = (Index & ~1) + Part;
int Idx = Index << 1;
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
EmitVectorInsertTmp(Context, Idx , Op.Size);
}
Context.EmitLdvectmp();
@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Op.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
int Half = Elems >> 1;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
{
int Elem = Part + ((Index & (Half - 1)) << 1);
int Idx = Index << 1;
EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
}
Context.EmitLdvectmp();
@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int Bytes = Op.GetBitsCount() >> 3;
int Words = Op.GetBitsCount() >> 4;
int Pairs = Words >> Op.Size;
int Elems = Bytes >> Op.Size;
int Half = Elems >> 1;
int Base = Part != 0 ? Pairs : 0;
for (int Index = 0; Index < Elems; Index++)
for (int Index = 0; Index < Pairs; Index++)
{
int Elem = Part * Half + (Index >> 1);
int Idx = Index << 1;
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
EmitVectorInsertTmp(Context, Index, Op.Size);
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
EmitVectorInsertTmp(Context, Idx , Op.Size);
}
Context.EmitLdvectmp();