ARMeilleure: Hardware accelerate SHA256 (#3585)
* ARMeilleure/HardwareCapabilities: Add Sha * ARMeilleure/Intrinsic: Add X86Sha256Rnds2 * ARmeilleure: Hardware accelerate SHA256H/SHA256H2 * ARMeilleure/Intrinsic: Add X86Sha256Msg1, X86Sha256Msg2 * ARMeilleure/Intrinsic: Add X86Palignr * ARMeilleure: Hardware accelerate SHA256SU0, SHA256SU1 * PTC: Bump InternalVersion
This commit is contained in:
parent
eba682b767
commit
f5235fff29
12 changed files with 136 additions and 37 deletions
|
@ -157,6 +157,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
|
@ -239,6 +240,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||
Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||
Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
|
||||
Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
|
||||
Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
|
||||
Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
|
||||
Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||
Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||
Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
|
|
|
@ -12,21 +12,28 @@ namespace ARMeilleure.CodeGen.X86
|
|||
return;
|
||||
}
|
||||
|
||||
(_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000);
|
||||
(int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
|
||||
|
||||
FeatureInfoEdx = (FeatureFlagsEdx)edx;
|
||||
FeatureInfoEcx = (FeatureFlagsEcx)ecx;
|
||||
(_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
|
||||
FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
|
||||
FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
|
||||
|
||||
if (maxNum >= 7)
|
||||
{
|
||||
(_, int ebx7, _, _) = X86Base.CpuId(0x00000007, 0x00000000);
|
||||
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
|
||||
}
|
||||
}
|
||||
|
||||
[Flags]
|
||||
public enum FeatureFlagsEdx
|
||||
public enum FeatureFlags1Edx
|
||||
{
|
||||
Sse = 1 << 25,
|
||||
Sse2 = 1 << 26
|
||||
}
|
||||
|
||||
[Flags]
|
||||
public enum FeatureFlagsEcx
|
||||
public enum FeatureFlags1Ecx
|
||||
{
|
||||
Sse3 = 1 << 0,
|
||||
Pclmulqdq = 1 << 1,
|
||||
|
@ -40,21 +47,31 @@ namespace ARMeilleure.CodeGen.X86
|
|||
F16c = 1 << 29
|
||||
}
|
||||
|
||||
public static FeatureFlagsEdx FeatureInfoEdx { get; }
|
||||
public static FeatureFlagsEcx FeatureInfoEcx { get; }
|
||||
[Flags]
|
||||
public enum FeatureFlags7Ebx
|
||||
{
|
||||
Avx2 = 1 << 5,
|
||||
Sha = 1 << 29
|
||||
}
|
||||
|
||||
public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse);
|
||||
public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2);
|
||||
public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3);
|
||||
public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq);
|
||||
public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3);
|
||||
public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma);
|
||||
public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41);
|
||||
public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42);
|
||||
public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt);
|
||||
public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes);
|
||||
public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx);
|
||||
public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c);
|
||||
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
|
||||
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
|
||||
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
|
||||
|
||||
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
|
||||
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
|
||||
public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
|
||||
public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
|
||||
public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
|
||||
public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
|
||||
public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
|
||||
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
|
||||
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
|
||||
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
|
||||
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
|
||||
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
|
||||
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
|
||||
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
|
||||
|
||||
public static bool ForceLegacySse { get; set; }
|
||||
|
||||
|
|
|
@ -82,6 +82,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
|
||||
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
|
||||
|
@ -151,6 +152,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
|
||||
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
|
||||
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
|
||||
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
|
||||
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
|
||||
|
|
|
@ -308,11 +308,13 @@ namespace ARMeilleure.CodeGen.X86
|
|||
|
||||
case Instruction.Extended:
|
||||
{
|
||||
bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
|
||||
node.Intrinsic == Intrinsic.X86Blendvps ||
|
||||
node.Intrinsic == Intrinsic.X86Pblendvb;
|
||||
|
||||
// BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
||||
if ((node.Intrinsic == Intrinsic.X86Blendvpd ||
|
||||
node.Intrinsic == Intrinsic.X86Blendvps ||
|
||||
node.Intrinsic == Intrinsic.X86Pblendvb) &&
|
||||
!HardwareCapabilities.SupportsVexEncoding)
|
||||
// SHA256RNDS2 always has an implied XMM0 as a last operand.
|
||||
if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
|
||||
{
|
||||
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
||||
|
||||
|
|
|
@ -98,6 +98,7 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Paddd,
|
||||
Paddq,
|
||||
Paddw,
|
||||
Palignr,
|
||||
Pand,
|
||||
Pandn,
|
||||
Pavgb,
|
||||
|
@ -180,6 +181,9 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Rsqrtss,
|
||||
Sar,
|
||||
Setcc,
|
||||
Sha256Msg1,
|
||||
Sha256Msg2,
|
||||
Sha256Rnds2,
|
||||
Shl,
|
||||
Shr,
|
||||
Shufpd,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue