Adjust naming conventions for Ryujinx and ChocolArm64 projects (#484)

* Change naming convention for Ryujinx project

* Change naming convention for ChocolArm64 project

* Fix NaN

* Remove unneeded this. from Ryujinx project

* Adjust naming from new PRs

* Name changes based on feedback

* How did this get removed?

* Rebasing fix

* Change FP enum case

* Remove prefix from ChocolArm64 classes - Part 1

* Remove prefix from ChocolArm64 classes - Part 2

* Fix alignment from last commit's renaming

* Rename namespaces

* Rename stragglers

* Fix alignment

* Rename OpCode class

* Missed a few

* Adjust alignment
This commit is contained in:
Alex Barney 2018-10-30 19:43:02 -06:00 committed by gdkchan
parent 5a87e58183
commit 9cb57fb4bb
314 changed files with 19456 additions and 19456 deletions

View file

@ -0,0 +1,328 @@
// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class CryptoHelper
{
#region "LookUp Tables"
private static byte[] _sBox =
{
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
};
private static byte[] _invSBox =
{
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
};
private static byte[] _gfMul02 =
{
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05,
0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25,
0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45,
0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65,
0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85,
0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5,
0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5,
0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5
};
private static byte[] _gfMul03 =
{
0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11,
0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21,
0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71,
0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41,
0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1,
0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1,
0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1,
0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81,
0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a,
0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba,
0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea,
0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda,
0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a,
0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a,
0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a,
0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a
};
private static byte[] _gfMul09 =
{
0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7,
0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc,
0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01,
0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91,
0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a,
0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa,
0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b,
0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b,
0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0,
0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30,
0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed,
0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d,
0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6,
0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46
};
private static byte[] _gfMul0B =
{
0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69,
0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9,
0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12,
0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2,
0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f,
0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f,
0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4,
0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54,
0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e,
0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e,
0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5,
0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55,
0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68,
0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8,
0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13,
0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3
};
private static byte[] _gfMul0D =
{
0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b,
0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b,
0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0,
0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20,
0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26,
0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6,
0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d,
0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d,
0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91,
0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41,
0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a,
0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa,
0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc,
0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c,
0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47,
0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97
};
private static byte[] _gfMul0E =
{
0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a,
0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba,
0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81,
0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61,
0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7,
0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17,
0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c,
0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc,
0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b,
0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb,
0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0,
0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20,
0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6,
0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56,
0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d,
0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d
};
// Byte permutation used by AesShiftRows: the input byte at index i is written to output index _srPerm[i].
private static byte[] _srPerm = { 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 };
// Byte permutation used by AesInvShiftRows: the input byte at index i is written to output index _isrPerm[i].
private static byte[] _isrPerm = { 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 };
#endregion
/// <summary>
/// Applies the AES InvMixColumns step to the 16 state bytes carried by <paramref name="op"/>.
/// </summary>
/// <param name="op">Vector holding the state; bytes 4*c .. 4*c+3 are processed together as one column.</param>
/// <returns>The vector rebuilt from the transformed bytes.</returns>
public static Vector128<float> AesInvMixColumns(Vector128<float> op)
{
    byte[] source = new byte[16];
    byte[] mixed  = new byte[16];

    FromVectorToByteArray(source, ref op);

    // Process the state one 4-byte column at a time.
    for (int offset = 0; offset < 16; offset += 4)
    {
        byte a = source[offset];
        byte b = source[offset + 1];
        byte c = source[offset + 2];
        byte d = source[offset + 3];

        // Every output byte XORs the four column bytes after looking each one up in the
        // 0E/0B/0D/09 multiplication tables; the table assignment rotates by one per row.
        mixed[offset]     = (byte)(_gfMul0E[a] ^ _gfMul0B[b] ^ _gfMul0D[c] ^ _gfMul09[d]);
        mixed[offset + 1] = (byte)(_gfMul09[a] ^ _gfMul0E[b] ^ _gfMul0B[c] ^ _gfMul0D[d]);
        mixed[offset + 2] = (byte)(_gfMul0D[a] ^ _gfMul09[b] ^ _gfMul0E[c] ^ _gfMul0B[d]);
        mixed[offset + 3] = (byte)(_gfMul0B[a] ^ _gfMul0D[b] ^ _gfMul09[c] ^ _gfMul0E[d]);
    }

    FromByteArrayToVector(mixed, ref op);

    return op;
}
/// <summary>
/// Applies the AES InvShiftRows step: every state byte is moved to the position given by _isrPerm.
/// </summary>
/// <param name="op">Vector holding the 16 state bytes.</param>
/// <returns>The vector rebuilt from the permuted bytes.</returns>
public static Vector128<float> AesInvShiftRows(Vector128<float> op)
{
    byte[] source   = new byte[16];
    byte[] permuted = new byte[16];

    FromVectorToByteArray(source, ref op);

    // _isrPerm has one entry per state byte: entry i is the destination of source byte i.
    int srcIndex = 0;

    foreach (byte dstIndex in _isrPerm)
    {
        permuted[dstIndex] = source[srcIndex];

        srcIndex++;
    }

    FromByteArrayToVector(permuted, ref op);

    return op;
}
/// <summary>
/// Applies the AES InvSubBytes step: every state byte is replaced by its _invSBox entry.
/// </summary>
/// <param name="op">Vector holding the 16 state bytes.</param>
/// <returns>The vector rebuilt from the substituted bytes.</returns>
public static Vector128<float> AesInvSubBytes(Vector128<float> op)
{
    byte[] source      = new byte[16];
    byte[] substituted = new byte[16];

    FromVectorToByteArray(source, ref op);

    for (int i = 0; i < substituted.Length; i++)
    {
        byte original = source[i];

        substituted[i] = _invSBox[original];
    }

    FromByteArrayToVector(substituted, ref op);

    return op;
}
/// <summary>
/// Applies the AES MixColumns step to the 16 state bytes carried by <paramref name="op"/>.
/// </summary>
/// <param name="op">Vector holding the state; bytes 4*c .. 4*c+3 are processed together as one column.</param>
/// <returns>The vector rebuilt from the transformed bytes.</returns>
public static Vector128<float> AesMixColumns(Vector128<float> op)
{
    byte[] source = new byte[16];
    byte[] mixed  = new byte[16];

    FromVectorToByteArray(source, ref op);

    // Process the state one 4-byte column at a time.
    for (int offset = 0; offset < 16; offset += 4)
    {
        byte a = source[offset];
        byte b = source[offset + 1];
        byte c = source[offset + 2];
        byte d = source[offset + 3];

        // Per row: one byte goes through the 02 table, the next through the 03 table,
        // and the remaining two are XORed in unchanged.
        mixed[offset]     = (byte)(_gfMul02[a] ^ _gfMul03[b] ^ c ^ d);
        mixed[offset + 1] = (byte)(a ^ _gfMul02[b] ^ _gfMul03[c] ^ d);
        mixed[offset + 2] = (byte)(a ^ b ^ _gfMul02[c] ^ _gfMul03[d]);
        mixed[offset + 3] = (byte)(_gfMul03[a] ^ b ^ c ^ _gfMul02[d]);
    }

    FromByteArrayToVector(mixed, ref op);

    return op;
}
/// <summary>
/// Applies the AES ShiftRows step: every state byte is moved to the position given by _srPerm.
/// </summary>
/// <param name="op">Vector holding the 16 state bytes.</param>
/// <returns>The vector rebuilt from the permuted bytes.</returns>
public static Vector128<float> AesShiftRows(Vector128<float> op)
{
    byte[] source   = new byte[16];
    byte[] permuted = new byte[16];

    FromVectorToByteArray(source, ref op);

    // Scatter: source byte i lands at index _srPerm[i].
    for (int i = 0; i < _srPerm.Length; i++)
    {
        int destination = _srPerm[i];

        permuted[destination] = source[i];
    }

    FromByteArrayToVector(permuted, ref op);

    return op;
}
/// <summary>
/// Applies the AES SubBytes step: every state byte is replaced by its _sBox entry.
/// </summary>
/// <param name="op">Vector holding the 16 state bytes.</param>
/// <returns>The vector rebuilt from the substituted bytes.</returns>
public static Vector128<float> AesSubBytes(Vector128<float> op)
{
    byte[] source      = new byte[16];
    byte[] substituted = new byte[16];

    FromVectorToByteArray(source, ref op);

    int i = 0;

    while (i < 16)
    {
        substituted[i] = _sBox[source[i]];

        i++;
    }

    FromByteArrayToVector(substituted, ref op);

    return op;
}
/// <summary>
/// Copies the 16 bytes of <paramref name="op"/> into <paramref name="state"/>, least significant byte
/// of each 64-bit half first (low half into indices 0-7, high half into indices 8-15).
/// </summary>
/// <param name="state">Destination buffer; indices 0 to 15 are written.</param>
/// <param name="op">Source vector; it is only read.</param>
private static void FromVectorToByteArray(byte[] state, ref Vector128<float> op)
{
    // The two 64-bit halves are fetched with element indices 0 and 1 at size 3.
    ulong lowHalf  = VectorHelper.VectorExtractIntZx(op, (byte)0, 3);
    ulong highHalf = VectorHelper.VectorExtractIntZx(op, (byte)1, 3);

    for (int i = 0; i < 8; i++)
    {
        int bitOffset = i * 8;

        state[i]     = (byte)((lowHalf  >> bitOffset) & 0xFFUL);
        state[i + 8] = (byte)((highHalf >> bitOffset) & 0xFFUL);
    }
}
/// <summary>
/// Packs the first 16 bytes of <paramref name="state"/> into <paramref name="op"/>.
/// </summary>
/// <param name="state">Source buffer; indices 0 to 15 are read.</param>
/// <param name="op">Receives the packed vector.</param>
/// <exception cref="PlatformNotSupportedException">Thrown when SSE2 is not supported.</exception>
private static void FromByteArrayToVector(byte[] state, ref Vector128<float> op)
{
    if (Sse2.IsSupported)
    {
        // SetVector128 takes its arguments from the highest element down to the lowest,
        // so state[0] ends up as element 0.
        Vector128<byte> packed = Sse2.SetVector128(
            state[15], state[14], state[13], state[12],
            state[11], state[10], state[9],  state[8],
            state[7],  state[6],  state[5],  state[4],
            state[3],  state[2],  state[1],  state[0]);

        op = Sse.StaticCast<byte, float>(packed);

        return;
    }

    throw new PlatformNotSupportedException();
}
}
}

View file

@ -0,0 +1,20 @@
using System;
namespace ChocolArm64.Instructions
{
/// <summary>
/// Immutable triple describing an instruction: an interpreter handler, an emitter handler
/// and an associated <see cref="System.Type"/>.
/// </summary>
struct Inst
{
    // Get-only auto-properties: the values are assigned once in the constructor and the
    // compiler now rejects any later mutation (previously they had unused private setters).

    /// <summary>Interpreter handler; null for <see cref="Undefined"/>.</summary>
    public InstInterpreter Interpreter { get; }

    /// <summary>Emitter handler.</summary>
    public InstEmitter Emitter { get; }

    /// <summary>Type associated with the instruction; null for <see cref="Undefined"/>.</summary>
    public Type Type { get; }

    /// <summary>
    /// Builds a fresh entry that uses <c>InstEmit.Und</c> as emitter and has no interpreter and no type.
    /// </summary>
    public static Inst Undefined => new Inst(null, InstEmit.Und, null);

    /// <summary>Creates an entry from its three components; no validation is performed.</summary>
    public Inst(InstInterpreter interpreter, InstEmitter emitter, Type type)
    {
        Interpreter = interpreter;
        Emitter     = emitter;
        Type        = type;
    }
}
}

View file

@ -0,0 +1,402 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitAluHelper;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
/// <summary>Emits ADC: add with carry, without touching the condition flags.</summary>
public static void Adc(ILEmitterCtx context)
{
    EmitAdc(context, false);
}

/// <summary>Emits ADCS: add with carry, also emitting the Z/N, C and V flag checks.</summary>
public static void Adcs(ILEmitterCtx context)
{
    EmitAdc(context, true);
}

/// <summary>
/// Shared emitter for ADC/ADCS: emits (operand1 + operand2) + C and stores the result.
/// </summary>
/// <param name="context">Emitter context of the instruction being translated.</param>
/// <param name="setFlags">When true, the flag checks are emitted before the result is stored.</param>
private static void EmitAdc(ILEmitterCtx context, bool setFlags)
{
    // operand1 + operand2
    EmitDataLoadOpers(context);

    context.Emit(OpCodes.Add);

    // The carry flag is loaded and turned into an integer through Convert.ToInt32(bool).
    context.EmitLdflg((int)PState.CBit);

    MethodInfo boolToInt32 = typeof(Convert).GetMethod(
        nameof(Convert.ToInt32),
        new[] { typeof(bool) });

    context.EmitCall(boolToInt32);

    bool is32Bit = context.CurrOp.RegisterSize == RegisterSize.Int32;

    if (!is32Bit)
    {
        // Widen the carry for operations that are not 32-bit.
        context.Emit(OpCodes.Conv_U8);
    }

    context.Emit(OpCodes.Add);

    if (setFlags)
    {
        context.EmitZnFlagCheck();

        EmitAdcsCCheck(context);
        EmitAddsVCheck(context);
    }

    EmitDataStore(context);
}
// ADD: emits an IL Add over the two data operands and stores the result (see EmitDataOp).
public static void Add(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Add);
// ADDS: same addition as Add, followed by the Z/N, carry and overflow flag checks,
// then stores the result through EmitDataStoreS.
public static void Adds(ILEmitterCtx context)
{
EmitDataLoadOpers(context);
context.Emit(OpCodes.Add);
// The flag checks are emitted after the Add and before the store; the C and V helpers
// start with a Dup, i.e. they work on the sum that is still on the evaluation stack.
context.EmitZnFlagCheck();
EmitAddsCCheck(context);
EmitAddsVCheck(context);
EmitDataStoreS(context);
}
// AND: emits an IL And over the two data operands and stores the result (see EmitDataOp).
public static void And(ILEmitterCtx context) => EmitDataOp(context, OpCodes.And);
// ANDS: same bitwise AND, additionally clearing V and C and emitting the Z/N flag check
// before the result is stored through EmitDataStoreS.
public static void Ands(ILEmitterCtx context)
{
EmitDataLoadOpers(context);
context.Emit(OpCodes.And);
// V and C are written as constant 0 (see EmitZeroCvFlags).
EmitZeroCvFlags(context);
context.EmitZnFlagCheck();
EmitDataStoreS(context);
}
// ASRV: shift of Rn by the masked Rm amount using IL Shr (arithmetic shift right); see EmitDataOpShift.
public static void Asrv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shr);
// BIC: bit clear without flag updates.
public static void Bic(ILEmitterCtx context) => EmitBic(context, false);
// BICS: bit clear that also updates the flags.
public static void Bics(ILEmitterCtx context) => EmitBic(context, true);
// Shared emitter for BIC/BICS: emits operand1 & ~operand2 and stores the result.
// When setFlags is true, V and C are zeroed and the Z/N check is emitted first.
private static void EmitBic(ILEmitterCtx context, bool setFlags)
{
EmitDataLoadOpers(context);
// Not applies to the value on top of the stack, i.e. the second operand.
context.Emit(OpCodes.Not);
context.Emit(OpCodes.And);
if (setFlags)
{
EmitZeroCvFlags(context);
context.EmitZnFlagCheck();
}
// setFlags is forwarded so the store picks the same variant as the other flag-setting instructions.
EmitDataStore(context, setFlags);
}
/// <summary>
/// Emits CLS: loads Rn and the operand width (32 or 64), calls
/// <c>SoftFallback.CountLeadingSigns</c> and stores the result in Rd.
/// </summary>
public static void Cls(ILEmitterCtx context)
{
    OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp;

    int operandBits;

    if (op.RegisterSize == RegisterSize.Int32)
    {
        operandBits = 32;
    }
    else
    {
        operandBits = 64;
    }

    context.EmitLdintzr(op.Rn);
    context.EmitLdc_I4(operandBits);

    SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingSigns));

    context.EmitStintzr(op.Rd);
}
/// <summary>
/// Emits CLZ: uses the <c>Lzcnt.LeadingZeroCount</c> intrinsic when available, otherwise
/// falls back to <c>SoftFallback.CountLeadingZeros</c>. The result is stored in Rd.
/// </summary>
public static void Clz(ILEmitterCtx context)
{
    OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp;

    bool is32Bit = op.RegisterSize == RegisterSize.Int32;

    context.EmitLdintzr(op.Rn);

    if (!Lzcnt.IsSupported)
    {
        // Software path: the helper also receives the operand width.
        context.EmitLdc_I4(is32Bit ? 32 : 64);

        SoftFallback.EmitCall(context, nameof(SoftFallback.CountLeadingZeros));
    }
    else
    {
        // Hardware path: select the uint or ulong overload to match the operand width.
        Type operandType = is32Bit ? typeof(uint) : typeof(ulong);

        MethodInfo leadingZeroCount = typeof(Lzcnt).GetMethod(
            nameof(Lzcnt.LeadingZeroCount),
            new[] { operandType });

        context.EmitCall(leadingZeroCount);
    }

    context.EmitStintzr(op.Rd);
}
// EON: emits operand1 ^ ~operand2 and stores the result.
public static void Eon(ILEmitterCtx context)
{
EmitDataLoadOpers(context);
// Not applies to the value on top of the stack, i.e. the second operand.
context.Emit(OpCodes.Not);
context.Emit(OpCodes.Xor);
EmitDataStore(context);
}
// EOR: emits an IL Xor over the two data operands and stores the result (see EmitDataOp).
public static void Eor(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Xor);
// EXTR: emits (Rm >> Shift) | (Rn << (bits - Shift)) with an unsigned right shift.
// When Shift is 0 no shift/or is emitted and Rm is stored unchanged.
public static void Extr(ILEmitterCtx context)
{
//TODO: Ensure that the Shift is valid for the Is64Bits.
OpCodeAluRs64 op = (OpCodeAluRs64)context.CurrOp;
context.EmitLdintzr(op.Rm);
if (op.Shift > 0)
{
// Low part: Rm >> Shift.
context.EmitLdc_I4(op.Shift);
context.Emit(OpCodes.Shr_Un);
// High part: Rn << (bits - Shift), merged in with Or.
context.EmitLdintzr(op.Rn);
context.EmitLdc_I4(op.GetBitsCount() - op.Shift);
context.Emit(OpCodes.Shl);
context.Emit(OpCodes.Or);
}
EmitDataStore(context);
}
// LSLV: shift of Rn by the masked Rm amount using IL Shl; see EmitDataOpShift.
public static void Lslv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shl);
// LSRV: shift of Rn by the masked Rm amount using IL Shr_Un (logical shift right); see EmitDataOpShift.
public static void Lsrv(ILEmitterCtx context) => EmitDataOpShift(context, OpCodes.Shr_Un);
/// <summary>Emits SBC: subtract with carry, without touching the condition flags.</summary>
public static void Sbc(ILEmitterCtx context)
{
    EmitSbc(context, false);
}

/// <summary>Emits SBCS: subtract with carry, also emitting the Z/N, C and V flag checks.</summary>
public static void Sbcs(ILEmitterCtx context)
{
    EmitSbc(context, true);
}

/// <summary>
/// Shared emitter for SBC/SBCS: emits (operand1 - operand2) - (C ^ 1) and stores the result.
/// </summary>
/// <param name="context">Emitter context of the instruction being translated.</param>
/// <param name="setFlags">When true, the flag checks are emitted before the result is stored.</param>
private static void EmitSbc(ILEmitterCtx context, bool setFlags)
{
    // operand1 - operand2
    EmitDataLoadOpers(context);

    context.Emit(OpCodes.Sub);

    // The carry flag is converted to an integer and inverted (XOR 1) to get the borrow.
    context.EmitLdflg((int)PState.CBit);

    MethodInfo boolToInt32 = typeof(Convert).GetMethod(
        nameof(Convert.ToInt32),
        new[] { typeof(bool) });

    context.EmitCall(boolToInt32);

    context.EmitLdc_I4(1);

    context.Emit(OpCodes.Xor);

    bool is32Bit = context.CurrOp.RegisterSize == RegisterSize.Int32;

    if (!is32Bit)
    {
        // Widen the borrow for operations that are not 32-bit.
        context.Emit(OpCodes.Conv_U8);
    }

    context.Emit(OpCodes.Sub);

    if (setFlags)
    {
        context.EmitZnFlagCheck();

        EmitSbcsCCheck(context);
        EmitSubsVCheck(context);
    }

    EmitDataStore(context);
}
// SUB: emits an IL Sub over the two data operands and stores the result (see EmitDataOp).
public static void Sub(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Sub);
// SUBS: same subtraction as Sub, followed by the Z/N, carry and overflow flag checks,
// then stores the result through EmitDataStoreS.
public static void Subs(ILEmitterCtx context)
{
// NOTE(review): TryOptMarkCondWithoutCmp is defined outside this file; judging by the name it lets
// the context optimise a following conditional — confirm against ILEmitterCtx.
context.TryOptMarkCondWithoutCmp();
EmitDataLoadOpers(context);
context.Emit(OpCodes.Sub);
// Flag checks are emitted after the Sub and before the store.
context.EmitZnFlagCheck();
EmitSubsCCheck(context);
EmitSubsVCheck(context);
EmitDataStoreS(context);
}
// ORN: emits operand1 | ~operand2 and stores the result.
public static void Orn(ILEmitterCtx context)
{
EmitDataLoadOpers(context);
// Not applies to the value on top of the stack, i.e. the second operand.
context.Emit(OpCodes.Not);
context.Emit(OpCodes.Or);
EmitDataStore(context);
}
// ORR: emits an IL Or over the two data operands and stores the result (see EmitDataOp).
public static void Orr(ILEmitterCtx context) => EmitDataOp(context, OpCodes.Or);
/// <summary>Emits RBIT through <c>SoftFallback.ReverseBits32</c> / <c>ReverseBits64</c>.</summary>
public static void Rbit(ILEmitterCtx context)
{
    EmitFallback32_64(
        context,
        nameof(SoftFallback.ReverseBits32),
        nameof(SoftFallback.ReverseBits64));
}

/// <summary>Emits REV16 through <c>SoftFallback.ReverseBytes16_32</c> / <c>ReverseBytes16_64</c>.</summary>
public static void Rev16(ILEmitterCtx context)
{
    EmitFallback32_64(
        context,
        nameof(SoftFallback.ReverseBytes16_32),
        nameof(SoftFallback.ReverseBytes16_64));
}

/// <summary>Emits REV32 through <c>SoftFallback.ReverseBytes32_32</c> / <c>ReverseBytes32_64</c>.</summary>
public static void Rev32(ILEmitterCtx context)
{
    EmitFallback32_64(
        context,
        nameof(SoftFallback.ReverseBytes32_32),
        nameof(SoftFallback.ReverseBytes32_64));
}

/// <summary>
/// Loads Rn, calls the SoftFallback method matching the operand width and stores the result in Rd.
/// </summary>
/// <param name="context">Emitter context of the instruction being translated.</param>
/// <param name="name32">SoftFallback method name used when the register size is Int32.</param>
/// <param name="name64">SoftFallback method name used otherwise.</param>
private static void EmitFallback32_64(ILEmitterCtx context, string name32, string name64)
{
    OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp;

    context.EmitLdintzr(op.Rn);

    string fallbackName = op.RegisterSize == RegisterSize.Int32 ? name32 : name64;

    SoftFallback.EmitCall(context, fallbackName);

    context.EmitStintzr(op.Rd);
}
// REV64: loads Rn, calls SoftFallback.ReverseBytes64 and stores the result in Rd.
// Unlike Rbit/Rev16/Rev32 there is no 32-bit variant to select.
public static void Rev64(ILEmitterCtx context)
{
OpCodeAlu64 op = (OpCodeAlu64)context.CurrOp;
context.EmitLdintzr(op.Rn);
SoftFallback.EmitCall(context, nameof(SoftFallback.ReverseBytes64));
context.EmitStintzr(op.Rd);
}
// RORV: rotate right by a register amount, emitted as
// (Rn >> shift) | (Rn << (bits - shift)), where shift is the masked Rm value from EmitDataLoadShift.
public static void Rorv(ILEmitterCtx context)
{
// Rn >> shift (unsigned)
EmitDataLoadRn(context);
EmitDataLoadShift(context);
context.Emit(OpCodes.Shr_Un);
// Rn << (bits - shift)
EmitDataLoadRn(context);
context.EmitLdc_I4(context.CurrOp.GetBitsCount());
EmitDataLoadShift(context);
context.Emit(OpCodes.Sub);
context.Emit(OpCodes.Shl);
// Merge both halves of the rotation.
context.Emit(OpCodes.Or);
EmitDataStore(context);
}
// SDIV: signed division (IL Div) with the guards implemented in EmitDiv.
public static void Sdiv(ILEmitterCtx context) => EmitDiv(context, OpCodes.Div);
// UDIV: unsigned division (IL Div_Un) with the division-by-zero guard implemented in EmitDiv.
public static void Udiv(ILEmitterCtx context) => EmitDiv(context, OpCodes.Div_Un);
// Shared emitter for SDIV/UDIV. Before each guard a fallback result is pushed; if the guard
// branches to badDiv that fallback is what gets stored, otherwise it is popped again.
private static void EmitDiv(ILEmitterCtx context, OpCode ilOp)
{
//If Rm == 0, Rd = 0 (division by zero).
// Fallback value 0 stays on the stack while Rm is compared against 0.
context.EmitLdc_I(0);
EmitDataLoadRm(context);
context.EmitLdc_I(0);
ILLabel badDiv = new ILLabel();
context.Emit(OpCodes.Beq_S, badDiv);
// Rm != 0: discard the fallback value.
context.Emit(OpCodes.Pop);
if (ilOp == OpCodes.Div)
{
//If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
// intMin has only the sign bit of the operand width set.
long intMin = 1L << (context.CurrOp.GetBitsCount() - 1);
// Fallback value INT_MIN stays on the stack during the check.
context.EmitLdc_I(intMin);
EmitDataLoadRn(context);
context.EmitLdc_I(intMin);
context.Emit(OpCodes.Ceq);
EmitDataLoadRm(context);
context.EmitLdc_I(-1);
context.Emit(OpCodes.Ceq);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Brtrue_S, badDiv);
// No overflow: discard the fallback value.
context.Emit(OpCodes.Pop);
}
// Regular path: Rn / Rm.
EmitDataLoadRn(context);
EmitDataLoadRm(context);
context.Emit(ilOp);
// Both the regular path and the guards arrive here with exactly one value to store.
context.MarkLabel(badDiv);
EmitDataStore(context);
}
// Emits a two-operand data-processing instruction: loads both operands, applies ilOp
// and stores the result with EmitDataStore (no flag updates).
private static void EmitDataOp(ILEmitterCtx context, OpCode ilOp)
{
EmitDataLoadOpers(context);
context.Emit(ilOp);
EmitDataStore(context);
}
// Emits a variable shift: loads Rn and the masked shift amount taken from Rm
// (see EmitDataLoadShift), applies ilOp and stores the result.
private static void EmitDataOpShift(ILEmitterCtx context, OpCode ilOp)
{
EmitDataLoadRn(context);
EmitDataLoadShift(context);
context.Emit(ilOp);
EmitDataStore(context);
}
/// <summary>
/// Pushes the shift amount for the variable shift instructions: Rm ANDed with (bits - 1).
/// </summary>
private static void EmitDataLoadShift(ILEmitterCtx context)
{
    EmitDataLoadRm(context);

    int shiftMask = context.CurrOp.GetBitsCount() - 1;

    context.EmitLdc_I(shiftMask);

    context.Emit(OpCodes.And);

    if (context.CurrOp.RegisterSize == RegisterSize.Int32)
    {
        return;
    }

    // IL shifts take a 32-bit shift count, so the 64-bit value is narrowed here. Nothing is
    // lost: the AND above already reduced it to its low 5 or 6 bits, which would fit in a byte.
    context.Emit(OpCodes.Conv_I4);
}
// Emits code that writes the constant 0 to the V flag and then to the C flag.
// Used by the flag-setting logical instructions (Ands, Bics).
private static void EmitZeroCvFlags(ILEmitterCtx context)
{
context.EmitLdc_I4(0);
context.EmitStflg((int)PState.VBit);
context.EmitLdc_I4(0);
context.EmitStflg((int)PState.CBit);
}
}
}

View file

@ -0,0 +1,212 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
static class InstEmitAluHelper
{
// Emits the carry-flag computation for ADCS and stores it in the C flag.
// Called with the result (Rd) on top of the evaluation stack.
public static void EmitAdcsCCheck(ILEmitterCtx context)
{
//C = (Rd == Rn && CIn) || Rd < Rn
// NOTE(review): EmitSttmp/EmitLdtmp are defined outside this file; they appear to stash the top of the
// stack in a temporary and reload it. The first reload is never consumed below, so it is
// presumably the copy of Rd left for the code that follows this helper — confirm.
context.EmitSttmp();
context.EmitLdtmp();
// Rd == Rn
context.EmitLdtmp();
EmitDataLoadRn(context);
context.Emit(OpCodes.Ceq);
// ... && CIn (the carry flag as it was before this instruction)
context.EmitLdflg((int)PState.CBit);
context.Emit(OpCodes.And);
// ... || Rd < Rn (unsigned compare)
context.EmitLdtmp();
EmitDataLoadRn(context);
context.Emit(OpCodes.Clt_Un);
context.Emit(OpCodes.Or);
context.EmitStflg((int)PState.CBit);
}
// Emits the carry-flag computation for ADDS and stores it in the C flag.
// Called with the result (Rd) on top of the evaluation stack; the Dup keeps that value in place.
public static void EmitAddsCCheck(ILEmitterCtx context)
{
//C = Rd < Rn
context.Emit(OpCodes.Dup);
EmitDataLoadRn(context);
// Unsigned compare: the sum being smaller than the first operand means the addition wrapped.
context.Emit(OpCodes.Clt_Un);
context.EmitStflg((int)PState.CBit);
}
// Emits the signed-overflow computation for additions and stores it in the V flag.
// Called with the result (Rd) on top of the evaluation stack; the Dup keeps that value in place.
public static void EmitAddsVCheck(ILEmitterCtx context)
{
//V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0
// Rd ^ Rn
context.Emit(OpCodes.Dup);
EmitDataLoadRn(context);
context.Emit(OpCodes.Xor);
// ~(Rn ^ second operand)
EmitDataLoadOpers(context);
context.Emit(OpCodes.Xor);
context.Emit(OpCodes.Not);
context.Emit(OpCodes.And);
// Signed "< 0" test, i.e. the sign bit of the combined value.
context.EmitLdc_I(0);
context.Emit(OpCodes.Clt);
context.EmitStflg((int)PState.VBit);
}
// Emits the carry-flag computation for SBCS and stores it in the C flag.
// Works only on freshly loaded operands; it neither reads nor removes anything already on the stack.
public static void EmitSbcsCCheck(ILEmitterCtx context)
{
//C = (Rn == Rm && CIn) || Rn > Rm
// Rn == Rm && CIn
EmitDataLoadOpers(context);
context.Emit(OpCodes.Ceq);
context.EmitLdflg((int)PState.CBit);
context.Emit(OpCodes.And);
// ... || Rn > Rm (unsigned compare)
EmitDataLoadOpers(context);
context.Emit(OpCodes.Cgt_Un);
context.Emit(OpCodes.Or);
context.EmitStflg((int)PState.CBit);
}
// Emits the carry-flag computation for SUBS and stores it in the C flag.
// Works only on freshly loaded operands; it neither reads nor removes anything already on the stack.
public static void EmitSubsCCheck(ILEmitterCtx context)
{
//C = Rn == Rm || Rn > Rm = !(Rn < Rm)
EmitDataLoadOpers(context);
context.Emit(OpCodes.Clt_Un);
// XOR with 1 inverts the 0/1 result of the unsigned compare.
context.EmitLdc_I4(1);
context.Emit(OpCodes.Xor);
context.EmitStflg((int)PState.CBit);
}
// Emits the signed-overflow computation for subtractions and stores it in the V flag.
// Called with the result (Rd) on top of the evaluation stack; the Dup keeps that value in place.
public static void EmitSubsVCheck(ILEmitterCtx context)
{
//V = (Rd ^ Rn) & (Rn ^ Rm) < 0
// Rd ^ Rn
context.Emit(OpCodes.Dup);
EmitDataLoadRn(context);
context.Emit(OpCodes.Xor);
// Rn ^ second operand
EmitDataLoadOpers(context);
context.Emit(OpCodes.Xor);
context.Emit(OpCodes.And);
// Signed "< 0" test, i.e. the sign bit of the combined value.
context.EmitLdc_I(0);
context.Emit(OpCodes.Clt);
context.EmitStflg((int)PState.VBit);
}
// Pushes register Rm of the current instruction via EmitLdintzr.
// The current op must implement IOpCodeAluRs64, otherwise the cast throws.
public static void EmitDataLoadRm(ILEmitterCtx context)
{
context.EmitLdintzr(((IOpCodeAluRs64)context.CurrOp).Rm);
}
// Pushes both data operands of the current instruction: first Rn, then the second operand
// (immediate, shifted register or extended register; see EmitDataLoadOper2).
public static void EmitDataLoadOpers(ILEmitterCtx context)
{
EmitDataLoadRn(context);
EmitDataLoadOper2(context);
}
/// <summary>
/// Pushes the first operand (Rn) of the current instruction. Logical operations and
/// shifted-register forms load it with EmitLdintzr; every other form uses EmitLdint.
/// </summary>
public static void EmitDataLoadRn(ILEmitterCtx context)
{
    IOpCodeAlu64 op = (IOpCodeAlu64)context.CurrOp;

    if (op.DataOp != DataOp.Logical && !(op is IOpCodeAluRs64))
    {
        context.EmitLdint(op.Rn);

        return;
    }

    context.EmitLdintzr(op.Rn);
}
/// <summary>
/// Pushes the second operand of the current instruction. The form is selected by the interface
/// the op implements, tested in this order: immediate, shifted register, extended register.
/// Nothing is emitted when the op implements none of them.
/// </summary>
public static void EmitDataLoadOper2(ILEmitterCtx context)
{
    var current = context.CurrOp;

    if (current is IOpCodeAluImm64 immOp)
    {
        // Immediate form: the constant itself.
        context.EmitLdc_I(immOp.Imm);
    }
    else if (current is IOpCodeAluRs64 shiftedOp)
    {
        // Shifted-register form: Rm with the encoded shift applied.
        context.EmitLdintzr(shiftedOp.Rm);

        ShiftType shiftKind = shiftedOp.ShiftType;

        if (shiftKind == ShiftType.Lsl)
        {
            context.EmitLsl(shiftedOp.Shift);
        }
        else if (shiftKind == ShiftType.Lsr)
        {
            context.EmitLsr(shiftedOp.Shift);
        }
        else if (shiftKind == ShiftType.Asr)
        {
            context.EmitAsr(shiftedOp.Shift);
        }
        else if (shiftKind == ShiftType.Ror)
        {
            context.EmitRor(shiftedOp.Shift);
        }
    }
    else if (current is IOpCodeAluRx64 extendedOp)
    {
        // Extended-register form: Rm cast to the encoded integer type, then shifted left.
        context.EmitLdintzr(extendedOp.Rm);
        context.EmitCast(extendedOp.IntType);
        context.EmitLsl(extendedOp.Shift);
    }
}
/// <summary>Stores the result into Rd for an instruction that does not set flags.</summary>
public static void EmitDataStore(ILEmitterCtx context)
{
    EmitDataStore(context, false);
}

/// <summary>Stores the result into Rd for an instruction that sets flags.</summary>
public static void EmitDataStoreS(ILEmitterCtx context)
{
    EmitDataStore(context, true);
}

/// <summary>
/// Stores the value on top of the stack into Rd. Flag-setting instructions and
/// shifted-register forms store with EmitStintzr; every other form uses EmitStint.
/// </summary>
public static void EmitDataStore(ILEmitterCtx context, bool setFlags)
{
    IOpCodeAlu64 op = (IOpCodeAlu64)context.CurrOp;

    if (!setFlags && !(op is IOpCodeAluRs64))
    {
        context.EmitStint(op.Rd);

        return;
    }

    context.EmitStintzr(op.Rd);
}
/// <summary>
/// Emits code that sets the four condition flags from a constant.
/// </summary>
/// <param name="context">Emitter context of the instruction being translated.</param>
/// <param name="nzcv">Flag values: bit 0 = V, bit 1 = C, bit 2 = Z, bit 3 = N.</param>
public static void EmitSetNzcv(ILEmitterCtx context, int nzcv)
{
    // Listed in bit order, lowest bit first; flags are written in this same order.
    int[] flagsByBit =
    {
        (int)PState.VBit,
        (int)PState.CBit,
        (int)PState.ZBit,
        (int)PState.NBit
    };

    for (int bit = 0; bit < flagsByBit.Length; bit++)
    {
        context.EmitLdc_I4((nzcv >> bit) & 1);

        context.EmitStflg(flagsByBit[bit]);
    }
}
}
}

View file

@ -0,0 +1,208 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
// BFM: emits value | (Rd & (~WMask & TMask)) | (Rd & ~TMask), where value is whatever
// EmitBfmLoadRn pushes, and writes the result back to Rd.
public static void Bfm(ILEmitterCtx context)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
// NOTE(review): EmitBfmLoadRn is defined outside this view; presumably it pushes the
// rotated and masked bits of Rn — confirm.
EmitBfmLoadRn(context);
// Merge in Rd & (~WMask & TMask).
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~op.WMask & op.TMask);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
// Merge in Rd & ~TMask.
context.EmitLdintzr(op.Rd);
context.EmitLdc_I(~op.TMask);
context.Emit(OpCodes.And);
context.Emit(OpCodes.Or);
context.EmitStintzr(op.Rd);
}
/// <summary>
/// Emits SBFM. Several encodings get a shorter sequence (plain shift, SBFIZ, sign-extending
/// casts); everything else goes through the general path at the end.
/// </summary>
public static void Sbfm(ILEmitterCtx context)
{
    OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;

    int bitsCount = op.GetBitsCount();

    // Field ends at the top bit: handled as a plain signed shift.
    if (op.Pos + 1 == bitsCount)
    {
        EmitSbfmShift(context);

        return;
    }

    if (op.Pos < op.Shift)
    {
        EmitSbfiz(context);

        return;
    }

    // With no shift, fields ending at bit 7/15/31 are sign-extending casts.
    if (op.Shift == 0)
    {
        switch (op.Pos)
        {
            case 7:
                EmitSbfmCast(context, OpCodes.Conv_I1);
                return;

            case 15:
                EmitSbfmCast(context, OpCodes.Conv_I2);
                return;

            case 31:
                EmitSbfmCast(context, OpCodes.Conv_I4);
                return;
        }
    }

    // General path: value from EmitBfmLoadRn ORed with the sign replication
    // ((Rn << (bits - 1 - Pos)) >> (bits - 1), arithmetic) masked by ~TMask.
    EmitBfmLoadRn(context);

    context.EmitLdintzr(op.Rn);

    context.EmitLsl(bitsCount - 1 - op.Pos);
    context.EmitAsr(bitsCount - 1);

    context.EmitLdc_I(~op.TMask);

    context.Emit(OpCodes.And);
    context.Emit(OpCodes.Or);

    context.EmitStintzr(op.Rd);
}
public static void Ubfm(ILEmitterCtx context)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
if (op.Pos + 1 == op.GetBitsCount())
{
EmitUbfmShift(context);
}
else if (op.Pos < op.Shift)
{
EmitUbfiz(context);
}
else if (op.Pos + 1 == op.Shift)
{
EmitBfmLsl(context);
}
else if (op.Pos == 7 && op.Shift == 0)
{
EmitUbfmCast(context, OpCodes.Conv_U1);
}
else if (op.Pos == 15 && op.Shift == 0)
{
EmitUbfmCast(context, OpCodes.Conv_U2);
}
else
{
EmitBfmLoadRn(context);
context.EmitStintzr(op.Rd);
}
}
private static void EmitSbfiz(ILEmitterCtx context) => EmitBfiz(context, true);
private static void EmitUbfiz(ILEmitterCtx context) => EmitBfiz(context, false);
private static void EmitBfiz(ILEmitterCtx context, bool signed)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
int width = op.Pos + 1;
context.EmitLdintzr(op.Rn);
context.EmitLsl(op.GetBitsCount() - width);
if (signed)
{
context.EmitAsr(op.Shift - width);
}
else
{
context.EmitLsr(op.Shift - width);
}
context.EmitStintzr(op.Rd);
}
private static void EmitSbfmCast(ILEmitterCtx context, OpCode ilOp)
{
EmitBfmCast(context, ilOp, true);
}
private static void EmitUbfmCast(ILEmitterCtx context, OpCode ilOp)
{
EmitBfmCast(context, ilOp, false);
}
private static void EmitBfmCast(ILEmitterCtx context, OpCode ilOp, bool signed)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
context.EmitLdintzr(op.Rn);
context.Emit(ilOp);
if (op.RegisterSize != RegisterSize.Int32)
{
context.Emit(signed
? OpCodes.Conv_I8
: OpCodes.Conv_U8);
}
context.EmitStintzr(op.Rd);
}
private static void EmitSbfmShift(ILEmitterCtx context)
{
EmitBfmShift(context, true);
}
private static void EmitUbfmShift(ILEmitterCtx context)
{
EmitBfmShift(context, false);
}
private static void EmitBfmShift(ILEmitterCtx context, bool signed)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
context.EmitLdintzr(op.Rn);
context.EmitLdc_I4(op.Shift);
context.Emit(signed
? OpCodes.Shr
: OpCodes.Shr_Un);
context.EmitStintzr(op.Rd);
}
private static void EmitBfmLsl(ILEmitterCtx context)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
context.EmitLdintzr(op.Rn);
context.EmitLsl(op.GetBitsCount() - op.Shift);
context.EmitStintzr(op.Rd);
}
private static void EmitBfmLoadRn(ILEmitterCtx context)
{
OpCodeBfm64 op = (OpCodeBfm64)context.CurrOp;
context.EmitLdintzr(op.Rn);
context.EmitRor(op.Shift);
context.EmitLdc_I(op.WMask & op.TMask);
context.Emit(OpCodes.And);
}
}
}

View file

@ -0,0 +1,81 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using static ChocolArm64.Instructions.InstEmitAluHelper;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Selects the arithmetic used by the conditional compare emitter.
        private enum CcmpOp
        {
            Cmp,
            Cmn
        }

        public static void Ccmn(ILEmitterCtx context) => EmitCcmp(context, CcmpOp.Cmn);
        public static void Ccmp(ILEmitterCtx context) => EmitCcmp(context, CcmpOp.Cmp);

        //Emits a conditional compare. When the condition of the opcode holds the
        //operands are subtracted (Cmp) or added (Cmn) and the flags are updated
        //from that result, otherwise the flags are loaded from the Nzcv immediate.
        //Throws ArgumentOutOfRangeException for a cmpOp value that is not part
        //of the CcmpOp enum.
        private static void EmitCcmp(ILEmitterCtx context, CcmpOp cmpOp)
        {
            OpCodeCcmp64 op = (OpCodeCcmp64)context.CurrOp;

            ILLabel lblTrue = new ILLabel();
            ILLabel lblEnd  = new ILLabel();

            context.EmitCondBranch(lblTrue, op.Cond);

            //Condition failed, flags come straight from the immediate.
            //This is the same V, C, Z, N store sequence Fccmp_S uses.
            EmitSetNzcv(context, op.Nzcv);

            context.Emit(OpCodes.Br_S, lblEnd);

            context.MarkLabel(lblTrue);

            EmitDataLoadOpers(context);

            switch (cmpOp)
            {
                case CcmpOp.Cmp:
                    context.Emit(OpCodes.Sub);

                    context.EmitZnFlagCheck();

                    EmitSubsCCheck(context);
                    EmitSubsVCheck(context);

                    break;

                case CcmpOp.Cmn:
                    context.Emit(OpCodes.Add);

                    context.EmitZnFlagCheck();

                    EmitAddsCCheck(context);
                    EmitAddsVCheck(context);

                    break;

                default:
                    //The single string constructor of ArgumentException takes a
                    //message, not a parameter name, so use the exception type
                    //whose first argument really is the parameter name.
                    throw new ArgumentOutOfRangeException(nameof(cmpOp));
            }

            //Only the flags are wanted, discard the arithmetic result.
            context.Emit(OpCodes.Pop);

            context.MarkLabel(lblEnd);
        }
    }
}

View file

@ -0,0 +1,58 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Extra operation applied to Rm on the path where the condition fails.
        private enum CselOperation
        {
            None,
            Increment,
            Invert,
            Negate
        }

        public static void Csel(ILEmitterCtx context)
        {
            EmitCsel(context, CselOperation.None);
        }

        public static void Csinc(ILEmitterCtx context)
        {
            EmitCsel(context, CselOperation.Increment);
        }

        public static void Csinv(ILEmitterCtx context)
        {
            EmitCsel(context, CselOperation.Invert);
        }

        public static void Csneg(ILEmitterCtx context)
        {
            EmitCsel(context, CselOperation.Negate);
        }

        //Emits a conditional select. Rd receives Rn when the condition of the
        //opcode holds, otherwise it receives Rm after cselOp was applied to it.
        private static void EmitCsel(ILEmitterCtx context, CselOperation cselOp)
        {
            OpCodeCsel64 selOp = (OpCodeCsel64)context.CurrOp;

            ILLabel lblCondHolds = new ILLabel();
            ILLabel lblStore     = new ILLabel();

            context.EmitCondBranch(lblCondHolds, selOp.Cond);

            //Fall through: the condition did not hold.
            context.EmitLdintzr(selOp.Rm);

            switch (cselOp)
            {
                case CselOperation.Increment:
                    context.EmitLdc_I(1);

                    context.Emit(OpCodes.Add);

                    break;

                case CselOperation.Invert:
                    context.Emit(OpCodes.Not);

                    break;

                case CselOperation.Negate:
                    context.Emit(OpCodes.Neg);

                    break;
            }

            context.Emit(OpCodes.Br_S, lblStore);

            context.MarkLabel(lblCondHolds);

            context.EmitLdintzr(selOp.Rn);

            //Both paths join here with the selected value pending.
            context.MarkLabel(lblStore);

            context.EmitStintzr(selOp.Rd);
        }
    }
}

View file

@ -0,0 +1,86 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        public static void Brk(ILEmitterCtx context)
            => EmitExceptionCall(context, nameof(CpuThreadState.OnBreak));

        public static void Svc(ILEmitterCtx context)
            => EmitExceptionCall(context, nameof(CpuThreadState.OnSvcCall));

        //Emits a call to the CpuThreadState handler named mthdName, passing the
        //position of the instruction and the Id of the opcode, followed by a
        //check of CpuThreadState.Running.
        private static void EmitExceptionCall(ILEmitterCtx context, string mthdName)
        {
            OpCodeException64 excOp = (OpCodeException64)context.CurrOp;

            context.EmitStoreState();

            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitLdc_I8(excOp.Position);
            context.EmitLdc_I4(excOp.Id);

            context.EmitPrivateCall(typeof(CpuThreadState), mthdName);

            //When the thread is no longer supposed to run we return 0, which
            //forces a return to the dispatcher so that the thread can exit.
            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Running));

            ILLabel lblStillRunning = new ILLabel();

            context.Emit(OpCodes.Brtrue_S, lblStillRunning);

            context.EmitLdc_I8(0);

            context.Emit(OpCodes.Ret);

            context.MarkLabel(lblStillRunning);

            if (context.CurrBlock.Next == null)
            {
                //No following block, return the address of the next instruction.
                context.EmitLdc_I8(excOp.Position + 4);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                context.EmitLoadState(context.CurrBlock.Next);
            }
        }

        //Emits a call to CpuThreadState.OnUndefined with the position and the
        //raw encoding of the current instruction.
        public static void Und(ILEmitterCtx context)
        {
            OpCode64 undOp = context.CurrOp;

            context.EmitStoreState();

            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitLdc_I8(undOp.Position);
            context.EmitLdc_I4(undOp.RawOpCode);

            context.EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.OnUndefined));

            if (context.CurrBlock.Next == null)
            {
                //No following block, return the address of the next instruction.
                context.EmitLdc_I8(undOp.Position + 4);

                context.Emit(OpCodes.Ret);
            }
            else
            {
                context.EmitLoadState(context.CurrBlock.Next);
            }
        }
    }
}

View file

@ -0,0 +1,189 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //B: when the current block has a branch target an IL branch to the label
        //of Imm is emitted, otherwise the state is stored and Imm is returned.
        public static void B(ILEmitterCtx context)
        {
            OpCodeBImmAl64 op = (OpCodeBImmAl64)context.CurrOp;

            if (context.CurrBlock.Branch != null)
            {
                context.Emit(OpCodes.Br, context.GetLabel(op.Imm));
            }
            else
            {
                context.EmitStoreState();
                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }

        //B.cond: conditional branch on the condition encoded in the opcode.
        public static void B_Cond(ILEmitterCtx context)
        {
            OpCodeBImmCond64 op = (OpCodeBImmCond64)context.CurrOp;

            EmitBranch(context, op.Cond);
        }

        //BL: writes the address of the next instruction to the link register and
        //either emits an inlined subroutine call or returns the target address.
        public static void Bl(ILEmitterCtx context)
        {
            OpCodeBImmAl64 op = (OpCodeBImmAl64)context.CurrOp;

            context.EmitLdc_I(op.Position + 4);
            context.EmitStint(CpuThreadState.LrIndex);
            context.EmitStoreState();

            if (context.TryOptEmitSubroutineCall())
            {
                //Note: the return value of the called method will be placed
                //at the Stack, the return value is always a Int64 with the
                //return address of the function. We check if the address is
                //correct, if it isn't we keep returning until we reach the dispatcher.
                context.Emit(OpCodes.Dup);

                context.EmitLdc_I8(op.Position + 4);

                ILLabel lblContinue = new ILLabel();

                context.Emit(OpCodes.Beq_S, lblContinue);
                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblContinue);

                context.Emit(OpCodes.Pop);

                context.EmitLoadState(context.CurrBlock.Next);
            }
            else
            {
                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }

        //BLR: writes the address of the next instruction to the link register
        //and returns the value of Rn as the branch target.
        public static void Blr(ILEmitterCtx context)
        {
            OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;

            //The target has to be read before the link register is written,
            //otherwise a BLR through the link register itself would branch to
            //the return address that was just stored there. The loaded value
            //stays pending across the state store, the same way the operands
            //of EmitBranch(context, ilOp) do.
            context.EmitLdintzr(op.Rn);

            context.EmitLdc_I(op.Position + 4);
            context.EmitStint(CpuThreadState.LrIndex);
            context.EmitStoreState();

            context.Emit(OpCodes.Ret);
        }

        //BR: stores the state and returns the value of Rn as the branch target.
        public static void Br(ILEmitterCtx context)
        {
            OpCodeBReg64 op = (OpCodeBReg64)context.CurrOp;

            context.EmitStoreState();
            context.EmitLdintzr(op.Rn);

            context.Emit(OpCodes.Ret);
        }

        public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un);
        public static void Cbz(ILEmitterCtx context)  => EmitCb(context, OpCodes.Beq);

        //Compares Rt against zero and branches with the given IL branch opcode.
        private static void EmitCb(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImmCmp64 op = (OpCodeBImmCmp64)context.CurrOp;

            context.EmitLdintzr(op.Rt);
            context.EmitLdc_I(0);

            EmitBranch(context, ilOp);
        }

        //RET: stores the state and returns the value of the link register.
        public static void Ret(ILEmitterCtx context)
        {
            context.EmitStoreState();
            context.EmitLdint(CpuThreadState.LrIndex);

            context.Emit(OpCodes.Ret);
        }

        public static void Tbnz(ILEmitterCtx context) => EmitTb(context, OpCodes.Bne_Un);
        public static void Tbz(ILEmitterCtx context)  => EmitTb(context, OpCodes.Beq);

        //Isolates bit Pos of Rt, compares it against zero and branches with the
        //given IL branch opcode.
        private static void EmitTb(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImmTest64 op = (OpCodeBImmTest64)context.CurrOp;

            context.EmitLdintzr(op.Rt);
            context.EmitLdc_I(1L << op.Pos);

            context.Emit(OpCodes.And);

            context.EmitLdc_I(0);

            EmitBranch(context, ilOp);
        }

        //Conditional branch on a guest condition code. When the current block
        //has both a next block and a branch target a branch to the label of Imm
        //is emitted, otherwise the state is stored and either Position + 4
        //(not taken) or Imm (taken) is returned.
        private static void EmitBranch(ILEmitterCtx context, Cond cond)
        {
            OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;

            if (context.CurrBlock.Next   != null &&
                context.CurrBlock.Branch != null)
            {
                context.EmitCondBranch(context.GetLabel(op.Imm), cond);
            }
            else
            {
                context.EmitStoreState();

                ILLabel lblTaken = new ILLabel();

                context.EmitCondBranch(lblTaken, cond);

                context.EmitLdc_I8(op.Position + 4);

                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblTaken);

                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }

        //Same as the overload above, but the branch is an IL compare-and-branch
        //opcode that consumes the two operands the caller has already emitted.
        private static void EmitBranch(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp;

            if (context.CurrBlock.Next   != null &&
                context.CurrBlock.Branch != null)
            {
                context.Emit(ilOp, context.GetLabel(op.Imm));
            }
            else
            {
                context.EmitStoreState();

                ILLabel lblTaken = new ILLabel();

                context.Emit(ilOp, lblTaken);

                context.EmitLdc_I8(op.Position + 4);

                context.Emit(OpCodes.Ret);

                context.MarkLabel(lblTaken);

                context.EmitLdc_I8(op.Imm);

                context.Emit(OpCodes.Ret);
            }
        }
    }
}

View file

@ -0,0 +1,115 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //The plain CRC32 variants always go through the SoftFallback routines.
        public static void Crc32b(ILEmitterCtx context) => EmitCrc32(context, nameof(SoftFallback.Crc32B));
        public static void Crc32h(ILEmitterCtx context) => EmitCrc32(context, nameof(SoftFallback.Crc32H));
        public static void Crc32w(ILEmitterCtx context) => EmitCrc32(context, nameof(SoftFallback.Crc32W));
        public static void Crc32x(ILEmitterCtx context) => EmitCrc32(context, nameof(SoftFallback.Crc32X));

        //The CRC32C variants call Sse42.Crc32 when Optimizations.UseSse42 is set
        //and go through the SoftFallback routines otherwise.
        public static void Crc32cb(ILEmitterCtx context)
        {
            if (!Optimizations.UseSse42)
            {
                EmitCrc32(context, nameof(SoftFallback.Crc32Cb));

                return;
            }

            EmitSse42Crc32(context, typeof(uint), typeof(byte));
        }

        public static void Crc32ch(ILEmitterCtx context)
        {
            if (!Optimizations.UseSse42)
            {
                EmitCrc32(context, nameof(SoftFallback.Crc32Ch));

                return;
            }

            EmitSse42Crc32(context, typeof(uint), typeof(ushort));
        }

        public static void Crc32cw(ILEmitterCtx context)
        {
            if (!Optimizations.UseSse42)
            {
                EmitCrc32(context, nameof(SoftFallback.Crc32Cw));

                return;
            }

            EmitSse42Crc32(context, typeof(uint), typeof(uint));
        }

        public static void Crc32cx(ILEmitterCtx context)
        {
            if (!Optimizations.UseSse42)
            {
                EmitCrc32(context, nameof(SoftFallback.Crc32Cx));

                return;
            }

            EmitSse42Crc32(context, typeof(ulong), typeof(ulong));
        }

        //Loads Rn and Rm, calls the Sse42.Crc32 overload that takes (tCrc, tData)
        //and writes the result to Rd.
        private static void EmitSse42Crc32(ILEmitterCtx context, Type tCrc, Type tData)
        {
            OpCodeAluRs64 crcOp = (OpCodeAluRs64)context.CurrOp;

            context.EmitLdintzr(crcOp.Rn);
            context.EmitLdintzr(crcOp.Rm);

            var crc32Method = typeof(Sse42).GetMethod(nameof(Sse42.Crc32), new[] { tCrc, tData });

            context.EmitCall(crc32Method);

            context.EmitStintzr(crcOp.Rd);
        }

        //Loads Rn and Rm, calls the SoftFallback method with the given name and
        //writes the result to Rd. When the register size is not Int32 the Rn
        //value is narrowed with Conv_U4 before the call and the result is
        //widened with Conv_U8 after it.
        private static void EmitCrc32(ILEmitterCtx context, string name)
        {
            OpCodeAluRs64 crcOp = (OpCodeAluRs64)context.CurrOp;

            bool needsConversion = crcOp.RegisterSize != RegisterSize.Int32;

            context.EmitLdintzr(crcOp.Rn);

            if (needsConversion)
            {
                context.Emit(OpCodes.Conv_U4);
            }

            context.EmitLdintzr(crcOp.Rm);

            SoftFallback.EmitCall(context, name);

            if (needsConversion)
            {
                context.Emit(OpCodes.Conv_U8);
            }

            context.EmitStintzr(crcOp.Rd);
        }
    }
}

View file

@ -0,0 +1,252 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;
using static ChocolArm64.Instructions.InstEmitMemoryHelper;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //ADR: writes (Position + Imm) to Rd.
        public static void Adr(ILEmitterCtx context)
        {
            OpCodeAdr64 op = (OpCodeAdr64)context.CurrOp;

            context.EmitLdc_I(op.Position + op.Imm);
            context.EmitStintzr(op.Rd);
        }

        //ADRP: writes (Position with the low 12 bits cleared) + (Imm << 12) to Rd.
        public static void Adrp(ILEmitterCtx context)
        {
            OpCodeAdr64 op = (OpCodeAdr64)context.CurrOp;

            context.EmitLdc_I((op.Position & ~0xfffL) + (op.Imm << 12));
            context.EmitStintzr(op.Rd);
        }

        public static void Ldr(ILEmitterCtx context)  => EmitLdr(context, false);
        public static void Ldrs(ILEmitterCtx context) => EmitLdr(context, true);

        //Emits a single register load. A signed load sign extends to 64 bits when
        //the opcode has Extend64 set and to 32 bits otherwise, an unsigned load
        //zero extends. The value goes to the vector register Rt for SIMD opcodes
        //and to the integer register Rt otherwise.
        private static void EmitLdr(ILEmitterCtx context, bool signed)
        {
            OpCodeMem64 op = (OpCodeMem64)context.CurrOp;

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);

            EmitLoadAddress(context);

            if (signed && op.Extend64)
            {
                EmitReadSx64Call(context, op.Size);
            }
            else if (signed)
            {
                EmitReadSx32Call(context, op.Size);
            }
            else
            {
                EmitReadZxCall(context, op.Size);
            }

            if (op is IOpCodeSimd64)
            {
                context.EmitStvec(op.Rt);
            }
            else
            {
                context.EmitStintzr(op.Rt);
            }

            EmitWBackIfNeeded(context);
        }

        //LDR (literal): loads from the address held in Imm. Prefetch encodings
        //emit nothing.
        public static void LdrLit(ILEmitterCtx context)
        {
            IOpCodeLit64 op = (IOpCodeLit64)context.CurrOp;

            if (op.Prefetch)
            {
                return;
            }

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdc_I8(op.Imm);

            if (op.Signed)
            {
                EmitReadSx64Call(context, op.Size);
            }
            else
            {
                EmitReadZxCall(context, op.Size);
            }

            if (op is IOpCodeSimd64)
            {
                context.EmitStvec(op.Rt);
            }
            else
            {
                //Rt is a data register here, so it is stored the same way the
                //other loads of this class store it (EmitLdr, Ldp), rather than
                //through the variant used for base addresses.
                //NOTE(review): presumably the two variants only differ for
                //register index 31 (zero register vs. SP) - confirm.
                context.EmitStintzr(op.Rt);
            }
        }

        //LDP: loads Rt from the computed address and Rt2 from that address plus
        //the access size in bytes (1 << Size).
        public static void Ldp(ILEmitterCtx context)
        {
            OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp;

            void EmitReadAndStore(int rt)
            {
                if (op.Extend64)
                {
                    EmitReadSx64Call(context, op.Size);
                }
                else
                {
                    EmitReadZxCall(context, op.Size);
                }

                if (op is IOpCodeSimd64)
                {
                    context.EmitStvec(rt);
                }
                else
                {
                    context.EmitStintzr(rt);
                }
            }

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);

            EmitLoadAddress(context);

            EmitReadAndStore(op.Rt);

            //The second element is addressed from the saved temporary, not from
            //Rn, since the first store above may have changed the base register.
            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdtmp();
            context.EmitLdc_I8(1 << op.Size);

            context.Emit(OpCodes.Add);

            EmitReadAndStore(op.Rt2);

            EmitWBackIfNeeded(context);
        }

        //STR: writes the vector or integer register Rt to the computed address.
        public static void Str(ILEmitterCtx context)
        {
            OpCodeMem64 op = (OpCodeMem64)context.CurrOp;

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);

            EmitLoadAddress(context);

            if (op is IOpCodeSimd64)
            {
                context.EmitLdvec(op.Rt);
            }
            else
            {
                context.EmitLdintzr(op.Rt);
            }

            EmitWriteCall(context, op.Size);

            EmitWBackIfNeeded(context);
        }

        //STP: writes Rt to the computed address and Rt2 to that address plus
        //the access size in bytes (1 << Size).
        public static void Stp(ILEmitterCtx context)
        {
            OpCodeMemPair64 op = (OpCodeMemPair64)context.CurrOp;

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);

            EmitLoadAddress(context);

            if (op is IOpCodeSimd64)
            {
                context.EmitLdvec(op.Rt);
            }
            else
            {
                context.EmitLdintzr(op.Rt);
            }

            EmitWriteCall(context, op.Size);

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdtmp();
            context.EmitLdc_I8(1 << op.Size);

            context.Emit(OpCodes.Add);

            if (op is IOpCodeSimd64)
            {
                context.EmitLdvec(op.Rt2);
            }
            else
            {
                context.EmitLdintzr(op.Rt2);
            }

            EmitWriteCall(context, op.Size);

            EmitWBackIfNeeded(context);
        }

        //Emits the effective address of the current memory opcode and also saves
        //a copy of it to the temporary.
        //Immediate form: Rn, plus Imm unless the opcode is post-indexed.
        //Register form: Rn plus Rm, where Rm is cast to IntType and optionally
        //shifted left by Size.
        private static void EmitLoadAddress(ILEmitterCtx context)
        {
            switch (context.CurrOp)
            {
                case OpCodeMemImm64 op:
                    context.EmitLdint(op.Rn);

                    if (!op.PostIdx)
                    {
                        //Pre-indexing.
                        context.EmitLdc_I(op.Imm);

                        context.Emit(OpCodes.Add);
                    }
                    break;

                case OpCodeMemReg64 op:
                    context.EmitLdint(op.Rn);
                    context.EmitLdintzr(op.Rm);
                    context.EmitCast(op.IntType);

                    if (op.Shift)
                    {
                        context.EmitLsl(op.Size);
                    }

                    context.Emit(OpCodes.Add);
                    break;
            }

            //Save address to Scratch var since the register value may change.
            context.Emit(OpCodes.Dup);

            context.EmitSttmp();
        }

        //Writes the updated base address back to Rn when the opcode asks for it.
        private static void EmitWBackIfNeeded(ILEmitterCtx context)
        {
            //Check whether the current OpCode has write back enabled, if so write it.
            //The saved address already includes Imm for pre-indexed opcodes,
            //so Imm is only added here for post-indexed ones.
            //Note: OpCodeMemPair64 inherits from OpCodeMemImm64, so this works for both.
            if (context.CurrOp is OpCodeMemImm64 op && op.WBack)
            {
                context.EmitLdtmp();

                if (op.PostIdx)
                {
                    context.EmitLdc_I(op.Imm);

                    context.Emit(OpCodes.Add);
                }

                context.EmitStint(op.Rn);
            }
        }
    }
}

View file

@ -0,0 +1,192 @@
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Threading;
using static ChocolArm64.Instructions.InstEmitMemoryHelper;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //Flavor of an ordered and/or exclusive memory access.
        [Flags]
        private enum AccessType
        {
            None      = 0,
            Ordered   = 1,
            Exclusive = 2,
            OrderedEx = Ordered | Exclusive
        }

        public static void Clrex(ILEmitterCtx context)
            => EmitMemoryCall(context, nameof(MemoryManager.ClearExclusive));

        public static void Dmb(ILEmitterCtx context)
        {
            EmitBarrier(context);
        }

        public static void Dsb(ILEmitterCtx context)
        {
            EmitBarrier(context);
        }

        public static void Ldar(ILEmitterCtx context)
        {
            EmitLoad(context, AccessType.Ordered, pair: false);
        }

        public static void Ldaxr(ILEmitterCtx context)
        {
            EmitLoad(context, AccessType.OrderedEx, pair: false);
        }

        public static void Ldxr(ILEmitterCtx context)
        {
            EmitLoad(context, AccessType.Exclusive, pair: false);
        }

        public static void Ldxp(ILEmitterCtx context)
        {
            EmitLoad(context, AccessType.Exclusive, pair: true);
        }

        public static void Ldaxp(ILEmitterCtx context)
        {
            EmitLoad(context, AccessType.OrderedEx, pair: true);
        }

        private static void EmitLdr(ILEmitterCtx context, AccessType accType)
            => EmitLoad(context, accType, false);

        private static void EmitLdp(ILEmitterCtx context, AccessType accType)
            => EmitLoad(context, accType, true);

        //Emits a zero extended load of Rt from the address in Rn, plus a load of
        //Rt2 from the following element when pair is true. Ordered accesses emit
        //a barrier first, exclusive accesses call MemoryManager.SetExclusive.
        private static void EmitLoad(ILEmitterCtx context, AccessType accType, bool pair)
        {
            OpCodeMemEx64 memOp = (OpCodeMemEx64)context.CurrOp;

            if (accType.HasFlag(AccessType.Ordered))
            {
                EmitBarrier(context);
            }

            if (accType.HasFlag(AccessType.Exclusive))
            {
                EmitMemoryCall(context, nameof(MemoryManager.SetExclusive), memOp.Rn);
            }

            //Keep the address in the temporary, it is needed again for the
            //second element of a pair.
            context.EmitLdint(memOp.Rn);
            context.EmitSttmp();

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdtmp();

            EmitReadZxCall(context, memOp.Size);

            context.EmitStintzr(memOp.Rt);

            if (!pair)
            {
                return;
            }

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdtmp();
            context.EmitLdc_I8(1 << memOp.Size);

            context.Emit(OpCodes.Add);

            EmitReadZxCall(context, memOp.Size);

            context.EmitStintzr(memOp.Rt2);
        }

        public static void Pfrm(ILEmitterCtx context)
        {
            //Memory Prefetch, execute as no-op.
        }

        public static void Stlr(ILEmitterCtx context)
        {
            EmitStore(context, AccessType.Ordered, pair: false);
        }

        public static void Stlxr(ILEmitterCtx context)
        {
            EmitStore(context, AccessType.OrderedEx, pair: false);
        }

        public static void Stxr(ILEmitterCtx context)
        {
            EmitStore(context, AccessType.Exclusive, pair: false);
        }

        public static void Stxp(ILEmitterCtx context)
        {
            EmitStore(context, AccessType.Exclusive, pair: true);
        }

        public static void Stlxp(ILEmitterCtx context)
        {
            EmitStore(context, AccessType.OrderedEx, pair: true);
        }

        private static void EmitStr(ILEmitterCtx context, AccessType accType)
            => EmitStore(context, accType, false);

        private static void EmitStp(ILEmitterCtx context, AccessType accType)
            => EmitStore(context, accType, true);

        //Emits a store of Rt to the address in Rn, plus a store of Rt2 to the
        //following element when pair is true. Ordered accesses emit a barrier
        //first. Exclusive accesses only perform the write when
        //MemoryManager.TestExclusive returns true, Rs receives 0 when the write
        //was done and 1 when it was skipped.
        private static void EmitStore(ILEmitterCtx context, AccessType accType, bool pair)
        {
            OpCodeMemEx64 memOp = (OpCodeMemEx64)context.CurrOp;

            bool isExclusive = accType.HasFlag(AccessType.Exclusive);

            if (accType.HasFlag(AccessType.Ordered))
            {
                EmitBarrier(context);
            }

            ILLabel lblDoWrite = new ILLabel();
            ILLabel lblDone    = new ILLabel();

            if (isExclusive)
            {
                EmitMemoryCall(context, nameof(MemoryManager.TestExclusive), memOp.Rn);

                context.Emit(OpCodes.Brtrue_S, lblDoWrite);

                //Test failed: report 1 through Rs and skip the write.
                context.EmitLdc_I8(1);
                context.EmitStintzr(memOp.Rs);

                context.Emit(OpCodes.Br_S, lblDone);
            }

            context.MarkLabel(lblDoWrite);

            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdint(memOp.Rn);
            context.EmitLdintzr(memOp.Rt);

            EmitWriteCall(context, memOp.Size);

            if (pair)
            {
                context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                context.EmitLdint(memOp.Rn);
                context.EmitLdc_I8(1 << memOp.Size);

                context.Emit(OpCodes.Add);

                context.EmitLdintzr(memOp.Rt2);

                EmitWriteCall(context, memOp.Size);
            }

            if (isExclusive)
            {
                //Write done: report 0 through Rs.
                context.EmitLdc_I8(0);
                context.EmitStintzr(memOp.Rs);

                EmitMemoryCall(context, nameof(MemoryManager.ClearExclusiveForStore));
            }

            context.MarkLabel(lblDone);
        }

        //Emits a call to the MemoryManager method with the given name. The
        //arguments are the memory manager, the value of CpuThreadState.Core and,
        //unless rn is -1, the value of the integer register rn.
        private static void EmitMemoryCall(ILEmitterCtx context, string name, int rn = -1)
        {
            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdarg(TranslatedSub.StateArgIdx);

            context.EmitCallPropGet(typeof(CpuThreadState), nameof(CpuThreadState.Core));

            bool hasRegisterArg = rn != -1;

            if (hasRegisterArg)
            {
                context.EmitLdint(rn);
            }

            context.EmitCall(typeof(MemoryManager), name);
        }

        //Emits a call to Thread.MemoryBarrier.
        private static void EmitBarrier(ILEmitterCtx context)
        {
            //Note: This barrier is most likely not necessary, and probably
            //doesn't make any difference since we need to do a ton of stuff
            //(software MMU emulation) to read or write anything anyway.
            context.EmitCall(typeof(Thread), nameof(Thread.MemoryBarrier));
        }
    }
}

View file

@ -0,0 +1,138 @@
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static class InstEmitMemoryHelper
    {
        //How a value that was read from memory is extended.
        private enum Extension
        {
            Zx,
            Sx32,
            Sx64
        }

        public static void EmitReadZxCall(ILEmitterCtx context, int size)
            => EmitReadCall(context, Extension.Zx, size);

        public static void EmitReadSx32Call(ILEmitterCtx context, int size)
            => EmitReadCall(context, Extension.Sx32, size);

        public static void EmitReadSx64Call(ILEmitterCtx context, int size)
            => EmitReadCall(context, Extension.Sx64, size);

        //Emits a call to the MemoryManager read method that matches size
        //(0 to 3 for integer accesses, 0 to 4 for SIMD accesses) and, for
        //integer accesses, the conversions requested by ext.
        //Throws ArgumentOutOfRangeException when size is outside of that range.
        private static void EmitReadCall(ILEmitterCtx context, Extension ext, int size)
        {
            bool isSimd = GetIsSimd(context);

            int maxSize = isSimd ? 4 : 3;

            if (size < 0 || size > maxSize)
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            //Indexed by size.
            string[] readers = isSimd
                ? new[]
                {
                    nameof(MemoryManager.ReadVector8),
                    nameof(MemoryManager.ReadVector16),
                    nameof(MemoryManager.ReadVector32),
                    nameof(MemoryManager.ReadVector64),
                    nameof(MemoryManager.ReadVector128)
                }
                : new[]
                {
                    nameof(MemoryManager.ReadByte),
                    nameof(MemoryManager.ReadUInt16),
                    nameof(MemoryManager.ReadUInt32),
                    nameof(MemoryManager.ReadUInt64)
                };

            context.EmitCall(typeof(MemoryManager), readers[size]);

            if (isSimd)
            {
                return;
            }

            bool signExtend = ext == Extension.Sx32 || ext == Extension.Sx64;

            if (signExtend)
            {
                if (size == 0)
                {
                    context.Emit(OpCodes.Conv_I1);
                }
                else if (size == 1)
                {
                    context.Emit(OpCodes.Conv_I2);
                }
                else if (size == 2)
                {
                    context.Emit(OpCodes.Conv_I4);
                }
            }

            if (size < 3)
            {
                //Values narrower than 64 bits are widened, keeping the sign only
                //for the Sx64 extension.
                if (ext == Extension.Sx64)
                {
                    context.Emit(OpCodes.Conv_I8);
                }
                else
                {
                    context.Emit(OpCodes.Conv_U8);
                }
            }
        }

        //Emits a call to the MemoryManager write method that matches size
        //(0 to 3 for integer accesses, 0 to 4 for SIMD accesses). Integer values
        //narrower than 64 bits are converted with Conv_I4 before the call.
        //Throws ArgumentOutOfRangeException when size is outside of that range.
        public static void EmitWriteCall(ILEmitterCtx context, int size)
        {
            bool isSimd = GetIsSimd(context);

            int maxSize = isSimd ? 4 : 3;

            if (size < 0 || size > maxSize)
            {
                throw new ArgumentOutOfRangeException(nameof(size));
            }

            if (!isSimd && size < 3)
            {
                context.Emit(OpCodes.Conv_I4);
            }

            //Indexed by size.
            string[] writers = isSimd
                ? new[]
                {
                    nameof(MemoryManager.WriteVector8),
                    nameof(MemoryManager.WriteVector16),
                    nameof(MemoryManager.WriteVector32),
                    nameof(MemoryManager.WriteVector64),
                    nameof(MemoryManager.WriteVector128)
                }
                : new[]
                {
                    nameof(MemoryManager.WriteByte),
                    nameof(MemoryManager.WriteUInt16),
                    nameof(MemoryManager.WriteUInt32),
                    nameof(MemoryManager.WriteUInt64)
                };

            context.EmitCall(typeof(MemoryManager), writers[size]);
        }

        //True for SIMD opcodes, except for OpCodeSimdMemMs64 and OpCodeSimdMemSs64,
        //which are handled like integer accesses here.
        private static bool GetIsSimd(ILEmitterCtx context)
        {
            if (!(context.CurrOp is IOpCodeSimd64))
            {
                return false;
            }

            return !(context.CurrOp is OpCodeSimdMemMs64) &&
                   !(context.CurrOp is OpCodeSimdMemSs64);
        }
    }
}

View file

@ -0,0 +1,41 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        //MOVK: clears the 16 bits of Rd that start at bit Pos, ORs Imm into the
        //result and writes it back to Rd.
        public static void Movk(ILEmitterCtx context)
        {
            OpCodeMov64 movOp = (OpCodeMov64)context.CurrOp;

            var keepMask = ~(0xffffL << movOp.Pos);

            context.EmitLdintzr(movOp.Rd);
            context.EmitLdc_I(keepMask);

            context.Emit(OpCodes.And);

            context.EmitLdc_I(movOp.Imm);

            context.Emit(OpCodes.Or);

            context.EmitStintzr(movOp.Rd);
        }

        //MOVN: writes the bitwise complement of Imm to Rd.
        public static void Movn(ILEmitterCtx context)
        {
            OpCodeMov64 movOp = (OpCodeMov64)context.CurrOp;

            var inverted = ~movOp.Imm;

            context.EmitLdc_I(inverted);
            context.EmitStintzr(movOp.Rd);
        }

        //MOVZ: writes Imm to Rd.
        public static void Movz(ILEmitterCtx context)
        {
            OpCodeMov64 movOp = (OpCodeMov64)context.CurrOp;

            context.EmitLdc_I(movOp.Imm);
            context.EmitStintzr(movOp.Rd);
        }
    }
}

View file

@ -0,0 +1,80 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
    static partial class InstEmit
    {
        public static void Madd(ILEmitterCtx context)
        {
            EmitMul(context, OpCodes.Add);
        }

        public static void Msub(ILEmitterCtx context)
        {
            EmitMul(context, OpCodes.Sub);
        }

        //Emits Rd = Ra (ilOp) (Rn * Rm), where ilOp is Add or Sub.
        private static void EmitMul(ILEmitterCtx context, OpCode ilOp)
        {
            OpCodeMul64 mulOp = (OpCodeMul64)context.CurrOp;

            context.EmitLdintzr(mulOp.Ra);

            context.EmitLdintzr(mulOp.Rn);
            context.EmitLdintzr(mulOp.Rm);

            context.Emit(OpCodes.Mul);

            context.Emit(ilOp);

            context.EmitStintzr(mulOp.Rd);
        }

        public static void Smaddl(ILEmitterCtx context)
        {
            EmitMull(context, OpCodes.Add, signed: true);
        }

        public static void Smsubl(ILEmitterCtx context)
        {
            EmitMull(context, OpCodes.Sub, signed: true);
        }

        public static void Umaddl(ILEmitterCtx context)
        {
            EmitMull(context, OpCodes.Add, signed: false);
        }

        public static void Umsubl(ILEmitterCtx context)
        {
            EmitMull(context, OpCodes.Sub, signed: false);
        }

        //Emits Rd = Ra (addSubOp) (Rn * Rm), where both factors are first
        //narrowed with Conv_I4 and then widened with Conv_I8 (signed) or
        //Conv_U8 (unsigned).
        private static void EmitMull(ILEmitterCtx context, OpCode addSubOp, bool signed)
        {
            OpCodeMul64 mulOp = (OpCodeMul64)context.CurrOp;

            OpCode widenOp;

            if (signed)
            {
                widenOp = OpCodes.Conv_I8;
            }
            else
            {
                widenOp = OpCodes.Conv_U8;
            }

            void LoadFactor(int reg)
            {
                context.EmitLdintzr(reg);

                context.Emit(OpCodes.Conv_I4);
                context.Emit(widenOp);
            }

            context.EmitLdintzr(mulOp.Ra);

            LoadFactor(mulOp.Rn);
            LoadFactor(mulOp.Rm);

            context.Emit(OpCodes.Mul);

            context.Emit(addSubOp);

            context.EmitStintzr(mulOp.Rd);
        }

        //SMULH: Rd receives the result of SoftFallback.SMulHi128(Rn, Rm).
        public static void Smulh(ILEmitterCtx context)
        {
            OpCodeMul64 mulOp = (OpCodeMul64)context.CurrOp;

            context.EmitLdintzr(mulOp.Rn);
            context.EmitLdintzr(mulOp.Rm);

            SoftFallback.EmitCall(context, nameof(SoftFallback.SMulHi128));

            context.EmitStintzr(mulOp.Rd);
        }

        //UMULH: Rd receives the result of SoftFallback.UMulHi128(Rn, Rm).
        public static void Umulh(ILEmitterCtx context)
        {
            OpCodeMul64 mulOp = (OpCodeMul64)context.CurrOp;

            context.EmitLdintzr(mulOp.Rn);
            context.EmitLdintzr(mulOp.Rm);

            SoftFallback.EmitCall(context, nameof(SoftFallback.UMulHi128));

            context.EmitStintzr(mulOp.Rd);
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,526 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitAluHelper;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
//Scalar CMEQ, emitted through EmitCmp with the Beq_S branch opcode.
public static void Cmeq_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Beq_S, scalar: true);
//Vector CMEQ. Register forms use Sse2.CompareEqual for sizes below 3 and
//Sse41.CompareEqual for size 3 when the matching optimization is enabled,
//everything else is emitted through EmitCmp.
public static void Cmeq_V(ILEmitterCtx context)
{
    if (context.CurrOp is OpCodeSimdReg64 regOp)
    {
        if (regOp.Size < 3 && Optimizations.UseSse2)
        {
            EmitSse2Op(context, nameof(Sse2.CompareEqual));

            return;
        }

        if (regOp.Size == 3 && Optimizations.UseSse41)
        {
            EmitSse41Op(context, nameof(Sse41.CompareEqual));

            return;
        }
    }

    EmitCmp(context, OpCodes.Beq_S, scalar: false);
}
//Scalar CMGE, emitted through EmitCmp with the Bge_S branch opcode.
public static void Cmge_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bge_S, scalar: true);
//Vector CMGE, emitted through EmitCmp with the Bge_S branch opcode.
public static void Cmge_V(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bge_S, scalar: false);
//Scalar CMGT, emitted through EmitCmp with the Bgt_S branch opcode.
public static void Cmgt_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bgt_S, scalar: true);
//Vector CMGT. Register forms use Sse2.CompareGreaterThan for sizes below 3
//and Sse42.CompareGreaterThan for size 3 when the matching optimization is
//enabled, everything else is emitted through EmitCmp.
public static void Cmgt_V(ILEmitterCtx context)
{
    if (context.CurrOp is OpCodeSimdReg64 regOp)
    {
        if (regOp.Size < 3 && Optimizations.UseSse2)
        {
            EmitSse2Op(context, nameof(Sse2.CompareGreaterThan));

            return;
        }

        if (regOp.Size == 3 && Optimizations.UseSse42)
        {
            EmitSse42Op(context, nameof(Sse42.CompareGreaterThan));

            return;
        }
    }

    EmitCmp(context, OpCodes.Bgt_S, scalar: false);
}
//Scalar CMHI, emitted through EmitCmp with the unsigned Bgt_Un_S branch opcode.
public static void Cmhi_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bgt_Un_S, scalar: true);
//Vector CMHI, emitted through EmitCmp with the unsigned Bgt_Un_S branch opcode.
public static void Cmhi_V(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bgt_Un_S, scalar: false);
//Scalar CMHS, emitted through EmitCmp with the unsigned Bge_Un_S branch opcode.
public static void Cmhs_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bge_Un_S, scalar: true);
//Vector CMHS, emitted through EmitCmp with the unsigned Bge_Un_S branch opcode.
public static void Cmhs_V(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Bge_Un_S, scalar: false);
//Scalar CMLE, emitted through EmitCmp with the Ble_S branch opcode.
public static void Cmle_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Ble_S, scalar: true);
//Vector CMLE, emitted through EmitCmp with the Ble_S branch opcode.
public static void Cmle_V(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Ble_S, scalar: false);
//Scalar CMLT, emitted through EmitCmp with the Blt_S branch opcode.
public static void Cmlt_S(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Blt_S, scalar: true);
//Vector CMLT, emitted through EmitCmp with the Blt_S branch opcode.
public static void Cmlt_V(ILEmitterCtx context)
    => EmitCmp(context, OpCodes.Blt_S, scalar: false);
//Scalar CMTST, emitted through EmitCmtst.
public static void Cmtst_S(ILEmitterCtx context)
    => EmitCmtst(context, scalar: true);
//Vector CMTST, emitted through EmitCmtst.
public static void Cmtst_V(ILEmitterCtx context)
    => EmitCmtst(context, scalar: false);
//FCCMP: when the condition of the opcode holds the flags come from a regular
//Fcmp_S compare, otherwise they are loaded from the Nzcv immediate.
public static void Fccmp_S(ILEmitterCtx context)
{
    OpCodeSimdFcond64 fcondOp = (OpCodeSimdFcond64)context.CurrOp;

    ILLabel lblCompare = new ILLabel();
    ILLabel lblDone    = new ILLabel();

    context.EmitCondBranch(lblCompare, fcondOp.Cond);

    //Condition failed.
    EmitSetNzcv(context, fcondOp.Nzcv);

    context.Emit(OpCodes.Br, lblDone);

    //Condition holds.
    context.MarkLabel(lblCompare);

    Fcmp_S(context);

    context.MarkLabel(lblDone);
}
//FCCMPE is emitted exactly like FCCMP.
public static void Fccmpe_S(ILEmitterCtx context) => Fccmp_S(context);
//Scalar FCMEQ. Register forms use the CompareEqualScalar intrinsic when both
//UseSse and UseSse2 are enabled, everything else goes through EmitScalarFcmp.
public static void Fcmeq_S(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitScalarFcmp(context, OpCodes.Beq_S);

        return;
    }

    EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar));
}
//Vector FCMEQ. Register forms use the CompareEqual intrinsic when both
//UseSse and UseSse2 are enabled, everything else goes through EmitVectorFcmp.
public static void Fcmeq_V(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitVectorFcmp(context, OpCodes.Beq_S);

        return;
    }

    EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual));
}
//Scalar FCMGE. Register forms use the CompareGreaterThanOrEqualScalar
//intrinsic when both UseSse and UseSse2 are enabled, everything else goes
//through EmitScalarFcmp.
public static void Fcmge_S(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitScalarFcmp(context, OpCodes.Bge_S);

        return;
    }

    EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar));
}
//Vector FCMGE. Register forms use the CompareGreaterThanOrEqual intrinsic
//when both UseSse and UseSse2 are enabled, everything else goes through
//EmitVectorFcmp.
public static void Fcmge_V(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitVectorFcmp(context, OpCodes.Bge_S);

        return;
    }

    EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual));
}
//Scalar FCMGT. Register forms use the CompareGreaterThanScalar intrinsic when
//both UseSse and UseSse2 are enabled, everything else goes through
//EmitScalarFcmp.
public static void Fcmgt_S(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitScalarFcmp(context, OpCodes.Bgt_S);

        return;
    }

    EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar));
}
//Vector FCMGT. Register forms use the CompareGreaterThan intrinsic when both
//UseSse and UseSse2 are enabled, everything else goes through EmitVectorFcmp.
public static void Fcmgt_V(ILEmitterCtx context)
{
    bool useSsePath = context.CurrOp is OpCodeSimdReg64
        && Optimizations.UseSse
        && Optimizations.UseSse2;

    if (!useSsePath)
    {
        EmitVectorFcmp(context, OpCodes.Bgt_S);

        return;
    }

    EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan));
}
//Scalar FCMLE, emitted through EmitScalarFcmp with the Ble_S branch opcode.
public static void Fcmle_S(ILEmitterCtx context)
    => EmitScalarFcmp(context, OpCodes.Ble_S);
//Vector FCMLE, emitted through EmitVectorFcmp with the Ble_S branch opcode.
public static void Fcmle_V(ILEmitterCtx context)
    => EmitVectorFcmp(context, OpCodes.Ble_S);
//Scalar floating point "less than" compare, emitted with a Blt_S branch.
public static void Fcmlt_S(ILEmitterCtx context) => EmitScalarFcmp(context, OpCodes.Blt_S);
//Vector floating point "less than" compare, emitted with a Blt_S branch.
public static void Fcmlt_V(ILEmitterCtx context) => EmitVectorFcmp(context, OpCodes.Blt_S);
//Emits a scalar floating point compare of element 0 of Rn against either element 0
//of Rm or a zero constant, and stores the outcome in the N, Z, C and V flags.
public static void Fcmp_S(ILEmitterCtx context)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    //Bit3 selects the compare-with-zero form; it is never read for OpCodeSimdFcond64.
    bool compareZero = !(op is OpCodeSimdFcond64) && op.Bit3;

    //Handle NaN case.
    //If any compared number is NaN, then NZCV = 0011.
    EmitNaNCheck(context, op.Rn);

    if (!compareZero)
    {
        EmitNaNCheck(context, op.Rm);

        context.Emit(OpCodes.Or);
    }

    ILLabel nanLabel  = new ILLabel();
    ILLabel doneLabel = new ILLabel();

    context.Emit(OpCodes.Brtrue_S, nanLabel);

    //Pushes both compare operands: Rn first, then Rm or a zero of the matching type.
    void PushOperands()
    {
        EmitVectorExtractF(context, op.Rn, 0, op.Size);

        if (!compareZero)
        {
            EmitVectorExtractF(context, op.Rm, 0, op.Size);
        }
        else if (op.Size == 0)
        {
            context.EmitLdc_R4(0f);
        }
        else /* if (op.Size == 1) */
        {
            context.EmitLdc_R8(0d);
        }
    }

    //Z = Rn == Rm
    PushOperands();

    context.Emit(OpCodes.Ceq);
    context.Emit(OpCodes.Dup);

    context.EmitStflg((int)PState.ZBit);

    //C = Rn >= Rm (the duplicated equality result OR'ed with Rn > Rm)
    PushOperands();

    context.Emit(OpCodes.Cgt);
    context.Emit(OpCodes.Or);

    context.EmitStflg((int)PState.CBit);

    //N = Rn < Rm
    PushOperands();

    context.Emit(OpCodes.Clt);

    context.EmitStflg((int)PState.NBit);

    //V = 0
    context.EmitLdc_I4(0);

    context.EmitStflg((int)PState.VBit);

    context.Emit(OpCodes.Br_S, doneLabel);

    context.MarkLabel(nanLabel);

    EmitSetNzcv(context, 0b0011);

    context.MarkLabel(doneLabel);
}
//Emits exactly the same code as Fcmp_S; this variant adds no handling of its own.
public static void Fcmpe_S(ILEmitterCtx context) => Fcmp_S(context);
//Extracts element 0 of the given vector register as a float (Size 0) or double (Size 1)
//and emits a call to the matching IsNaN method, leaving its result on the stack.
//Throws InvalidOperationException for any other Size.
private static void EmitNaNCheck(ILEmitterCtx context, int reg)
{
    IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;

    EmitVectorExtractF(context, reg, 0, op.Size);

    switch (op.Size)
    {
        case 0:
            context.EmitCall(typeof(float), nameof(float.IsNaN));
            break;

        case 1:
            context.EmitCall(typeof(double), nameof(double.IsNaN));
            break;

        default:
            throw new InvalidOperationException();
    }
}
//Emits a signed integer compare of every element of Rn against the matching element
//of Rm (when the op is an OpCodeSimdReg64) or against zero otherwise.
//ilOp is the branch opcode taken when the comparison holds: such an element of Rd is
//set to all ones, any other element to zero. With scalar set only element 0 is handled.
private static void EmitCmp(ILEmitterCtx context, OpCode ilOp, bool scalar)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int byteCount = op.GetBitsCount() >> 3;
    int elemCount = scalar ? 1 : byteCount >> op.Size;

    //Every bit of one element set.
    ulong elemMask = ulong.MaxValue >> (64 - (8 << op.Size));

    for (int elem = 0; elem < elemCount; elem++)
    {
        EmitVectorExtractSx(context, op.Rn, elem, op.Size);

        if (op is OpCodeSimdReg64 regOp)
        {
            EmitVectorExtractSx(context, regOp.Rm, elem, op.Size);
        }
        else
        {
            context.EmitLdc_I8(0L);
        }

        ILLabel passLabel = new ILLabel();
        ILLabel doneLabel = new ILLabel();

        context.Emit(ilOp, passLabel);

        //Comparison failed.
        EmitVectorInsert(context, op.Rd, elem, op.Size, 0);

        context.Emit(OpCodes.Br_S, doneLabel);

        //Comparison passed.
        context.MarkLabel(passLabel);

        EmitVectorInsert(context, op.Rd, elem, op.Size, (long)elemMask);

        context.MarkLabel(doneLabel);
    }

    bool clearUpper = (op.RegisterSize == RegisterSize.Simd64) || scalar;

    if (clearUpper)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Emits a bit test per element: Rd element = all ones when (Rn & Rm) != 0, else zero.
//With scalar set only element 0 is handled.
private static void EmitCmtst(ILEmitterCtx context, bool scalar)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int byteCount = op.GetBitsCount() >> 3;
    int elemCount = scalar ? 1 : byteCount >> op.Size;

    //Every bit of one element set.
    ulong elemMask = ulong.MaxValue >> (64 - (8 << op.Size));

    for (int elem = 0; elem < elemCount; elem++)
    {
        EmitVectorExtractZx(context, op.Rn, elem, op.Size);
        EmitVectorExtractZx(context, op.Rm, elem, op.Size);

        ILLabel nonZeroLabel = new ILLabel();
        ILLabel doneLabel    = new ILLabel();

        context.Emit(OpCodes.And);

        context.EmitLdc_I8(0L);

        context.Emit(OpCodes.Bne_Un_S, nonZeroLabel);

        //No common bit set.
        EmitVectorInsert(context, op.Rd, elem, op.Size, 0);

        context.Emit(OpCodes.Br_S, doneLabel);

        //At least one common bit set.
        context.MarkLabel(nonZeroLabel);

        EmitVectorInsert(context, op.Rd, elem, op.Size, (long)elemMask);

        context.MarkLabel(doneLabel);
    }

    bool clearUpper = (op.RegisterSize == RegisterSize.Simd64) || scalar;

    if (clearUpper)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Scalar floating point compare: a single EmitFcmp on element 0.
private static void EmitScalarFcmp(ILEmitterCtx context, OpCode ilOp) => EmitFcmp(context, ilOp, 0, scalar: true);
//Vector floating point compare: runs EmitFcmp on every element, then clears the upper
//half of Rd when the register size is Simd64.
private static void EmitVectorFcmp(ILEmitterCtx context, OpCode ilOp)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int sizeF = op.Size & 1;

    //Shift by log2 of the element width in bytes (sizeF + 2); the addition binds
    //tighter than the shift, the parentheses only make that explicit.
    int elemCount = (op.GetBitsCount() >> 3) >> (sizeF + 2);

    for (int elem = 0; elem < elemCount; elem++)
    {
        EmitFcmp(context, ilOp, elem, scalar: false);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Emits one floating point compare of element "index" of Rn against the matching
//element of Rm (when the op is an OpCodeSimdReg64) or against zero otherwise.
//ilOp is the branch opcode taken when the comparison holds. On success the result
//element is set to all ones (of the float width), otherwise to zero. In scalar mode
//the rest of Rd is cleared as well.
private static void EmitFcmp(ILEmitterCtx context, OpCode ilOp, int index, bool scalar)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int sizeF = op.Size & 1;

    //Every bit of one float element set (32 or 64 bits).
    ulong elemMask = ulong.MaxValue >> (64 - (32 << sizeF));

    EmitVectorExtractF(context, op.Rn, index, sizeF);

    if (op is OpCodeSimdReg64 regOp)
    {
        EmitVectorExtractF(context, regOp.Rm, index, sizeF);
    }
    else if (sizeF == 0)
    {
        context.EmitLdc_R4(0f);
    }
    else /* if (sizeF == 1) */
    {
        context.EmitLdc_R8(0d);
    }

    ILLabel passLabel = new ILLabel();
    ILLabel doneLabel = new ILLabel();

    context.Emit(ilOp, passLabel);

    //Comparison failed.
    if (scalar)
    {
        EmitVectorZeroAll(context, op.Rd);
    }
    else
    {
        EmitVectorInsert(context, op.Rd, index, sizeF + 2, 0);
    }

    context.Emit(OpCodes.Br_S, doneLabel);

    //Comparison passed.
    context.MarkLabel(passLabel);

    //The scalar form always writes through a size 3 insert and then clears the upper half.
    EmitVectorInsert(context, op.Rd, index, scalar ? 3 : sizeF + 2, (long)elemMask);

    if (scalar)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }

    context.MarkLabel(doneLabel);
}
}
}

View file

@ -0,0 +1,54 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
namespace ChocolArm64.Instructions
{
//IL emitters for the AES instructions. Each one loads its vector operand(s), calls the
//matching SoftFallback routine and stores the returned vector in Rd.
static partial class InstEmit
{
    //Rd = SoftFallback.Decrypt(Rd, Rn)
    public static void Aesd_V(ILEmitterCtx context)
    {
        OpCodeSimd64 aesOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(aesOp.Rd);
        context.EmitLdvec(aesOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Decrypt));

        context.EmitStvec(aesOp.Rd);
    }

    //Rd = SoftFallback.Encrypt(Rd, Rn)
    public static void Aese_V(ILEmitterCtx context)
    {
        OpCodeSimd64 aesOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(aesOp.Rd);
        context.EmitLdvec(aesOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Encrypt));

        context.EmitStvec(aesOp.Rd);
    }

    //Rd = SoftFallback.InverseMixColumns(Rn)
    public static void Aesimc_V(ILEmitterCtx context)
    {
        OpCodeSimd64 aesOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(aesOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.InverseMixColumns));

        context.EmitStvec(aesOp.Rd);
    }

    //Rd = SoftFallback.MixColumns(Rn)
    public static void Aesmc_V(ILEmitterCtx context)
    {
        OpCodeSimd64 aesOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(aesOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.MixColumns));

        context.EmitStvec(aesOp.Rd);
    }
}
}

View file

@ -0,0 +1,697 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
//IL emitters for the floating point <-> integer and floating point precision
//conversion instructions.
static partial class InstEmit
{
    //Scalar precision conversion. Size is the source format and Opc the destination
    //format (0 = single, 1 = double).
    public static void Fcvt_S(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        if (!Optimizations.UseSse2)
        {
            //Generic path: extract, cast to the destination type, store as scalar.
            EmitVectorExtractF(context, op.Rn, 0, op.Size);

            EmitFloatCast(context, op.Opc);

            EmitScalarSetF(context, op.Rd, op.Opc);

            return;
        }

        if (op.Size == 1 && op.Opc == 0)
        {
            //Double -> Single.
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

            EmitLdvecWithCastToDouble(context, op.Rn);

            Type[] argTypes = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), argTypes));

            context.EmitStvec(op.Rd);
        }
        else if (op.Size == 0 && op.Opc == 1)
        {
            //Single -> Double.
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));

            context.EmitLdvec(op.Rn);

            Type[] argTypes = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), argTypes));

            EmitStvecWithCastFromDouble(context, op.Rd);
        }
        else
        {
            //Invalid encoding.
            throw new InvalidOperationException();
        }
    }

    //Float -> signed integer (general purpose register), rounding with MidpointRounding.AwayFromZero.
    public static void Fcvtas_Gp(ILEmitterCtx context) =>
        EmitFcvt_s_Gp(context, () => EmitRoundMathCall(context, MidpointRounding.AwayFromZero));

    //Float -> unsigned integer (general purpose register), rounding with MidpointRounding.AwayFromZero.
    public static void Fcvtau_Gp(ILEmitterCtx context) =>
        EmitFcvt_u_Gp(context, () => EmitRoundMathCall(context, MidpointRounding.AwayFromZero));

    //Widening conversion: each source element is converted to the next larger
    //floating point format and the resulting vector is stored in Rd.
    public static void Fcvtl_V(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;

        int elemCount = 4 >> sizeF;

        //With a 128-bit register size the source elements start at index elemCount.
        int srcBase = op.RegisterSize == RegisterSize.Simd128 ? elemCount : 0;

        for (int elem = 0; elem < elemCount; elem++)
        {
            if (sizeF != 0)
            {
                //Single -> Double.
                EmitVectorExtractF(context, op.Rn, srcBase + elem, 0);

                context.Emit(OpCodes.Conv_R8);
            }
            else
            {
                //16-bit source element, converted by SoftFloat1632.FPConvert.
                EmitVectorExtractZx(context, op.Rn, srcBase + elem, 1);

                context.Emit(OpCodes.Conv_U2);

                context.EmitLdarg(TranslatedSub.StateArgIdx);

                context.EmitCall(typeof(SoftFloat1632), nameof(SoftFloat1632.FPConvert));
            }

            EmitVectorInsertTmpF(context, elem, sizeF);
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);
    }

    //Float -> signed integer (general purpose register), rounding with Math.Floor.
    public static void Fcvtms_Gp(ILEmitterCtx context) =>
        EmitFcvt_s_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Floor)));

    //Float -> unsigned integer (general purpose register), rounding with Math.Floor.
    public static void Fcvtmu_Gp(ILEmitterCtx context) =>
        EmitFcvt_u_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Floor)));

    //Narrowing conversion: each source element is converted to the next smaller
    //floating point format. With a 128-bit register size the results are written
    //starting at index elemCount and the existing content of Rd is kept below that;
    //otherwise the upper half of Rd is cleared.
    public static void Fcvtn_V(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;

        int elemCount = 4 >> sizeF;

        int dstBase = op.RegisterSize == RegisterSize.Simd128 ? elemCount : 0;

        if (dstBase != 0)
        {
            //Seed the temporary with Rd so the untouched elements survive.
            context.EmitLdvec(op.Rd);
            context.EmitStvectmp();
        }

        for (int elem = 0; elem < elemCount; elem++)
        {
            EmitVectorExtractF(context, op.Rn, elem, sizeF);

            if (sizeF != 0)
            {
                //Double -> Single.
                context.Emit(OpCodes.Conv_R4);

                EmitVectorInsertTmpF(context, dstBase + elem, 0);
            }
            else
            {
                //Single -> 16-bit element, converted by SoftFloat3216.FPConvert.
                context.EmitLdarg(TranslatedSub.StateArgIdx);

                context.EmitCall(typeof(SoftFloat3216), nameof(SoftFloat3216.FPConvert));

                context.Emit(OpCodes.Conv_U8);

                EmitVectorInsertTmp(context, dstBase + elem, 1);
            }
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);

        if (dstBase == 0)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    public static void Fcvtns_S(ILEmitterCtx context) => EmitFcvtn(context, signed: true, scalar: true);

    public static void Fcvtns_V(ILEmitterCtx context) => EmitFcvtn(context, signed: true, scalar: false);

    public static void Fcvtnu_S(ILEmitterCtx context) => EmitFcvtn(context, signed: false, scalar: true);

    public static void Fcvtnu_V(ILEmitterCtx context) => EmitFcvtn(context, signed: false, scalar: false);

    //Float -> signed integer (general purpose register), rounding with Math.Ceiling.
    public static void Fcvtps_Gp(ILEmitterCtx context) =>
        EmitFcvt_s_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Ceiling)));

    //Float -> unsigned integer (general purpose register), rounding with Math.Ceiling.
    public static void Fcvtpu_Gp(ILEmitterCtx context) =>
        EmitFcvt_u_Gp(context, () => EmitUnaryMathCall(context, nameof(Math.Ceiling)));

    //Float -> signed integer (general purpose register); no rounding call is emitted
    //before the saturating conversion.
    public static void Fcvtzs_Gp(ILEmitterCtx context) => EmitFcvt_s_Gp(context, () => { });

    public static void Fcvtzs_Gp_Fix(ILEmitterCtx context) => EmitFcvtzs_Gp_Fix(context);

    public static void Fcvtzs_S(ILEmitterCtx context) => EmitScalarFcvtzs(context);

    public static void Fcvtzs_V(ILEmitterCtx context) => EmitVectorFcvtzs(context);

    //Float -> unsigned integer (general purpose register); no rounding call is emitted
    //before the saturating conversion.
    public static void Fcvtzu_Gp(ILEmitterCtx context) => EmitFcvt_u_Gp(context, () => { });

    public static void Fcvtzu_Gp_Fix(ILEmitterCtx context) => EmitFcvtzu_Gp_Fix(context);

    public static void Fcvtzu_S(ILEmitterCtx context) => EmitScalarFcvtzu(context);

    public static void Fcvtzu_V(ILEmitterCtx context) => EmitVectorFcvtzu(context);

    //Integer register Rn -> float scalar in Rd. No Conv_R_Un is emitted here, unlike
    //in Ucvtf_Gp.
    public static void Scvtf_Gp(ILEmitterCtx context)
    {
        OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

        context.EmitLdintzr(op.Rn);

        bool is32Bits = context.CurrOp.RegisterSize == RegisterSize.Int32;

        if (is32Bits)
        {
            context.Emit(OpCodes.Conv_U4);
        }

        EmitFloatCast(context, op.Size);

        EmitScalarSetF(context, op.Rd, op.Size);
    }

    //Sign extended element 0 of Rn -> float scalar in Rd.
    public static void Scvtf_S(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        EmitVectorExtractSx(context, op.Rn, 0, op.Size + 2);

        EmitFloatCast(context, op.Size);

        EmitScalarSetF(context, op.Rd, op.Size);
    }

    public static void Scvtf_V(ILEmitterCtx context) => EmitVectorCvtf(context, signed: true);

    //Integer register Rn, treated as unsigned (Conv_R_Un), -> float scalar in Rd.
    public static void Ucvtf_Gp(ILEmitterCtx context)
    {
        OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

        context.EmitLdintzr(op.Rn);

        bool is32Bits = context.CurrOp.RegisterSize == RegisterSize.Int32;

        if (is32Bits)
        {
            context.Emit(OpCodes.Conv_U4);
        }

        context.Emit(OpCodes.Conv_R_Un);

        EmitFloatCast(context, op.Size);

        EmitScalarSetF(context, op.Rd, op.Size);
    }

    //Zero extended element 0 of Rn, treated as unsigned, -> float scalar in Rd.
    public static void Ucvtf_S(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2);

        context.Emit(OpCodes.Conv_R_Un);

        EmitFloatCast(context, op.Size);

        EmitScalarSetF(context, op.Rd, op.Size);
    }

    public static void Ucvtf_V(ILEmitterCtx context) => EmitVectorCvtf(context, signed: false);

    //Number of fractional bits for the current op: GetImmShr for an
    //OpCodeSimdShImm64, zero for anything else.
    private static int GetFBits(ILEmitterCtx context)
    {
        return context.CurrOp is OpCodeSimdShImm64 shiftOp ? GetImmShr(shiftOp) : 0;
    }

    //Emits a conversion of the stack top to float (size 0) or double (size 1).
    private static void EmitFloatCast(ILEmitterCtx context, int size)
    {
        switch (size)
        {
            case 0:
                context.Emit(OpCodes.Conv_R4);
                break;

            case 1:
                context.Emit(OpCodes.Conv_R8);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    //Float -> integer of the same width, rounding with MidpointRounding.ToEven and
    //then saturating through the VectorHelper.SatF* routines.
    private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;
        int sizeI = sizeF + 2;

        int byteCount = op.GetBitsCount() >> 3;
        int elemCount = scalar ? 1 : byteCount >> sizeI;

        //The saturating helper only depends on the element width and signedness.
        string satName;

        if (sizeF == 0)
        {
            satName = signed
                ? nameof(VectorHelper.SatF32ToS32)
                : nameof(VectorHelper.SatF32ToU32);
        }
        else /* if (sizeF == 1) */
        {
            satName = signed
                ? nameof(VectorHelper.SatF64ToS64)
                : nameof(VectorHelper.SatF64ToU64);
        }

        if (scalar && (sizeF == 0))
        {
            EmitVectorZeroLowerTmp(context);
        }

        for (int elem = 0; elem < elemCount; elem++)
        {
            EmitVectorExtractF(context, op.Rn, elem, sizeF);

            EmitRoundMathCall(context, MidpointRounding.ToEven);

            VectorHelper.EmitCall(context, satName);

            if (sizeF == 0)
            {
                context.Emit(OpCodes.Conv_U8);
            }

            EmitVectorInsertTmp(context, elem, sizeI);
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);

        if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    private static void EmitFcvt_s_Gp(ILEmitterCtx context, Action emit) => EmitFcvt___Gp(context, emit, true);

    private static void EmitFcvt_u_Gp(ILEmitterCtx context, Action emit) => EmitFcvt___Gp(context, emit, false);

    //Float element 0 of Rn -> integer register Rd. "emit" is invoked after the source
    //value has been pushed, so it can emit a rounding step before the conversion.
    private static void EmitFcvt___Gp(ILEmitterCtx context, Action emit, bool signed)
    {
        OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

        EmitVectorExtractF(context, op.Rn, 0, op.Size);

        emit();

        if (!signed)
        {
            EmitScalarFcvtu(context, op.Size, 0);
        }
        else
        {
            EmitScalarFcvts(context, op.Size, 0);
        }

        if (context.CurrOp.RegisterSize == RegisterSize.Int32)
        {
            context.Emit(OpCodes.Conv_U8);
        }

        context.EmitStintzr(op.Rd);
    }

    private static void EmitFcvtzs_Gp_Fix(ILEmitterCtx context) => EmitFcvtz__Gp_Fix(context, true);

    private static void EmitFcvtzu_Gp_Fix(ILEmitterCtx context) => EmitFcvtz__Gp_Fix(context, false);

    //Float element 0 of Rn -> integer register Rd, scaling by the FBits of the op.
    private static void EmitFcvtz__Gp_Fix(ILEmitterCtx context, bool signed)
    {
        OpCodeSimdCvt64 op = (OpCodeSimdCvt64)context.CurrOp;

        EmitVectorExtractF(context, op.Rn, 0, op.Size);

        if (!signed)
        {
            EmitScalarFcvtu(context, op.Size, op.FBits);
        }
        else
        {
            EmitScalarFcvts(context, op.Size, op.FBits);
        }

        if (context.CurrOp.RegisterSize == RegisterSize.Int32)
        {
            context.Emit(OpCodes.Conv_U8);
        }

        context.EmitStintzr(op.Rd);
    }

    private static void EmitVectorScvtf(ILEmitterCtx context) => EmitVectorCvtf(context, true);

    private static void EmitVectorUcvtf(ILEmitterCtx context) => EmitVectorCvtf(context, false);

    //Integer -> float of the same width for every element, with the result multiplied
    //by 1 / 2^fBits when fBits is not zero.
    private static void EmitVectorCvtf(ILEmitterCtx context, bool signed)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;
        int sizeI = sizeF + 2;

        int fBits = GetFBits(context);

        int elemCount = (op.GetBitsCount() >> 3) >> sizeI;

        OpCode castOp = sizeF == 0
            ? OpCodes.Conv_R4
            : OpCodes.Conv_R8;

        for (int elem = 0; elem < elemCount; elem++)
        {
            EmitVectorExtract(context, op.Rn, elem, sizeI, signed);

            if (!signed)
            {
                context.Emit(OpCodes.Conv_R_Un);
            }

            context.Emit(castOp);

            EmitI2fFBitsMul(context, sizeF, fBits);

            EmitVectorInsertF(context, op.Rd, elem, sizeF);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    private static void EmitScalarFcvtzs(ILEmitterCtx context) => EmitScalarFcvtz(context, true);

    private static void EmitScalarFcvtzu(ILEmitterCtx context) => EmitScalarFcvtz(context, false);

    //Float element 0 of Rn -> integer of the same width, stored as a scalar in Rd.
    //The value is multiplied by 2^fBits first when fBits is not zero.
    private static void EmitScalarFcvtz(ILEmitterCtx context, bool signed)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;
        int sizeI = sizeF + 2;

        int fBits = GetFBits(context);

        EmitVectorExtractF(context, op.Rn, 0, sizeF);

        EmitF2iFBitsMul(context, sizeF, fBits);

        if (sizeF == 0)
        {
            VectorHelper.EmitCall(context, signed
                ? nameof(VectorHelper.SatF32ToS32)
                : nameof(VectorHelper.SatF32ToU32));

            context.Emit(OpCodes.Conv_U8);
        }
        else /* if (sizeF == 1) */
        {
            VectorHelper.EmitCall(context, signed
                ? nameof(VectorHelper.SatF64ToS64)
                : nameof(VectorHelper.SatF64ToU64));
        }

        EmitScalarSet(context, op.Rd, sizeI);
    }

    private static void EmitVectorFcvtzs(ILEmitterCtx context) => EmitVectorFcvtz(context, true);

    private static void EmitVectorFcvtzu(ILEmitterCtx context) => EmitVectorFcvtz(context, false);

    //Float -> integer of the same width for every element of Rn, written to Rd.
    //Each value is multiplied by 2^fBits first when fBits is not zero.
    private static void EmitVectorFcvtz(ILEmitterCtx context, bool signed)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        int sizeF = op.Size & 1;
        int sizeI = sizeF + 2;

        int fBits = GetFBits(context);

        int elemCount = (op.GetBitsCount() >> 3) >> sizeI;

        //The saturating helper only depends on the element width and signedness.
        string satName;

        if (sizeF == 0)
        {
            satName = signed
                ? nameof(VectorHelper.SatF32ToS32)
                : nameof(VectorHelper.SatF32ToU32);
        }
        else /* if (sizeF == 1) */
        {
            satName = signed
                ? nameof(VectorHelper.SatF64ToS64)
                : nameof(VectorHelper.SatF64ToU64);
        }

        for (int elem = 0; elem < elemCount; elem++)
        {
            EmitVectorExtractF(context, op.Rn, elem, sizeF);

            EmitF2iFBitsMul(context, sizeF, fBits);

            VectorHelper.EmitCall(context, satName);

            if (sizeF == 0)
            {
                context.Emit(OpCodes.Conv_U8);
            }

            EmitVectorInsert(context, op.Rd, elem, sizeI);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    //Converts the float on the stack (size 0 = single, 1 = double) to a signed integer
    //whose width follows the register size of the current op, after the fBits scaling.
    private static void EmitScalarFcvts(ILEmitterCtx context, int size, int fBits)
    {
        if (size < 0 || size > 1)
        {
            throw new ArgumentOutOfRangeException(nameof(size));
        }

        EmitF2iFBitsMul(context, size, fBits);

        bool isSingle = size == 0;

        string satName;

        if (context.CurrOp.RegisterSize == RegisterSize.Int32)
        {
            satName = isSingle
                ? nameof(VectorHelper.SatF32ToS32)
                : nameof(VectorHelper.SatF64ToS32);
        }
        else
        {
            satName = isSingle
                ? nameof(VectorHelper.SatF32ToS64)
                : nameof(VectorHelper.SatF64ToS64);
        }

        VectorHelper.EmitCall(context, satName);
    }

    //Converts the float on the stack (size 0 = single, 1 = double) to an unsigned integer
    //whose width follows the register size of the current op, after the fBits scaling.
    private static void EmitScalarFcvtu(ILEmitterCtx context, int size, int fBits)
    {
        if (size < 0 || size > 1)
        {
            throw new ArgumentOutOfRangeException(nameof(size));
        }

        EmitF2iFBitsMul(context, size, fBits);

        bool isSingle = size == 0;

        string satName;

        if (context.CurrOp.RegisterSize == RegisterSize.Int32)
        {
            satName = isSingle
                ? nameof(VectorHelper.SatF32ToU32)
                : nameof(VectorHelper.SatF64ToU32);
        }
        else
        {
            satName = isSingle
                ? nameof(VectorHelper.SatF32ToU64)
                : nameof(VectorHelper.SatF64ToU64);
        }

        VectorHelper.EmitCall(context, satName);
    }

    //Multiplies the float on the stack by 2^fBits; nothing is emitted when fBits is zero.
    private static void EmitF2iFBitsMul(ILEmitterCtx context, int size, int fBits)
    {
        if (fBits == 0)
        {
            return;
        }

        switch (size)
        {
            case 0:
                context.EmitLdc_R4(MathF.Pow(2f, fBits));
                break;

            case 1:
                context.EmitLdc_R8(Math.Pow(2d, fBits));
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }

        context.Emit(OpCodes.Mul);
    }

    //Multiplies the float on the stack by 1 / 2^fBits; nothing is emitted when fBits is zero.
    private static void EmitI2fFBitsMul(ILEmitterCtx context, int size, int fBits)
    {
        if (fBits == 0)
        {
            return;
        }

        switch (size)
        {
            case 0:
                context.EmitLdc_R4(1f / MathF.Pow(2f, fBits));
                break;

            case 1:
                context.EmitLdc_R8(1d / Math.Pow(2d, fBits));
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(size));
        }

        context.Emit(OpCodes.Mul);
    }
}
}

View file

@ -0,0 +1,140 @@
using ChocolArm64.Decoders;
using ChocolArm64.Translation;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
//IL emitters for the SHA1 and SHA256 instructions. Each one pushes its operands,
//calls the matching SoftFallback routine and writes the result to Rd.
static partial class InstEmit
{
#region "Sha1"
    //Rd = SoftFallback.HashChoose(Rd, Rn[0], Rm); Rn[0] is extracted with size 2.
    public static void Sha1c_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        EmitVectorExtractZx(context, hashOp.Rn, 0, 2);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.HashChoose));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd (scalar, size 2) = SoftFallback.FixedRotate(Rn[0])
    public static void Sha1h_V(ILEmitterCtx context)
    {
        OpCodeSimd64 hashOp = (OpCodeSimd64)context.CurrOp;

        EmitVectorExtractZx(context, hashOp.Rn, 0, 2);

        SoftFallback.EmitCall(context, nameof(SoftFallback.FixedRotate));

        EmitScalarSet(context, hashOp.Rd, 2);
    }

    //Rd = SoftFallback.HashMajority(Rd, Rn[0], Rm); Rn[0] is extracted with size 2.
    public static void Sha1m_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        EmitVectorExtractZx(context, hashOp.Rn, 0, 2);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.HashMajority));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.HashParity(Rd, Rn[0], Rm); Rn[0] is extracted with size 2.
    public static void Sha1p_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        EmitVectorExtractZx(context, hashOp.Rn, 0, 2);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.HashParity));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.Sha1SchedulePart1(Rd, Rn, Rm)
    public static void Sha1su0_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Sha1SchedulePart1));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.Sha1SchedulePart2(Rd, Rn)
    public static void Sha1su1_V(ILEmitterCtx context)
    {
        OpCodeSimd64 hashOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Sha1SchedulePart2));

        context.EmitStvec(hashOp.Rd);
    }
#endregion

#region "Sha256"
    //Rd = SoftFallback.HashLower(Rd, Rn, Rm)
    public static void Sha256h_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.HashLower));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.HashUpper(Rd, Rn, Rm)
    public static void Sha256h2_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.HashUpper));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.Sha256SchedulePart1(Rd, Rn)
    public static void Sha256su0_V(ILEmitterCtx context)
    {
        OpCodeSimd64 hashOp = (OpCodeSimd64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Sha256SchedulePart1));

        context.EmitStvec(hashOp.Rd);
    }

    //Rd = SoftFallback.Sha256SchedulePart2(Rd, Rn, Rm)
    public static void Sha256su1_V(ILEmitterCtx context)
    {
        OpCodeSimdReg64 hashOp = (OpCodeSimdReg64)context.CurrOp;

        context.EmitLdvec(hashOp.Rd);
        context.EmitLdvec(hashOp.Rn);
        context.EmitLdvec(hashOp.Rm);

        SoftFallback.EmitCall(context, nameof(SoftFallback.Sha256SchedulePart2));

        context.EmitStvec(hashOp.Rd);
    }
#endregion
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,311 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
//IL emitters for the SIMD bitwise/logical instructions. Where an SSE2 path exists it
//is taken when Optimizations.UseSse2 is set; otherwise a per-element path is emitted.
static partial class InstEmit
{
    //Rd = Rn & Rm
    public static void And_V(ILEmitterCtx context)
    {
        if (!Optimizations.UseSse2)
        {
            EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.And));

            return;
        }

        EmitSse2Op(context, nameof(Sse2.And));
    }

    //Rd = Rn & ~Rm
    public static void Bic_V(ILEmitterCtx context)
    {
        if (!Optimizations.UseSse2)
        {
            EmitVectorBinaryOpZx(context, () =>
            {
                context.Emit(OpCodes.Not);
                context.Emit(OpCodes.And);
            });

            return;
        }

        OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

        //Rm is pushed first: it is the first argument of Sse2.AndNot.
        EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
        EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

        Type[] argTypes = new Type[]
        {
            VectorUIntTypesPerSizeLog2[op.Size],
            VectorUIntTypesPerSizeLog2[op.Size]
        };

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), argTypes));

        EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    //Immediate form: a Not followed by an And, applied through EmitVectorImmBinaryOp.
    public static void Bic_Vi(ILEmitterCtx context)
    {
        EmitVectorImmBinaryOp(context, () =>
        {
            context.Emit(OpCodes.Not);
            context.Emit(OpCodes.And);
        });
    }

    public static void Bif_V(ILEmitterCtx context) => EmitBitBif(context, true);

    public static void Bit_V(ILEmitterCtx context) => EmitBitBif(context, false);

    //Rd = ((Rd ^ Rn) & mask) ^ Rd, where mask is ~Rm when notRm is set and Rm otherwise.
    private static void EmitBitBif(ILEmitterCtx context, bool notRm)
    {
        OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

        if (Optimizations.UseSse2)
        {
            Type[] argTypes = new Type[]
            {
                VectorUIntTypesPerSizeLog2[op.Size],
                VectorUIntTypesPerSizeLog2[op.Size]
            };

            EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);
            EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
            EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), argTypes));

            string maskMethod = notRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);

            context.EmitCall(typeof(Sse2).GetMethod(maskMethod, argTypes));

            EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), argTypes));

            EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
        }
        else
        {
            int elemCount = (op.GetBitsCount() >> 3) >> op.Size;

            for (int elem = 0; elem < elemCount; elem++)
            {
                EmitVectorExtractZx(context, op.Rd, elem, op.Size);
                EmitVectorExtractZx(context, op.Rn, elem, op.Size);

                context.Emit(OpCodes.Xor);

                EmitVectorExtractZx(context, op.Rm, elem, op.Size);

                if (notRm)
                {
                    context.Emit(OpCodes.Not);
                }

                context.Emit(OpCodes.And);

                EmitVectorExtractZx(context, op.Rd, elem, op.Size);

                context.Emit(OpCodes.Xor);

                EmitVectorInsert(context, op.Rd, elem, op.Size);
            }
        }

        //Both paths finish by clearing the upper half for a 64-bit register size.
        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    //Rd = ((Rn ^ Rm) & Rd) ^ Rm
    public static void Bsl_V(ILEmitterCtx context)
    {
        if (!Optimizations.UseSse2)
        {
            EmitVectorTernaryOpZx(context, () =>
            {
                //The stack top is stashed in the temporary and reloaded right away,
                //so it can be used again for the final Xor.
                context.EmitSttmp();
                context.EmitLdtmp();

                context.Emit(OpCodes.Xor);
                context.Emit(OpCodes.And);

                context.EmitLdtmp();

                context.Emit(OpCodes.Xor);
            });

            return;
        }

        OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

        Type[] argTypes = new Type[]
        {
            VectorUIntTypesPerSizeLog2[op.Size],
            VectorUIntTypesPerSizeLog2[op.Size]
        };

        EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
        EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), argTypes));

        EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), argTypes));

        EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), argTypes));

        EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    //Rd = Rn ^ Rm
    public static void Eor_V(ILEmitterCtx context)
    {
        if (!Optimizations.UseSse2)
        {
            EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Xor));

            return;
        }

        EmitSse2Op(context, nameof(Sse2.Xor));
    }

    //Bitwise Not, applied through EmitVectorUnaryOpZx.
    public static void Not_V(ILEmitterCtx context) =>
        EmitVectorUnaryOpZx(context, () => context.Emit(OpCodes.Not));

    //A Not followed by an Or, applied through EmitVectorBinaryOpZx.
    public static void Orn_V(ILEmitterCtx context)
    {
        EmitVectorBinaryOpZx(context, () =>
        {
            context.Emit(OpCodes.Not);
            context.Emit(OpCodes.Or);
        });
    }

    //Rd = Rn | Rm
    public static void Orr_V(ILEmitterCtx context)
    {
        if (!Optimizations.UseSse2)
        {
            EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Or));

            return;
        }

        EmitSse2Op(context, nameof(Sse2.Or));
    }

    //Immediate form: an Or, applied through EmitVectorImmBinaryOp.
    public static void Orr_Vi(ILEmitterCtx context) =>
        EmitVectorImmBinaryOp(context, () => context.Emit(OpCodes.Or));

    //Passes every byte of Rn through SoftFallback.ReverseBits8 and writes it to Rd.
    public static void Rbit_V(ILEmitterCtx context)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        bool is128Bits = op.RegisterSize == RegisterSize.Simd128;

        int byteCount = is128Bits ? 16 : 8;

        for (int elem = 0; elem < byteCount; elem++)
        {
            EmitVectorExtractZx(context, op.Rn, elem, 0);

            context.Emit(OpCodes.Conv_U4);

            SoftFallback.EmitCall(context, nameof(SoftFallback.ReverseBits8));

            context.Emit(OpCodes.Conv_U8);

            EmitVectorInsert(context, op.Rd, elem, 0);
        }

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }

    public static void Rev16_V(ILEmitterCtx context) => EmitRev_V(context, containerSize: 1);

    public static void Rev32_V(ILEmitterCtx context) => EmitRev_V(context, containerSize: 2);

    public static void Rev64_V(ILEmitterCtx context) => EmitRev_V(context, containerSize: 3);

    //Reverses the order of the elements inside every container of 2^containerSize bytes.
    //The element size of the op must be smaller than the container size.
    private static void EmitRev_V(ILEmitterCtx context, int containerSize)
    {
        OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

        if (op.Size >= containerSize)
        {
            throw new InvalidOperationException();
        }

        int elemCount = (op.GetBitsCount() >> 3) >> op.Size;

        //XOR-ing an index with this mask mirrors it inside its container.
        int mirrorMask = (1 << (containerSize - op.Size)) - 1;

        for (int elem = 0; elem < elemCount; elem++)
        {
            int srcElem = elem ^ mirrorMask;

            EmitVectorExtractZx(context, op.Rn, srcElem, op.Size);
            EmitVectorInsertTmp(context, elem, op.Size);
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }
    }
}
}

View file

@ -0,0 +1,185 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using static ChocolArm64.Instructions.InstEmitMemoryHelper;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
//IL emitters for the SIMD structure load/store instructions
//(multiple structures and single structure forms).
static partial class InstEmit
{
    public static void Ld__Vms(ILEmitterCtx context) => EmitSimdMemMs(context, isLoad: true);

    public static void Ld__Vss(ILEmitterCtx context) => EmitSimdMemSs(context, isLoad: true);

    public static void St__Vms(ILEmitterCtx context) => EmitSimdMemMs(context, isLoad: false);

    public static void St__Vss(ILEmitterCtx context) => EmitSimdMemSs(context, isLoad: false);

    //Multiple structures form: walks Reps x Elems x SElems, moving one element between
    //memory at Rn + offset and the selected register on every step.
    private static void EmitSimdMemMs(ILEmitterCtx context, bool isLoad)
    {
        OpCodeSimdMemMs64 op = (OpCodeSimdMemMs64)context.CurrOp;

        int byteOffset = 0;

        for (int rep = 0; rep < op.Reps; rep++)
        {
            for (int elem = 0; elem < op.Elems; elem++)
            {
                for (int sElem = 0; sElem < op.SElems; sElem++)
                {
                    //Register numbers wrap around after 31.
                    int reg = (op.Rt + rep + sElem) & 0x1f;

                    //Memory argument followed by the address Rn + byteOffset.
                    context.EmitLdarg(TranslatedSub.MemoryArgIdx);
                    context.EmitLdint(op.Rn);
                    context.EmitLdc_I8(byteOffset);

                    context.Emit(OpCodes.Add);

                    if (isLoad)
                    {
                        EmitReadZxCall(context, op.Size);

                        EmitVectorInsert(context, reg, elem, op.Size);

                        //Clear the upper half once the last element has been written.
                        if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
                        {
                            EmitVectorZeroUpper(context, reg);
                        }
                    }
                    else
                    {
                        EmitVectorExtractZx(context, reg, elem, op.Size);

                        EmitWriteCall(context, op.Size);
                    }

                    byteOffset += 1 << op.Size;
                }
            }
        }

        if (op.WBack)
        {
            EmitSimdMemWBack(context, byteOffset);
        }
    }

    //Single structure form: moves one element per register, either replicated to
    //every element of the register (loads only) or at the element index of the op.
    private static void EmitSimdMemSs(ILEmitterCtx context, bool isLoad)
    {
        OpCodeSimdMemSs64 op = (OpCodeSimdMemSs64)context.CurrOp;

        int byteOffset = 0;

        //Pushes the memory argument followed by the address Rn + byteOffset,
        //using the value byteOffset has at the time of the call.
        void PushAddress()
        {
            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdint(op.Rn);
            context.EmitLdc_I8(byteOffset);

            context.Emit(OpCodes.Add);
        }

        if (op.Replicate)
        {
            //Only loads uses the replicate mode.
            if (!isLoad)
            {
                throw new InvalidOperationException();
            }

            int elemCount = (op.GetBitsCount() >> 3) >> op.Size;

            for (int sElem = 0; sElem < op.SElems; sElem++)
            {
                int reg = (op.Rt + sElem) & 0x1f;

                //The same address is read once per destination element.
                for (int elem = 0; elem < elemCount; elem++)
                {
                    PushAddress();

                    EmitReadZxCall(context, op.Size);

                    EmitVectorInsert(context, reg, elem, op.Size);
                }

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, reg);
                }

                byteOffset += 1 << op.Size;
            }
        }
        else
        {
            for (int sElem = 0; sElem < op.SElems; sElem++)
            {
                int reg = (op.Rt + sElem) & 0x1f;

                PushAddress();

                if (isLoad)
                {
                    EmitReadZxCall(context, op.Size);

                    EmitVectorInsert(context, reg, op.Index, op.Size);
                }
                else
                {
                    EmitVectorExtractZx(context, reg, op.Index, op.Size);

                    EmitWriteCall(context, op.Size);
                }

                byteOffset += 1 << op.Size;
            }
        }

        if (op.WBack)
        {
            EmitSimdMemWBack(context, byteOffset);
        }
    }

    //Write back: Rn += Rm, or Rn += offset when Rm is the zero register index.
    private static void EmitSimdMemWBack(ILEmitterCtx context, int offset)
    {
        OpCodeMemReg64 op = (OpCodeMemReg64)context.CurrOp;

        context.EmitLdint(op.Rn);

        if (op.Rm == CpuThreadState.ZrIndex)
        {
            context.EmitLdc_I8(offset);
        }
        else
        {
            context.EmitLdint(op.Rm);
        }

        context.Emit(OpCodes.Add);

        context.EmitStint(op.Rn);
    }
}
}

View file

@ -0,0 +1,562 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
//Fills every lane of Rd with the value of general purpose register Rn.
//Uses Sse2.SetAllVector128 when SSE2 is enabled, otherwise inserts lane by lane.
public static void Dup_Gp(ILEmitterCtx context)
{
    OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

    if (!Optimizations.UseSse2)
    {
        int lanes = (op.GetBitsCount() >> 3) >> op.Size;

        for (int lane = 0; lane < lanes; lane++)
        {
            context.EmitLdintzr(op.Rn);

            EmitVectorInsert(context, op.Rd, lane, op.Size);
        }
    }
    else
    {
        context.EmitLdintzr(op.Rn);

        //Narrow the value to the lane width; other sizes are left unconverted.
        if (op.Size == 0)
        {
            context.Emit(OpCodes.Conv_U1);
        }
        else if (op.Size == 1)
        {
            context.Emit(OpCodes.Conv_U2);
        }
        else if (op.Size == 2)
        {
            context.Emit(OpCodes.Conv_U4);
        }

        Type[] argTypes = new Type[] { UIntTypesPerSizeLog2[op.Size] };

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), argTypes));

        EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
    }

    //Both paths clear the upper half for 64-bit vector operations.
    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Extracts (zero-extended) element DstIndex of Rn and stores it as the scalar value of Rd.
public static void Dup_S(ILEmitterCtx context)
{
    var op = (OpCodeSimdIns64)context.CurrOp;

    int size = op.Size;

    EmitVectorExtractZx(context, op.Rn, op.DstIndex, size);
    EmitScalarSet(context, op.Rd, size);
}
//Copies element DstIndex of Rn into every lane of Rd, clearing the
//upper half of Rd for 64-bit vector operations.
public static void Dup_V(ILEmitterCtx context)
{
    OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp;

    int lanes = (op.GetBitsCount() >> 3) >> op.Size;

    for (int lane = 0; lane < lanes; lane++)
    {
        //The source element is re-extracted for every destination lane.
        EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);
        EmitVectorInsert(context, op.Rd, lane, op.Size);
    }

    bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Builds Rd byte by byte from the concatenation of Rn and Rm, starting at byte
//Imm4 of Rn and continuing from byte 0 of Rm once Rn is exhausted. The result
//is assembled in the temporary vector, which is seeded with the current Rd.
public static void Ext_V(ILEmitterCtx context)
{
    OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;

    context.EmitLdvec(op.Rd);
    context.EmitStvectmp();

    int byteCount = op.GetBitsCount() >> 3;
    int srcByte   = op.Imm4;

    for (int dstByte = 0; dstByte < byteCount; dstByte++)
    {
        bool fromFirst = op.Imm4 + dstByte < byteCount;

        //Wrap around to the start of the second source register.
        if (srcByte == byteCount)
        {
            srcByte = 0;
        }

        EmitVectorExtractZx(context, fromFirst ? op.Rn : op.Rm, srcByte, 0);
        EmitVectorInsertTmp(context, dstByte, 0);

        srcByte++;
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Floating-point conditional select: element 0 of Rn is used when the branch on
//op.Cond is taken, element 0 of Rm otherwise. The chosen value becomes the scalar Rd.
public static void Fcsel_S(ILEmitterCtx context)
{
    OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp;

    ILLabel whenCondHolds = new ILLabel();
    ILLabel done          = new ILLabel();

    context.EmitCondBranch(whenCondHolds, op.Cond);

    //Fall-through path: pick Rm.
    EmitVectorExtractF(context, op.Rm, 0, op.Size);
    context.Emit(OpCodes.Br_S, done);

    //Branch target: pick Rn.
    context.MarkLabel(whenCondHolds);
    EmitVectorExtractF(context, op.Rn, 0, op.Size);

    context.MarkLabel(done);

    EmitScalarSetF(context, op.Rd, op.Size);
}
//Moves 64-bit element 0 of vector Rn to general purpose register Rd,
//truncating to 32 bits when the operation's register size requires it.
public static void Fmov_Ftoi(ILEmitterCtx context)
{
    var op = (OpCodeSimdCvt64)context.CurrOp;

    const int lane = 0;

    EmitVectorExtractZx(context, op.Rn, lane, 3);
    EmitIntZeroUpperIfNeeded(context);
    context.EmitStintzr(op.Rd);
}

//Same as Fmov_Ftoi, but reads 64-bit element 1 (the upper half) of vector Rn.
public static void Fmov_Ftoi1(ILEmitterCtx context)
{
    var op = (OpCodeSimdCvt64)context.CurrOp;

    const int lane = 1;

    EmitVectorExtractZx(context, op.Rn, lane, 3);
    EmitIntZeroUpperIfNeeded(context);
    context.EmitStintzr(op.Rd);
}
//Moves general purpose register Rn into the scalar (64-bit element) value of Rd,
//truncating to 32 bits first when the operation's register size requires it.
public static void Fmov_Itof(ILEmitterCtx context)
{
    var op = (OpCodeSimdCvt64)context.CurrOp;

    context.EmitLdintzr(op.Rn);
    EmitIntZeroUpperIfNeeded(context);

    EmitScalarSet(context, op.Rd, 3);
}

//Moves general purpose register Rn into 64-bit element 1 of vector Rd.
public static void Fmov_Itof1(ILEmitterCtx context)
{
    var op = (OpCodeSimdCvt64)context.CurrOp;

    context.EmitLdintzr(op.Rn);
    EmitIntZeroUpperIfNeeded(context);

    EmitVectorInsert(context, op.Rd, 1, 3);
}
//Copies floating-point element 0 of Rn into the scalar value of Rd.
public static void Fmov_S(ILEmitterCtx context)
{
    var op = (OpCodeSimd64)context.CurrOp;

    int size = op.Size;

    EmitVectorExtractF(context, op.Rn, 0, size);
    EmitScalarSetF(context, op.Rd, size);
}

//Stores the immediate's raw bits as the scalar value of Rd.
//The element size passed on is op.Size + 2.
public static void Fmov_Si(ILEmitterCtx context)
{
    var op = (OpCodeSimdFmov64)context.CurrOp;

    int elemSize = op.Size + 2;

    context.EmitLdc_I8(op.Imm);
    EmitScalarSet(context, op.Rd, elemSize);
}
//Writes the immediate's raw bits into every lane of Rd (lane size op.Size + 2),
//clearing the upper half of Rd for 64-bit vector operations.
public static void Fmov_V(ILEmitterCtx context)
{
    OpCodeSimdImm64 op = (OpCodeSimdImm64)context.CurrOp;

    bool is128Bit = op.RegisterSize == RegisterSize.Simd128;

    //4 or 2 lanes at op.Size == 0, halved for each size step.
    int lanes = (is128Bit ? 4 : 2) >> op.Size;

    for (int lane = 0; lane < lanes; lane++)
    {
        context.EmitLdc_I8(op.Imm);

        EmitVectorInsert(context, op.Rd, lane, op.Size + 2);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Inserts general purpose register Rn into element DstIndex of Rd.
public static void Ins_Gp(ILEmitterCtx context)
{
    var op = (OpCodeSimdIns64)context.CurrOp;

    context.EmitLdintzr(op.Rn);

    EmitVectorInsert(context, op.Rd, op.DstIndex, op.Size);
}

//Copies element SrcIndex of Rn into element DstIndex of Rd.
public static void Ins_V(ILEmitterCtx context)
{
    var op = (OpCodeSimdIns64)context.CurrOp;

    int size = op.Size;

    EmitVectorExtractZx(context, op.Rn, op.SrcIndex, size);
    EmitVectorInsert(context, op.Rd, op.DstIndex, size);
}
//Vector immediate move: the immediate is used as is (no extra IL is emitted on it).
public static void Movi_V(ILEmitterCtx context)
{
    Action noOp = () => { };

    EmitVectorImmUnaryOp(context, noOp);
}

//Vector immediate move with bitwise NOT applied to the immediate.
public static void Mvni_V(ILEmitterCtx context)
{
    Action invert = () => context.Emit(OpCodes.Not);

    EmitVectorImmUnaryOp(context, invert);
}
//Sign-extends element DstIndex of Rn and stores it in general purpose register Rd,
//truncating to 32 bits when the operation's register size requires it.
public static void Smov_S(ILEmitterCtx context)
{
    var op = (OpCodeSimdIns64)context.CurrOp;

    EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size);
    EmitIntZeroUpperIfNeeded(context);

    context.EmitStintzr(op.Rd);
}
//Table lookup: loads Rm followed by op.Size consecutive table registers starting
//at Rn (wrapping at register 31) and calls the matching VectorHelper.TblN routine.
//The helper's result is stored in Rd.
public static void Tbl_V(ILEmitterCtx context)
{
    OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;

    context.EmitLdvec(op.Rm);

    for (int table = 0; table < op.Size; table++)
    {
        context.EmitLdvec((op.Rn + table) & 0x1f);
    }

    string name64;
    string name128;

    //op.Size selects the helper by table register count (1 to 4).
    switch (op.Size)
    {
        case 1:
            name64  = nameof(VectorHelper.Tbl1_V64);
            name128 = nameof(VectorHelper.Tbl1_V128);
            break;

        case 2:
            name64  = nameof(VectorHelper.Tbl2_V64);
            name128 = nameof(VectorHelper.Tbl2_V128);
            break;

        case 3:
            name64  = nameof(VectorHelper.Tbl3_V64);
            name128 = nameof(VectorHelper.Tbl3_V128);
            break;

        case 4:
            name64  = nameof(VectorHelper.Tbl4_V64);
            name128 = nameof(VectorHelper.Tbl4_V128);
            break;

        default:
            throw new InvalidOperationException();
    }

    VectorHelper.EmitCall(context, name64, name128);

    context.EmitStvec(op.Rd);
}
//Transpose using the even-numbered source elements (part 0).
public static void Trn1_V(ILEmitterCtx context) => EmitVectorTranspose(context, part: 0);

//Transpose using the odd-numbered source elements (part 1).
public static void Trn2_V(ILEmitterCtx context) => EmitVectorTranspose(context, part: 1);
//Zero-extends element DstIndex of Rn and stores it in general purpose register Rd.
public static void Umov_S(ILEmitterCtx context)
{
    var op = (OpCodeSimdIns64)context.CurrOp;

    EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size);

    context.EmitStintzr(op.Rd);
}
//Unzip taking the even-numbered source elements (part 0).
public static void Uzp1_V(ILEmitterCtx context) => EmitVectorUnzip(context, part: 0);

//Unzip taking the odd-numbered source elements (part 1).
public static void Uzp2_V(ILEmitterCtx context) => EmitVectorUnzip(context, part: 1);
//Narrows each double-width element of Rn to op.Size and writes the results to the
//lower half of Rd (register size other than Simd128) or the upper half (Simd128).
public static void Xtn_V(ILEmitterCtx context)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int elems = 8 >> op.Size;

    //Index of the first destination lane; non-zero only for the upper-half form.
    int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    if (!Optimizations.UseSse41 || op.Size >= 2)
    {
        //Generic path: move element by element through the temporary vector.
        if (part != 0)
        {
            //Preserve the lower half of Rd.
            context.EmitLdvec(op.Rd);
            context.EmitStvectmp();
        }

        for (int elem = 0; elem < elems; elem++)
        {
            EmitVectorExtractZx(context, op.Rn, elem, op.Size + 1);
            EmitVectorInsertTmp(context, part + elem, op.Size);
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);

        if (part == 0)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }

        return;
    }

    //Loads an all-zero vector with the same lane width as the source.
    void EmitZeroVector()
    {
        if (op.Size == 0)
        {
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero));
        }
        else if (op.Size == 1)
        {
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero));
        }
    }

    //Lower-half form: pack(source, zero). Upper-half form: pack(zero, source).
    if (part != 0)
    {
        EmitZeroVector();
    }

    EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);

    //Broadcast a mask that keeps only the lower half of each wide element.
    if (op.Size == 0)
    {
        context.EmitLdc_I4(0x00ff);
    }
    else if (op.Size == 1)
    {
        context.EmitLdc_I4(0x0000ffff);
    }

    Type wideScalar = IntTypesPerSizeLog2[op.Size + 1];

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideScalar }));

    Type wideVector = VectorIntTypesPerSizeLog2[op.Size + 1];

    Type[] wideArgs = new Type[] { wideVector, wideVector };

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideArgs));

    if (part == 0)
    {
        EmitZeroVector();
    }

    //Pack with PackUnsignedSaturate (Sse2 for bytes, Sse41 for 16-bit lanes).
    //Nothing should actually saturate since the upper bits were masked off.
    Type packHost = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);

    context.EmitCall(packHost.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideArgs));

    if (part != 0)
    {
        //Upper-half form: clear the upper half of Rd and OR the packed result into it.
        EmitVectorZeroUpper(context, op.Rd);

        EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);

        Type narrowVector = VectorUIntTypesPerSizeLog2[op.Size];

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowVector, narrowVector }));
    }

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
}
//Zip (interleave) the lower halves of the sources (part 0).
public static void Zip1_V(ILEmitterCtx context) => EmitVectorZip(context, part: 0);

//Zip (interleave) the upper halves of the sources (part 1).
public static void Zip2_V(ILEmitterCtx context) => EmitVectorZip(context, part: 1);
//Truncates the 64-bit value on the evaluation stack to 32 bits and zero-extends it
//back when the current operation's register size is Int32 or Simd64.
private static void EmitIntZeroUpperIfNeeded(ILEmitterCtx context)
{
    RegisterSize size = context.CurrOp.RegisterSize;

    switch (size)
    {
        case RegisterSize.Int32:
        case RegisterSize.Simd64:
            context.Emit(OpCodes.Conv_U4);
            context.Emit(OpCodes.Conv_U8);
            break;
    }
}
//Shared body of Trn1_V/Trn2_V. For each pair of lanes, element (2 * pair + part)
//of Rn goes to the even destination lane and the same element of Rm to the odd one.
private static void EmitVectorTranspose(ILEmitterCtx context, int part)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int pairCount = (op.GetBitsCount() >> 4) >> op.Size;

    for (int pair = 0; pair < pairCount; pair++)
    {
        int even = pair * 2;
        int odd  = even + 1;

        EmitVectorExtractZx(context, op.Rn, even + part, op.Size);
        EmitVectorExtractZx(context, op.Rm, even + part, op.Size);

        //The Rm element is on top of the stack, so it is inserted first.
        EmitVectorInsertTmp(context, odd,  op.Size);
        EmitVectorInsertTmp(context, even, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Shared body of Uzp1_V/Uzp2_V. Element (2 * i + part) of Rn goes to destination
//lane i, and the same element of Rm goes to destination lane (pairs + i).
private static void EmitVectorUnzip(ILEmitterCtx context, int part)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    int pairCount = (op.GetBitsCount() >> 4) >> op.Size;

    for (int lane = 0; lane < pairCount; lane++)
    {
        int srcElem = (lane << 1) + part;

        EmitVectorExtractZx(context, op.Rn, srcElem, op.Size);
        EmitVectorExtractZx(context, op.Rm, srcElem, op.Size);

        //The Rm element is on top of the stack, so it is inserted first.
        EmitVectorInsertTmp(context, pairCount + lane, op.Size);
        EmitVectorInsertTmp(context, lane,             op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Shared body of Zip1_V/Zip2_V: interleaves elements of Rn and Rm, taken from the
//lower half of the sources (part == 0) or from the upper half (part != 0).
private static void EmitVectorZip(ILEmitterCtx context, int part)
{
    OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

    bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

    if (!Optimizations.UseSse2)
    {
        int pairCount = (op.GetBitsCount() >> 4) >> op.Size;
        int firstElem = part != 0 ? pairCount : 0;

        for (int pair = 0; pair < pairCount; pair++)
        {
            int dstLane = pair << 1;

            EmitVectorExtractZx(context, op.Rn, firstElem + pair, op.Size);
            EmitVectorExtractZx(context, op.Rm, firstElem + pair, op.Size);

            //The Rm element is on top of the stack, so it is inserted first.
            EmitVectorInsertTmp(context, dstLane + 1, op.Size);
            EmitVectorInsertTmp(context, dstLane,     op.Size);
        }

        context.EmitLdvectmp();
        context.EmitStvec(op.Rd);

        if (is64Bit)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }

        return;
    }

    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
    EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

    Type vecType = VectorUIntTypesPerSizeLog2[op.Size];

    Type[] unpackArgs = new Type[] { vecType, vecType };

    //64-bit operations always unpack the low half; the upper-half form then
    //shifts the interleaved result down by 8 bytes (see below).
    string unpackName = part == 0 || is64Bit
        ? nameof(Sse2.UnpackLow)
        : nameof(Sse2.UnpackHigh);

    context.EmitCall(typeof(Sse2).GetMethod(unpackName, unpackArgs));

    if (is64Bit && part != 0)
    {
        context.EmitLdc_I4(8);

        Type[] shiftArgs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

        context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shiftArgs));
    }

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (is64Bit && part == 0)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
}
}

View file

@ -0,0 +1,865 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
//Rounding shift right narrow (vector).
public static void Rshrn_V(ILEmitterCtx context) => EmitVectorShrImmNarrowOpZx(context, round: true);

//Scalar shift left by the immediate obtained from GetImmShl.
public static void Shl_S(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    void EmitShl()
    {
        context.EmitLdc_I4(GetImmShl(op));
        context.Emit(OpCodes.Shl);
    }

    EmitScalarUnaryOpZx(context, EmitShl);
}
//Vector shift left by immediate. Uses Sse2.ShiftLeftLogical for lanes wider than
//a byte when SSE2 is enabled; otherwise falls back to the per-element helper.
public static void Shl_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0;

    if (!useSse2)
    {
        EmitVectorUnaryOpZx(context, () =>
        {
            context.EmitLdc_I4(GetImmShl(op));
            context.Emit(OpCodes.Shl);
        });

        return;
    }

    Type[] shiftArgs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

    context.EmitLdc_I4(GetImmShl(op));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Widening shift left where the shift amount equals the source element width in bits.
public static void Shll_V(ILEmitterCtx context)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int elemBits = 8 << op.Size;

    Action shl = () => context.Emit(OpCodes.Shl);

    EmitVectorShImmWidenBinaryZx(context, shl, elemBits);
}

//Shift right narrow (vector), without rounding.
public static void Shrn_V(ILEmitterCtx context) => EmitVectorShrImmNarrowOpZx(context, round: false);
//Shift left and insert: each Rd lane becomes (Rn lane << shift) | (Rd lane & mask),
//where mask covers the low "shift" bits, i.e. the bits vacated by the shift.
public static void Sli_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int lanes = (op.GetBitsCount() >> 3) >> op.Size;

    int shift = GetImmShl(op);

    //A zero shift keeps nothing from the destination.
    ulong keepMask = shift == 0 ? 0 : ulong.MaxValue >> (64 - shift);

    for (int lane = 0; lane < lanes; lane++)
    {
        //Shifted source.
        EmitVectorExtractZx(context, op.Rn, lane, op.Size);
        context.EmitLdc_I4(shift);
        context.Emit(OpCodes.Shl);

        //Preserved low bits of the destination.
        EmitVectorExtractZx(context, op.Rd, lane, op.Size);
        context.EmitLdc_I8((long)keepMask);
        context.Emit(OpCodes.And);

        context.Emit(OpCodes.Or);

        EmitVectorInsert(context, op.Rd, lane, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Saturating shift right narrow family. The flag name gives scalar/vector and the
//signedness of source and destination (Sx = signed, Zx = unsigned); the "r"
//variants go through the rounding helper.
public static void Sqrshrn_S(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);

public static void Sqrshrn_V(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);

public static void Sqrshrun_S(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);

public static void Sqrshrun_V(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);

public static void Sqshrn_S(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx);

public static void Sqshrn_V(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx);

public static void Sqshrun_S(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx);

public static void Sqshrun_V(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);

//Signed rounding shift right (scalar).
public static void Srshr_S(ILEmitterCtx context)
    => EmitScalarShrImmOpSx(context, ShrImmFlags.Round);
//Signed rounding shift right (vector). The SSE2 path handles 16 and 32-bit lanes:
//result = (Rn >> shift, arithmetic) + rounding bit, where the rounding bit is the
//last bit shifted out, isolated with (Rn << (laneBits - shift)) >>> (laneBits - 1).
public static void Srshr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

    if (!useSse2)
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round);

        return;
    }

    Type vecType = VectorIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    int shift    = GetImmShr(op);
    int laneBits = 8 << op.Size;

    EmitLdvecWithSignedCast(context, op.Rn, op.Size);

    //Keep a copy of the source for the main shift.
    context.Emit(OpCodes.Dup);
    context.EmitStvectmp();

    //Rounding bit.
    context.EmitLdc_I4(laneBits - shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

    context.EmitLdc_I4(laneBits - 1);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    //Main arithmetic shift.
    context.EmitLdvectmp();

    context.EmitLdc_I4(shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs));

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithSignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Signed rounding shift right and accumulate (scalar).
public static void Srsra_S(ILEmitterCtx context)
    => EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
//Signed rounding shift right and accumulate (vector). The SSE2 path handles 16 and
//32-bit lanes: Rd = Rd + ((Rn >> shift, arithmetic) + rounding bit), where the
//rounding bit is (Rn << (laneBits - shift)) >>> (laneBits - 1).
public static void Srsra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

    if (!useSse2)
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

        return;
    }

    Type vecType = VectorIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    int shift    = GetImmShr(op);
    int laneBits = 8 << op.Size;

    //Accumulator stays at the bottom of the stack until the final add.
    EmitLdvecWithSignedCast(context, op.Rd, op.Size);
    EmitLdvecWithSignedCast(context, op.Rn, op.Size);

    //Keep a copy of the source for the main shift.
    context.Emit(OpCodes.Dup);
    context.EmitStvectmp();

    //Rounding bit.
    context.EmitLdc_I4(laneBits - shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

    context.EmitLdc_I4(laneBits - 1);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    //Main arithmetic shift.
    context.EmitLdvectmp();

    context.EmitLdc_I4(shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs));

    //First add applies the rounding bit, second add accumulates into Rd.
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithSignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Signed shift left by register (vector).
public static void Sshl_V(ILEmitterCtx context) => EmitVectorShl(context, signed: true);

//Signed widening shift left by the immediate obtained from GetImmShl.
public static void Sshll_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    Action shl = () => context.Emit(OpCodes.Shl);

    EmitVectorShImmWidenBinarySx(context, shl, GetImmShl(op));
}

//Signed shift right by immediate (scalar).
public static void Sshr_S(ILEmitterCtx context) => EmitShrImmOp(context, ShrImmFlags.ScalarSx);
//Signed shift right by immediate (vector). Uses Sse2.ShiftRightArithmetic for 16
//and 32-bit lanes when SSE2 is enabled; otherwise uses the per-element helper.
public static void Sshr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

    if (!useSse2)
    {
        EmitShrImmOp(context, ShrImmFlags.VectorSx);

        return;
    }

    Type[] shiftArgs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };

    EmitLdvecWithSignedCast(context, op.Rn, op.Size);

    context.EmitLdc_I4(GetImmShr(op));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs));

    EmitStvecWithSignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Signed shift right and accumulate (scalar).
public static void Ssra_S(ILEmitterCtx context)
    => EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate);
//Signed shift right and accumulate (vector): Rd = Rd + (Rn >> shift, arithmetic).
//The SSE2 path handles 16 and 32-bit lanes; other sizes use the per-element helper.
public static void Ssra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

    if (!useSse2)
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate);

        return;
    }

    Type vecType = VectorIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    //Accumulator first, then the shifted source on top of it.
    EmitLdvecWithSignedCast(context, op.Rd, op.Size);
    EmitLdvecWithSignedCast(context, op.Rn, op.Size);

    context.EmitLdc_I4(GetImmShr(op));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftArgs));

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithSignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Unsigned saturating shift right narrow family (unsigned source and destination);
//the "r" variants go through the rounding helper.
public static void Uqrshrn_S(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);

public static void Uqrshrn_V(ILEmitterCtx context)
    => EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);

public static void Uqshrn_S(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx);

public static void Uqshrn_V(ILEmitterCtx context)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx);

//Unsigned rounding shift right (scalar).
public static void Urshr_S(ILEmitterCtx context)
    => EmitScalarShrImmOpZx(context, ShrImmFlags.Round);
//Unsigned rounding shift right (vector). The SSE2 path handles lanes wider than a
//byte: result = (Rn >>> shift) + rounding bit, where the rounding bit is the last
//bit shifted out, isolated with (Rn << (laneBits - shift)) >>> (laneBits - 1).
public static void Urshr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0;

    if (!useSse2)
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round);

        return;
    }

    Type vecType = VectorUIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    int shift    = GetImmShr(op);
    int laneBits = 8 << op.Size;

    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

    //Keep a copy of the source for the main shift.
    context.Emit(OpCodes.Dup);
    context.EmitStvectmp();

    //Rounding bit.
    context.EmitLdc_I4(laneBits - shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

    context.EmitLdc_I4(laneBits - 1);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    //Main logical shift.
    context.EmitLdvectmp();

    context.EmitLdc_I4(shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Unsigned rounding shift right and accumulate (scalar).
public static void Ursra_S(ILEmitterCtx context)
    => EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
//Unsigned rounding shift right and accumulate (vector). The SSE2 path handles lanes
//wider than a byte: Rd = Rd + ((Rn >>> shift) + rounding bit), where the rounding
//bit is (Rn << (laneBits - shift)) >>> (laneBits - 1).
public static void Ursra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0;

    if (!useSse2)
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

        return;
    }

    Type vecType = VectorUIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    int shift    = GetImmShr(op);
    int laneBits = 8 << op.Size;

    //Accumulator stays at the bottom of the stack until the final add.
    EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

    //Keep a copy of the source for the main shift.
    context.Emit(OpCodes.Dup);
    context.EmitStvectmp();

    //Rounding bit.
    context.EmitLdc_I4(laneBits - shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftArgs));

    context.EmitLdc_I4(laneBits - 1);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    //Main logical shift.
    context.EmitLdvectmp();

    context.EmitLdc_I4(shift);
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    //First add applies the rounding bit, second add accumulates into Rd.
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Unsigned shift left by register (vector).
public static void Ushl_V(ILEmitterCtx context) => EmitVectorShl(context, signed: false);

//Unsigned widening shift left by the immediate obtained from GetImmShl.
public static void Ushll_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    Action shl = () => context.Emit(OpCodes.Shl);

    EmitVectorShImmWidenBinaryZx(context, shl, GetImmShl(op));
}

//Unsigned shift right by immediate (scalar).
public static void Ushr_S(ILEmitterCtx context) => EmitShrImmOp(context, ShrImmFlags.ScalarZx);
//Unsigned shift right by immediate (vector). Uses Sse2.ShiftRightLogical for lanes
//wider than a byte when SSE2 is enabled; otherwise uses the per-element helper.
public static void Ushr_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0;

    if (!useSse2)
    {
        EmitShrImmOp(context, ShrImmFlags.VectorZx);

        return;
    }

    Type[] shiftArgs = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };

    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

    context.EmitLdc_I4(GetImmShr(op));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Unsigned shift right and accumulate (scalar).
public static void Usra_S(ILEmitterCtx context)
    => EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate);
//Unsigned shift right and accumulate (vector): Rd = Rd + (Rn >>> shift).
//The SSE2 path handles lanes wider than a byte; bytes use the per-element helper.
public static void Usra_V(ILEmitterCtx context)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool useSse2 = Optimizations.UseSse2 && op.Size > 0;

    if (!useSse2)
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);

        return;
    }

    Type vecType = VectorUIntTypesPerSizeLog2[op.Size];

    Type[] shiftArgs = new Type[] { vecType, typeof(byte) };
    Type[] addArgs   = new Type[] { vecType, vecType };

    //Accumulator first, then the shifted source on top of it.
    EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
    EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);

    context.EmitLdc_I4(GetImmShr(op));
    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftArgs));

    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addArgs));

    EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Shared body of Sshl_V (signed) and Ushl_V (unsigned): per-lane shift by register.
private static void EmitVectorShl(ILEmitterCtx context, bool signed)
{
//This instruction shifts the value on vector A by the number of bits
//specified on the signed, lower 8 bits of vector B. If the shift value
//is greater or equal to the data size of each lane, then the result is zero.
//Additionally, negative shifts produce right shifts by the negated shift value.
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
//Lane width in bits; shifts of this amount or more take the zero path.
int maxShift = 8 << op.Size;
//Per-lane IL. NOTE(review): assumes the binary-op helper has pushed the lane value
//followed by the shift amount before invoking this delegate (the two Pops on the
//zero path match that) - confirm against EmitVectorBinaryOpSx/Zx.
Action emit = () =>
{
ILLabel lblShl = new ILLabel();
ILLabel lblZero = new ILLabel();
ILLabel lblEnd = new ILLabel();
//Branches to lblZero when the shift amount is >= maxShift, otherwise
//applies ilOp and jumps to lblEnd.
void EmitShift(OpCode ilOp)
{
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(maxShift);
context.Emit(OpCodes.Bge_S, lblZero);
context.Emit(ilOp);
context.Emit(OpCodes.Br_S, lblEnd);
}
//Keep only the signed low 8 bits of the shift amount.
context.Emit(OpCodes.Conv_I1);
//Non-negative amounts go straight to the left shift.
context.Emit(OpCodes.Dup);
context.EmitLdc_I4(0);
context.Emit(OpCodes.Bge_S, lblShl);
//Negative amount: negate it and shift right (arithmetic when signed).
context.Emit(OpCodes.Neg);
EmitShift(signed
? OpCodes.Shr
: OpCodes.Shr_Un);
context.MarkLabel(lblShl);
EmitShift(OpCodes.Shl);
//Out of range shift: discard both stack entries and produce zero.
context.MarkLabel(lblZero);
context.Emit(OpCodes.Pop);
context.Emit(OpCodes.Pop);
context.EmitLdc_I8(0);
context.MarkLabel(lblEnd);
};
if (signed)
{
EmitVectorBinaryOpSx(context, emit);
}
else
{
EmitVectorBinaryOpZx(context, emit);
}
}
//Options for EmitShrImmOp. The last four members are the base combinations
//(scalar/vector, signed/unsigned) that callers OR with Round and/or Accumulate.
[Flags]
private enum ShrImmFlags
{
    //Only element 0 is processed.
    Scalar     = 0x1,
    //Source elements are extracted as signed and shifted arithmetically.
    Signed     = 0x2,
    //A rounding constant of 1 << (shift - 1) is added before shifting.
    Round      = 0x4,
    //The shifted value is added to the matching destination element.
    Accumulate = 0x8,

    ScalarSx = Scalar | Signed,
    ScalarZx = Scalar,
    VectorSx = Signed,
    VectorZx = 0
}
//Convenience wrappers over EmitShrImmOp that add the scalar/vector and
//signed/unsigned base flags to the caller supplied Round/Accumulate flags.
private static void EmitScalarShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
    => EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags);

private static void EmitScalarShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
    => EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags);

private static void EmitVectorShrImmOpSx(ILEmitterCtx context, ShrImmFlags flags)
    => EmitShrImmOp(context, ShrImmFlags.VectorSx | flags);

private static void EmitVectorShrImmOpZx(ILEmitterCtx context, ShrImmFlags flags)
    => EmitShrImmOp(context, ShrImmFlags.VectorZx | flags);
//Generic shift right by immediate. For every processed element of Rn:
//optionally adds the rounding constant, shifts right (arithmetic when signed),
//optionally adds the matching element of Rd, and writes the result to the
//temporary vector, which is finally stored in Rd.
private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool scalar     = flags.HasFlag(ShrImmFlags.Scalar);
    bool signed     = flags.HasFlag(ShrImmFlags.Signed);
    bool round      = flags.HasFlag(ShrImmFlags.Round);
    bool accumulate = flags.HasFlag(ShrImmFlags.Accumulate);

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int bytes = op.GetBitsCount() >> 3;
    int elems = scalar ? 1 : bytes >> op.Size;

    OpCode shrOp = signed ? OpCodes.Shr : OpCodes.Shr_Un;

    for (int elem = 0; elem < elems; elem++)
    {
        EmitVectorExtract(context, op.Rn, elem, op.Size, signed);

        if (op.Size > 2)
        {
            //64-bit elements go through the software fallback.
            EmitShrImm_64(context, signed, round ? roundConst : 0L, shift);
        }
        else
        {
            if (round)
            {
                context.EmitLdc_I8(roundConst);
                context.Emit(OpCodes.Add);
            }

            context.EmitLdc_I4(shift);
            context.Emit(shrOp);
        }

        if (accumulate)
        {
            EmitVectorExtract(context, op.Rd, elem, op.Size, signed);
            context.Emit(OpCodes.Add);
        }

        EmitVectorInsertTmp(context, elem, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (scalar || op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Shared body of Shrn_V/Rshrn_V: unsigned shift right by immediate of the
//double-width elements of Rn (optionally rounded), narrowed into the lower half
//of Rd (register size other than Simd128) or into the upper half (Simd128).
private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = 8 >> op.Size;

    //Index of the first destination lane.
    int firstLane = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    bool writesUpperHalf = firstLane != 0;

    if (writesUpperHalf)
    {
        //Preserve the lower half of Rd.
        context.EmitLdvec(op.Rd);
        context.EmitStvectmp();
    }

    for (int elem = 0; elem < elems; elem++)
    {
        EmitVectorExtractZx(context, op.Rn, elem, op.Size + 1);

        if (round)
        {
            context.EmitLdc_I8(roundConst);
            context.Emit(OpCodes.Add);
        }

        context.EmitLdc_I4(shift);
        context.Emit(OpCodes.Shr_Un);

        EmitVectorInsertTmp(context, firstLane + elem, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (!writesUpperHalf)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
//Options for EmitShrImmSaturatingNarrowOp. The last six members are the base
//combinations named as <Scalar|Vector><source signedness><destination signedness>,
//where Sx means signed and Zx means unsigned.
[Flags]
private enum ShrImmSaturatingNarrowFlags
{
    //Only element 0 is processed.
    Scalar    = 0x1,
    //Source elements are extracted as signed and shifted arithmetically.
    SignedSrc = 0x2,
    //Passed to EmitSatQ as the destination signedness.
    SignedDst = 0x4,
    //A rounding constant of 1 << (shift - 1) is added before shifting.
    Round     = 0x8,

    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,
    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
//Rounding variant of EmitShrImmSaturatingNarrowOp: adds the Round flag to the caller's flags.
private static void EmitRoundShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
    => EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
//Saturating shift right narrow by immediate. Each double-width element of Rn is
//(optionally rounded and) shifted right, saturated with EmitSatQ and inserted
//into the temporary vector, which is finally stored in Rd. Vector operations with
//a Simd128 register size write the upper half of Rd and keep its lower half.
private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
{
    OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

    bool scalar    = flags.HasFlag(ShrImmSaturatingNarrowFlags.Scalar);
    bool signedSrc = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedSrc);
    bool signedDst = flags.HasFlag(ShrImmSaturatingNarrowFlags.SignedDst);
    bool round     = flags.HasFlag(ShrImmSaturatingNarrowFlags.Round);

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = scalar ? 1 : 8 >> op.Size;

    //Index of the first destination lane.
    int firstLane = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

    //Rounding of 64-bit sources goes through the software fallback.
    bool useFallback = op.Size > 1 && round;

    OpCode shrOp = signedSrc ? OpCodes.Shr : OpCodes.Shr_Un;

    if (scalar)
    {
        EmitVectorZeroLowerTmp(context);
    }

    if (firstLane != 0)
    {
        //Preserve the lower half of Rd.
        context.EmitLdvec(op.Rd);
        context.EmitStvectmp();
    }

    for (int elem = 0; elem < elems; elem++)
    {
        EmitVectorExtract(context, op.Rn, elem, op.Size + 1, signedSrc);

        if (useFallback)
        {
            EmitShrImm_64(context, signedSrc, roundConst, shift); // Shift <= 32
        }
        else
        {
            if (round)
            {
                context.EmitLdc_I8(roundConst);
                context.Emit(OpCodes.Add);
            }

            context.EmitLdc_I4(shift);
            context.Emit(shrOp);
        }

        EmitSatQ(context, op.Size, signedSrc, signedDst);

        EmitVectorInsertTmp(context, firstLane + elem, op.Size);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);

    if (firstLane == 0)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
// Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift;
//The source value is expected to already be on the evaluation stack; this pushes
//the rounding constant and shift amount and calls the matching SoftFallback routine.
private static void EmitShrImm_64(
    ILEmitterCtx context,
    bool         signed,
    long         roundConst,
    int          shift)
{
    context.EmitLdc_I8(roundConst);
    context.EmitLdc_I4(shift);

    string fallbackName = signed
        ? nameof(SoftFallback.SignedShrImm_64)
        : nameof(SoftFallback.UnsignedShrImm_64);

    SoftFallback.EmitCall(context, fallbackName);
}
//Signed variant of EmitVectorShImmWidenBinaryOp.
private static void EmitVectorShImmWidenBinarySx(ILEmitterCtx context, Action emit, int imm)
    => EmitVectorShImmWidenBinaryOp(context, emit, imm, true);

//Unsigned variant of EmitVectorShImmWidenBinaryOp.
private static void EmitVectorShImmWidenBinaryZx(ILEmitterCtx context, Action emit, int imm)
    => EmitVectorShImmWidenBinaryOp(context, emit, imm, false);

//Widening binary operation with an immediate operand. For each source element
//(taken from the upper half of Rn when the register size is Simd128, from the
//lower half otherwise) pushes the element and imm, invokes emit, and inserts the
//result into the temporary vector as a double-width element. The temporary
//vector is then stored in Rd.
private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int elems = 8 >> op.Size;

    //Index of the first source element.
    int firstElem = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

    for (int lane = 0; lane < elems; lane++)
    {
        EmitVectorExtract(context, op.Rn, firstElem + lane, op.Size, signed);

        context.EmitLdc_I4(imm);

        emit();

        EmitVectorInsertTmp(context, lane, op.Size + 1);
    }

    context.EmitLdvectmp();
    context.EmitStvec(op.Rd);
}
}
}

View file

@ -0,0 +1,138 @@
using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
// HINT: nothing is emitted, the instruction is translated as a no-op.
public static void Hint(ILEmitterCtx context)
{
    // Intentionally empty.
}

// ISB: nothing is emitted, the instruction is translated as a no-op.
public static void Isb(ILEmitterCtx context)
{
    // Intentionally empty.
}
// MRS: reads a system register into Rt.
// The register is selected by the packed op0/op1/CRn/CRm/op2 id and read through the
// CpuThreadState property of the same name. Values coming from properties that are
// not long/ulong are converted with conv.u8 before being stored.
// Throws NotImplementedException for ids that are not listed.
public static void Mrs(ILEmitterCtx context)
{
    OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp;

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    int packedId = GetPackedId(op);

    string propName;

    switch (packedId)
    {
        case 0b11_011_0000_0000_001:
            propName = nameof(CpuThreadState.CtrEl0);
            break;

        case 0b11_011_0000_0000_111:
            propName = nameof(CpuThreadState.DczidEl0);
            break;

        case 0b11_011_0100_0100_000:
            propName = nameof(CpuThreadState.Fpcr);
            break;

        case 0b11_011_0100_0100_001:
            propName = nameof(CpuThreadState.Fpsr);
            break;

        case 0b11_011_1101_0000_010:
            propName = nameof(CpuThreadState.TpidrEl0);
            break;

        case 0b11_011_1101_0000_011:
            propName = nameof(CpuThreadState.Tpidr);
            break;

        case 0b11_011_1110_0000_000:
            propName = nameof(CpuThreadState.CntfrqEl0);
            break;

        case 0b11_011_1110_0000_001:
            propName = nameof(CpuThreadState.CntpctEl0);
            break;

        default:
            throw new NotImplementedException($"Unknown MRS at {op.Position:x16}");
    }

    context.EmitCallPropGet(typeof(CpuThreadState), propName);

    // Narrower property types need widening to 64 bits before the register store.
    Type propType = typeof(CpuThreadState).GetProperty(propName).PropertyType;

    bool is64Bit = propType == typeof(long) || propType == typeof(ulong);

    if (!is64Bit)
    {
        context.Emit(OpCodes.Conv_U8);
    }

    context.EmitStintzr(op.Rt);
}
// MSR: writes Rt to a system register.
// The register is selected by the packed op0/op1/CRn/CRm/op2 id and written through
// the CpuThreadState property of the same name. When the property is not long/ulong
// the value is narrowed with conv.u4 before the setter is called.
// Throws NotImplementedException for ids that are not listed.
public static void Msr(ILEmitterCtx context)
{
    OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp;

    context.EmitLdarg(TranslatedSub.StateArgIdx);
    context.EmitLdintzr(op.Rt);

    int packedId = GetPackedId(op);

    string propName;

    switch (packedId)
    {
        case 0b11_011_0100_0100_000:
            propName = nameof(CpuThreadState.Fpcr);
            break;

        case 0b11_011_0100_0100_001:
            propName = nameof(CpuThreadState.Fpsr);
            break;

        case 0b11_011_1101_0000_010:
            propName = nameof(CpuThreadState.TpidrEl0);
            break;

        default:
            throw new NotImplementedException($"Unknown MSR at {op.Position:x16}");
    }

    Type propType = typeof(CpuThreadState).GetProperty(propName).PropertyType;

    bool is64Bit = propType == typeof(long) || propType == typeof(ulong);

    if (!is64Bit)
    {
        context.Emit(OpCodes.Conv_U4);
    }

    context.EmitCallPropSet(typeof(CpuThreadState), propName);
}
// NOP: emits no code at all.
public static void Nop(ILEmitterCtx context)
{
    // Intentionally empty.
}
// SYS: used for CPU maintenance operations such as cache invalidation and
// address translation. No cache is emulated, so everything is a no-op except
// DC ZVA, which is expanded into a run of 8-byte zero writes starting at the
// address held in Rt and covering (4 << CpuThreadState.DczSizeLog2) bytes.
public static void Sys(ILEmitterCtx context)
{
    OpCodeSystem64 op = (OpCodeSystem64)context.CurrOp;

    int packedId = GetPackedId(op);

    if (packedId == 0b11_011_0111_0100_001)
    {
        // DC ZVA
        int blockBytes = 4 << CpuThreadState.DczSizeLog2;

        for (int offs = 0; offs < blockBytes; offs += 8)
        {
            context.EmitLdarg(TranslatedSub.MemoryArgIdx);
            context.EmitLdintzr(op.Rt);
            context.EmitLdc_I(offs);

            context.Emit(OpCodes.Add);

            context.EmitLdc_I8(0);

            InstEmitMemoryHelper.EmitWriteCall(context, 3);
        }
    }

    // Everything else, including DC CIVAC (0b11_011_0111_1110_001), emits nothing.
}
// Packs the system instruction fields into one integer used as a switch key:
//   bits 0-2: Op2, bits 3-6: CRm, bits 7-10: CRn, bits 11-13: Op1, bits 14+: Op0.
private static int GetPackedId(OpCodeSystem64 op)
{
    return (op.Op2 << 0)  |
           (op.CRm << 3)  |
           (op.CRn << 7)  |
           (op.Op1 << 11) |
           (op.Op0 << 14);
}
}
}

View file

@ -0,0 +1,6 @@
using ChocolArm64.Translation;
namespace ChocolArm64.Instructions
{
// Delegate for a method that takes an ILEmitterCtx and returns nothing.
// Presumably the shape shared by the instruction emitter methods - confirm against usages.
delegate void InstEmitter(ILEmitterCtx context);
}

View file

@ -0,0 +1,8 @@
using ChocolArm64.Decoders;
using ChocolArm64.Memory;
using ChocolArm64.State;
namespace ChocolArm64.Instructions
{
// Delegate for a method that receives the CPU thread state, the memory manager and a
// decoded OpCode64, and returns nothing.
// Presumably used to execute an instruction directly instead of emitting IL - confirm against usages.
delegate void InstInterpreter(CpuThreadState state, MemoryManager memory, OpCode64 opCode);
}

View file

@ -0,0 +1,922 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
using static VectorHelper;
static class SoftFallback
{
// Emits a call to the SoftFallback method named "mthdName".
public static void EmitCall(ILEmitterCtx context, string mthdName)
    => context.EmitCall(typeof(SoftFallback), mthdName);
#region "ShrImm_64"
// Signed 64-bit shift right by an immediate in the range the comments of the
// original describe (up to 64), with optional rounding.
//   roundConst == 0 : plain arithmetic shift; a shift of 64 yields -1 or 0 by sign.
//   roundConst != 0 : expected to be 1L << (shift - 1); it is added before shifting.
//                     If that addition overflows from positive to negative, the sum
//                     is shifted logically instead of arithmetically.
//                     A shift of 64 yields 0.
public static long SignedShrImm_64(long value, long roundConst, int shift)
{
    bool isFullShift = shift > 63; // i.e. shift == 64

    if (roundConst == 0L)
    {
        if (isFullShift)
        {
            return value < 0L ? -1L : 0L;
        }

        return value >> shift;
    }

    if (isFullShift)
    {
        return 0L;
    }

    long rounded = value + roundConst;

    // True when "value" was non-negative and the sum wrapped to a negative number.
    bool wrapped = (~value & (value ^ rounded)) < 0L;

    return wrapped
        ? (long)((ulong)rounded >> shift)
        : rounded >> shift;
}
// Unsigned 64-bit shift right by an immediate (up to 64), with optional rounding.
//   roundConst == 0 : plain logical shift; a shift of 64 yields 0.
//   roundConst != 0 : expected to be 1L << (shift - 1); it is added before shifting
//                     and the carry out of bit 63 is re-inserted at bit (64 - shift).
public static ulong UnsignedShrImm_64(ulong value, long roundConst, int shift)
{
    if (roundConst == 0L)
    {
        return shift <= 63 ? value >> shift : 0UL;
    }

    ulong round = (ulong)roundConst;

    ulong sum = value + round;

    bool carry = (sum < value) && (sum < round);

    if (shift > 63)
    {
        // Shift == 64: only the carry bit survives.
        return carry ? 1UL : 0UL;
    }

    ulong shifted = sum >> shift;

    if (carry)
    {
        shifted |= 0x8000000000000000UL >> (shift - 1);
    }

    return shifted;
}
#endregion
#region "Saturating"
// Saturates a signed value to a signed element of (8 << size) bits.
// Values outside the element range are clamped to the nearest bound and the
// Qc flag is set through state.SetFpsrFlag.
public static long SignedSrcSignedDstSatQ(long op, int size, CpuThreadState state)
{
    int bits = 8 << size;

    long upper =  (1L << (bits - 1)) - 1L;
    long lower = -(1L << (bits - 1));

    if (op >= lower && op <= upper)
    {
        return op;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return op > upper ? upper : lower;
}
// Saturates a signed value to an unsigned element of (8 << size) bits.
// Negative inputs clamp to 0 and inputs above the element maximum clamp to that
// maximum; either clamp sets the Qc flag through state.SetFpsrFlag.
public static ulong SignedSrcUnsignedDstSatQ(long op, int size, CpuThreadState state)
{
    int eSize = 8 << size;

    // The maximum is built by shifting all-ones to the right. The previous form,
    // "(1UL << eSize) - 1UL", evaluates to 0 for eSize == 64 because C# only uses
    // the low six bits of a 64-bit shift count.
    ulong tMaxValue = ulong.MaxValue >> (64 - eSize);

    if (op < 0L)
    {
        state.SetFpsrFlag(Fpsr.Qc);

        return 0UL;
    }

    // Non-negative from here on, so the unsigned comparison is exact.
    ulong value = (ulong)op;

    if (value > tMaxValue)
    {
        state.SetFpsrFlag(Fpsr.Qc);

        return tMaxValue;
    }

    return value;
}
// Saturates an unsigned value to a signed element of (8 << size) bits.
// Only the upper bound can be exceeded; when it is, the maximum is returned and
// the Qc flag is set through state.SetFpsrFlag.
public static long UnsignedSrcSignedDstSatQ(ulong op, int size, CpuThreadState state)
{
    int bits = 8 << size;

    long upper = (1L << (bits - 1)) - 1L;

    if (op <= (ulong)upper)
    {
        return (long)op;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return upper;
}
// Saturates an unsigned value to an unsigned element of (8 << size) bits.
// Inputs above the element maximum are clamped to it and the Qc flag is set
// through state.SetFpsrFlag.
public static ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size, CpuThreadState state)
{
    int eSize = 8 << size;

    // The maximum is built by shifting all-ones to the right. The previous form,
    // "(1UL << eSize) - 1UL", evaluates to 0 for eSize == 64 because C# only uses
    // the low six bits of a 64-bit shift count, which made every non-zero input saturate.
    ulong tMaxValue = ulong.MaxValue >> (64 - eSize);

    if (op > tMaxValue)
    {
        state.SetFpsrFlag(Fpsr.Qc);

        return tMaxValue;
    }

    return op;
}
// Saturation step used for signed abs/neg: long.MinValue is replaced by
// long.MaxValue (setting the Qc flag); every other value is returned unchanged.
public static long UnarySignedSatQAbsOrNeg(long op, CpuThreadState state)
{
    if (op != long.MinValue)
    {
        return op;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return long.MaxValue;
}
// Signed saturating 64-bit addition. On overflow the Qc flag is set and the
// result is long.MinValue when op1 is negative, long.MaxValue otherwise.
public static long BinarySignedSatQAdd(long op1, long op2, CpuThreadState state)
{
    long sum = op1 + op2;

    // Overflow: both operands share a sign and the sum's sign differs from it.
    bool overflow = (~(op1 ^ op2) & (op1 ^ sum)) < 0L;

    if (!overflow)
    {
        return sum;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return op1 < 0L ? long.MinValue : long.MaxValue;
}
// Unsigned saturating 64-bit addition. On carry out the Qc flag is set and
// ulong.MaxValue is returned.
public static ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2, CpuThreadState state)
{
    ulong sum = op1 + op2;

    bool carry = (sum < op1) && (sum < op2);

    if (!carry)
    {
        return sum;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return ulong.MaxValue;
}
// Signed saturating 64-bit subtraction. On overflow the Qc flag is set and the
// result is long.MinValue when op1 is negative, long.MaxValue otherwise.
public static long BinarySignedSatQSub(long op1, long op2, CpuThreadState state)
{
    long diff = op1 - op2;

    // Overflow: the operands differ in sign and the result's sign differs from op1's.
    bool overflow = ((op1 ^ op2) & (op1 ^ diff)) < 0L;

    if (!overflow)
    {
        return diff;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return op1 < 0L ? long.MinValue : long.MaxValue;
}
// Unsigned saturating 64-bit subtraction. When op1 < op2 the Qc flag is set and
// ulong.MinValue is returned.
public static ulong BinaryUnsignedSatQSub(ulong op1, ulong op2, CpuThreadState state)
{
    if (op1 >= op2)
    {
        return op1 - op2;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return ulong.MinValue;
}
// Adds an unsigned op1 to a signed op2 and saturates the sum to the signed
// 64-bit range. The sum can only exceed the upper bound; when it does the Qc
// flag is set and long.MaxValue is returned.
public static long BinarySignedSatQAcc(ulong op1, long op2, CpuThreadState state)
{
    bool op1FitsInLong = op1 <= (ulong)long.MaxValue;

    if (op1FitsInLong)
    {
        // op1 in [0, long.MaxValue], op2 anywhere in the long range.
        long sum = (long)op1 + op2;

        // Overflow only when op2 is non-negative and the sum wrapped negative.
        if ((~op2 & sum) >= 0L)
        {
            return sum;
        }
    }
    else if (op2 < 0L)
    {
        // op1 above long.MaxValue, op2 negative: the wrapped unsigned sum is exact.
        ulong sum = op1 + (ulong)op2;

        if (sum <= (ulong)long.MaxValue)
        {
            return (long)sum;
        }
    }

    // Remaining cases (including op1 above long.MaxValue with op2 >= 0) saturate.
    state.SetFpsrFlag(Fpsr.Qc);

    return long.MaxValue;
}
// Adds a signed op1 to an unsigned op2 and saturates the sum to the unsigned
// 64-bit range. Saturation to either bound sets the Qc flag.
public static ulong BinaryUnsignedSatQAcc(long op1, ulong op2, CpuThreadState state)
{
    if (op1 >= 0L)
    {
        // Both operands non-negative: plain unsigned add with carry detection.
        ulong sum = (ulong)op1 + op2;

        bool carry = (sum < (ulong)op1) && (sum < op2);

        if (!carry)
        {
            return sum;
        }

        state.SetFpsrFlag(Fpsr.Qc);

        return ulong.MaxValue;
    }

    if (op2 > (ulong)long.MaxValue)
    {
        // op1 negative, op2 above long.MaxValue: the wrapped sum is always in range.
        return (ulong)op1 + op2;
    }

    // op1 negative, op2 in [0, long.MaxValue]: the signed sum cannot overflow.
    long signedSum = op1 + (long)op2;

    if (signedSum >= 0L)
    {
        return (ulong)signedSum;
    }

    state.SetFpsrFlag(Fpsr.Qc);

    return ulong.MinValue;
}
#endregion
#region "Count"
// Counts how many bits directly below the top bit of a "size"-bit value are
// equal to that top bit. Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
public static ulong CountLeadingSigns(ulong value, int size)
{
    // After this, bit N is set exactly where bit N and bit N + 1 of the input differ.
    value ^= value >> 1;

    int top = size - 2;

    int bit = top;

    while (bit >= 0 && ((value >> bit) & 0b1) == 0)
    {
        bit--;
    }

    if (bit < 0)
    {
        // No differing pair: every bit below the top one matches it.
        return (ulong)(size - 1);
    }

    return (ulong)(top - bit);
}
// Number of leading zero bits in each possible 4-bit value.
private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };

// Counts the leading zero bits of a "size"-bit value by scanning nibbles from
// the top. Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
public static ulong CountLeadingZeros(ulong value, int size)
{
    if (value == 0ul)
    {
        return (ulong)size;
    }

    int total = 0;

    for (int shift = size - 4; ; shift -= 4)
    {
        int zeros = ClzNibbleTbl[(value >> shift) & 0b1111];

        total += zeros;

        // A nibble that is not entirely zero ends the scan.
        if (zeros != 4)
        {
            return (ulong)total;
        }
    }
}
// Population count of an 8-bit value. "Size" is 8 (SIMD&FP Inst.).
public static ulong CountSetBits8(ulong value)
{
    if (value == 0xfful)
    {
        return 8ul;
    }

    // Sum adjacent bits, then adjacent 2-bit fields, then the two nibbles.
    ulong pairs = (value & 0x55ul) + ((value >> 1) & 0x55ul);
    ulong quads = (pairs & 0x33ul) + ((pairs >> 2) & 0x33ul);

    return (quads & 0x0ful) + (quads >> 4);
}
#endregion
#region "Crc32"
// Bit-reflected polynomials used by the CRC32 and CRC32C helpers below.
private const uint Crc32RevPoly  = 0xedb88320;
private const uint Crc32CRevPoly = 0x82f63b78;

// CRC32 accumulate helpers: byte, halfword, word and doubleword inputs.
public static uint Crc32B(uint crc, byte   val) => Crc32 (crc, Crc32RevPoly, val);
public static uint Crc32H(uint crc, ushort val) => Crc32H(crc, Crc32RevPoly, val);
public static uint Crc32W(uint crc, uint   val) => Crc32W(crc, Crc32RevPoly, val);
public static uint Crc32X(uint crc, ulong  val) => Crc32X(crc, Crc32RevPoly, val);

// CRC32C accumulate helpers: same as above with the CRC32C polynomial.
public static uint Crc32Cb(uint crc, byte   val) => Crc32 (crc, Crc32CRevPoly, val);
public static uint Crc32Ch(uint crc, ushort val) => Crc32H(crc, Crc32CRevPoly, val);
public static uint Crc32Cw(uint crc, uint   val) => Crc32W(crc, Crc32CRevPoly, val);
public static uint Crc32Cx(uint crc, ulong  val) => Crc32X(crc, Crc32CRevPoly, val);

// Feeds the two bytes of "val" into the CRC, least significant byte first.
private static uint Crc32H(uint crc, uint poly, ushort val)
{
    for (int shift = 0; shift < 16; shift += 8)
    {
        crc = Crc32(crc, poly, (byte)(val >> shift));
    }

    return crc;
}

// Feeds the four bytes of "val" into the CRC, least significant byte first.
private static uint Crc32W(uint crc, uint poly, uint val)
{
    for (int shift = 0; shift < 32; shift += 8)
    {
        crc = Crc32(crc, poly, (byte)(val >> shift));
    }

    return crc;
}

// Feeds the eight bytes of "val" into the CRC, least significant byte first.
private static uint Crc32X(uint crc, uint poly, ulong val)
{
    for (int shift = 0; shift < 64; shift += 8)
    {
        crc = Crc32(crc, poly, (byte)(val >> shift));
    }

    return crc;
}

// Bitwise CRC update for a single byte using the reflected polynomial "poly".
private static uint Crc32(uint crc, uint poly, byte val)
{
    crc ^= val;

    for (int step = 0; step < 8; step++)
    {
        bool lowBitSet = (crc & 1) != 0;

        crc >>= 1;

        if (lowBitSet)
        {
            crc ^= poly;
        }
    }

    return crc;
}
#endregion
#region "Aes"
// XORs "value" with "roundKey", then applies CryptoHelper.AesInvShiftRows followed
// by CryptoHelper.AesInvSubBytes. Requires SSE.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Decrypt(Vector128<float> value, Vector128<float> roundKey)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    var keyed   = Sse.Xor(value, roundKey);
    var shifted = CryptoHelper.AesInvShiftRows(keyed);

    return CryptoHelper.AesInvSubBytes(shifted);
}

// XORs "value" with "roundKey", then applies CryptoHelper.AesShiftRows followed
// by CryptoHelper.AesSubBytes. Requires SSE.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Encrypt(Vector128<float> value, Vector128<float> roundKey)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    var keyed   = Sse.Xor(value, roundKey);
    var shifted = CryptoHelper.AesShiftRows(keyed);

    return CryptoHelper.AesSubBytes(shifted);
}

// Forwards to CryptoHelper.AesInvMixColumns.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> InverseMixColumns(Vector128<float> value)
    => CryptoHelper.AesInvMixColumns(value);

// Forwards to CryptoHelper.AesMixColumns.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MixColumns(Vector128<float> value)
    => CryptoHelper.AesMixColumns(value);
#endregion
#region "Sha1"
// Runs four SHA-1 style rounds over the state (hashAbcd, hashE) using the
// "choose" function, consuming one 32-bit element of "wk" per round, and
// returns the updated four-element vector.
public static Vector128<float> HashChoose(Vector128<float> hashAbcd, uint hashE, Vector128<float> wk)
{
    for (int round = 0; round < 4; round++)
    {
        uint a = (uint)VectorExtractIntZx(hashAbcd, (byte)0, 2);
        uint b = (uint)VectorExtractIntZx(hashAbcd, (byte)1, 2);
        uint c = (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2);
        uint d = (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2);

        uint w = (uint)VectorExtractIntZx(wk, (byte)round, 2);

        hashE += Rol(a, 5) + ShaChoose(b, c, d);
        hashE += w;

        // Element 1 is rotated in place before the whole 160-bit state is rotated.
        hashAbcd = VectorInsertInt((ulong)Rol(b, 30), hashAbcd, (byte)1, 2);

        Rol32_160(ref hashE, ref hashAbcd);
    }

    return hashAbcd;
}
// Rotates "hashE" left by 30 bits.
public static uint FixedRotate(uint hashE) => Rol(hashE, 30);
// Runs four SHA-1 style rounds over the state (hashAbcd, hashE) using the
// "majority" function, consuming one 32-bit element of "wk" per round, and
// returns the updated four-element vector.
public static Vector128<float> HashMajority(Vector128<float> hashAbcd, uint hashE, Vector128<float> wk)
{
    for (int round = 0; round < 4; round++)
    {
        uint a = (uint)VectorExtractIntZx(hashAbcd, (byte)0, 2);
        uint b = (uint)VectorExtractIntZx(hashAbcd, (byte)1, 2);
        uint c = (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2);
        uint d = (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2);

        uint w = (uint)VectorExtractIntZx(wk, (byte)round, 2);

        hashE += Rol(a, 5) + ShaMajority(b, c, d);
        hashE += w;

        // Element 1 is rotated in place before the whole 160-bit state is rotated.
        hashAbcd = VectorInsertInt((ulong)Rol(b, 30), hashAbcd, (byte)1, 2);

        Rol32_160(ref hashE, ref hashAbcd);
    }

    return hashAbcd;
}
// Runs four SHA-1 style rounds over the state (hashAbcd, hashE) using the
// "parity" function, consuming one 32-bit element of "wk" per round, and
// returns the updated four-element vector.
public static Vector128<float> HashParity(Vector128<float> hashAbcd, uint hashE, Vector128<float> wk)
{
    for (int round = 0; round < 4; round++)
    {
        uint a = (uint)VectorExtractIntZx(hashAbcd, (byte)0, 2);
        uint b = (uint)VectorExtractIntZx(hashAbcd, (byte)1, 2);
        uint c = (uint)VectorExtractIntZx(hashAbcd, (byte)2, 2);
        uint d = (uint)VectorExtractIntZx(hashAbcd, (byte)3, 2);

        uint w = (uint)VectorExtractIntZx(wk, (byte)round, 2);

        hashE += Rol(a, 5) + ShaParity(b, c, d);
        hashE += w;

        // Element 1 is rotated in place before the whole 160-bit state is rotated.
        hashAbcd = VectorInsertInt((ulong)Rol(b, 30), hashAbcd, (byte)1, 2);

        Rol32_160(ref hashE, ref hashAbcd);
    }

    return hashAbcd;
}
// Builds a vector from the upper 64 bits of w03 (low half) and the lower
// 64 bits of w47 (high half), then XORs it with w03 and w811. Requires SSE.
public static Vector128<float> Sha1SchedulePart1(Vector128<float> w03, Vector128<float> w47, Vector128<float> w811)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    ulong lowHalf  = VectorExtractIntZx(w03, (byte)1, 3);
    ulong highHalf = VectorExtractIntZx(w47, (byte)0, 3);

    Vector128<float> shifted = new Vector128<float>();

    shifted = VectorInsertInt(lowHalf,  shifted, (byte)0, 3);
    shifted = VectorInsertInt(highHalf, shifted, (byte)1, 3);

    Vector128<float> mixed = Sse.Xor(w03, w811);

    return Sse.Xor(shifted, mixed);
}
// XORs tw03 with w1215 shifted right by four bytes, rotates every 32-bit
// element left by one, and additionally XORs element 3 with element 0 rotated
// left by two. Requires SSE2.
public static Vector128<float> Sha1SchedulePart2(Vector128<float> tw03, Vector128<float> w1215)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    Vector128<uint> shiftedW = Sse2.ShiftRightLogical128BitLane(Sse.StaticCast<float, uint>(w1215), (byte)4);

    Vector128<float> t = Sse.Xor(tw03, Sse.StaticCast<uint, float>(shiftedW));

    uint e0 = (uint)VectorExtractIntZx(t, (byte)0, 2);
    uint e1 = (uint)VectorExtractIntZx(t, (byte)1, 2);
    uint e2 = (uint)VectorExtractIntZx(t, (byte)2, 2);
    uint e3 = (uint)VectorExtractIntZx(t, (byte)3, 2);

    Vector128<float> result = new Vector128<float>();

    result = VectorInsertInt((ulong)Rol(e0, 1), result, (byte)0, 2);
    result = VectorInsertInt((ulong)Rol(e1, 1), result, (byte)1, 2);
    result = VectorInsertInt((ulong)Rol(e2, 1), result, (byte)2, 2);
    result = VectorInsertInt((ulong)(Rol(e3, 1) ^ Rol(e0, 2)), result, (byte)3, 2);

    return result;
}
// Rotates the 160-bit value formed by the vector "x" and the word "y" by 32 bits:
// x is shifted left by four bytes, the old y becomes element 0 of x, and the
// old element 3 of x becomes the new y. Requires SSE2.
private static void Rol32_160(ref uint y, ref Vector128<float> x)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    uint carriedOut = (uint)VectorExtractIntZx(x, (byte)3, 2);

    Vector128<uint> shifted = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4);

    x = VectorInsertInt((ulong)y, Sse.StaticCast<uint, float>(shifted), (byte)0, 2);

    y = carriedOut;
}
// Bitwise choose: each result bit comes from y where x is 1 and from z where x is 0.
private static uint ShaChoose(uint x, uint y, uint z) => ((y ^ z) & x) ^ z;

// Bitwise majority of the three inputs.
private static uint ShaMajority(uint x, uint y, uint z) => (x & y) | ((x | y) & z);

// Bitwise parity (XOR) of the three inputs.
private static uint ShaParity(uint x, uint y, uint z) => x ^ y ^ z;

// Rotates a 32-bit value left by "count" bits.
private static uint Rol(this uint value, int count)
{
    uint high = value << count;
    uint low  = value >> (32 - count);

    return high | low;
}
#endregion
#region "Sha256"
// Runs Sha256Hash and returns its first (x / "abcd") output.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> HashLower(Vector128<float> hashAbcd, Vector128<float> hashEfgh, Vector128<float> wk)
    => Sha256Hash(hashAbcd, hashEfgh, wk, part1: true);

// Runs Sha256Hash and returns its second (y / "efgh") output.
// Note the swapped parameter order compared to HashLower.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> HashUpper(Vector128<float> hashEfgh, Vector128<float> hashAbcd, Vector128<float> wk)
    => Sha256Hash(hashAbcd, hashEfgh, wk, part1: false);
// For each element e of the result: takes the "next" word (w03[e + 1], or w47[0]
// for the last element), applies Ror(7) ^ Ror(18) ^ Lsr(3) to it and adds w03[e].
public static Vector128<float> Sha256SchedulePart1(Vector128<float> w03, Vector128<float> w47)
{
    Vector128<float> result = new Vector128<float>();

    for (int e = 0; e < 4; e++)
    {
        uint next;

        if (e < 3)
        {
            next = (uint)VectorExtractIntZx(w03, (byte)(e + 1), 2);
        }
        else
        {
            next = (uint)VectorExtractIntZx(w47, (byte)0, 2);
        }

        uint sum = next.Ror(7) ^ next.Ror(18) ^ next.Lsr(3);

        sum += (uint)VectorExtractIntZx(w03, (byte)e, 2);

        result = VectorInsertInt((ulong)sum, result, (byte)e, 2);
    }

    return result;
}
// Produces four words. Each is Ror(17) ^ Ror(19) ^ Lsr(10) of a driving word,
// plus w03[e], plus one further word:
//   e = 0, 1 : driven by the upper 64 bits of w1215, extra word is w811[e + 1];
//   e = 2, 3 : driven by result[0] / result[1], extra word is w811[3] / w1215[0].
public static Vector128<float> Sha256SchedulePart2(Vector128<float> w03, Vector128<float> w811, Vector128<float> w1215)
{
    Vector128<float> result = new Vector128<float>();

    ulong drivers = VectorExtractIntZx(w1215, (byte)1, 3);

    for (int e = 0; e < 2; e++)
    {
        uint src = drivers.ULongPart(e);

        uint acc = src.Ror(17) ^ src.Ror(19) ^ src.Lsr(10);

        acc += (uint)VectorExtractIntZx(w03,  (byte)e,       2);
        acc += (uint)VectorExtractIntZx(w811, (byte)(e + 1), 2);

        result = VectorInsertInt((ulong)acc, result, (byte)e, 2);
    }

    // The second pair is driven by the two words that were just produced.
    drivers = VectorExtractIntZx(result, (byte)0, 3);

    for (int e = 2; e < 4; e++)
    {
        uint src = drivers.ULongPart(e - 2);

        uint acc = src.Ror(17) ^ src.Ror(19) ^ src.Lsr(10);

        acc += (uint)VectorExtractIntZx(w03, (byte)e, 2);

        uint extra;

        if (e == 2)
        {
            extra = (uint)VectorExtractIntZx(w811, (byte)3, 2);
        }
        else
        {
            extra = (uint)VectorExtractIntZx(w1215, (byte)0, 2);
        }

        acc += extra;

        result = VectorInsertInt((ulong)acc, result, (byte)e, 2);
    }

    return result;
}
// Runs four SHA-256 style rounds over the two four-element state vectors x and y,
// consuming one 32-bit element of "w" per round. Returns x when "part1" is true,
// y otherwise.
private static Vector128<float> Sha256Hash(Vector128<float> x, Vector128<float> y, Vector128<float> w, bool part1)
{
    for (int round = 0; round < 4; round++)
    {
        uint x0 = (uint)VectorExtractIntZx(x, (byte)0, 2);
        uint x1 = (uint)VectorExtractIntZx(x, (byte)1, 2);
        uint x2 = (uint)VectorExtractIntZx(x, (byte)2, 2);
        uint x3 = (uint)VectorExtractIntZx(x, (byte)3, 2);

        uint y0 = (uint)VectorExtractIntZx(y, (byte)0, 2);
        uint y1 = (uint)VectorExtractIntZx(y, (byte)1, 2);
        uint y2 = (uint)VectorExtractIntZx(y, (byte)2, 2);
        uint y3 = (uint)VectorExtractIntZx(y, (byte)3, 2);

        uint chs = ShaChoose(y0, y1, y2);
        uint maj = ShaMajority(x0, x1, x2);

        uint t1 = y3;

        t1 += ShaHashSigma1(y0) + chs;
        t1 += (uint)VectorExtractIntZx(w, (byte)round, 2);

        // Element 3 of each vector is replaced before the 256-bit rotation below.
        uint newX3 = t1 + x3;
        uint newY3 = t1 + ShaHashSigma0(x0) + maj;

        x = VectorInsertInt((ulong)newX3, x, (byte)3, 2);
        y = VectorInsertInt((ulong)newY3, y, (byte)3, 2);

        Rol32_256(ref y, ref x);
    }

    if (part1)
    {
        return x;
    }

    return y;
}
// Rotates the 256-bit value formed by x and y by 32 bits: both vectors are shifted
// left by four bytes, the old element 3 of x becomes element 0 of y and the old
// element 3 of y becomes element 0 of x. Requires SSE2.
private static void Rol32_256(ref Vector128<float> y, ref Vector128<float> x)
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    uint topOfY = (uint)VectorExtractIntZx(y, (byte)3, 2);
    uint topOfX = (uint)VectorExtractIntZx(x, (byte)3, 2);

    Vector128<uint> shiftedY = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(y), (byte)4);
    Vector128<uint> shiftedX = Sse2.ShiftLeftLogical128BitLane(Sse.StaticCast<float, uint>(x), (byte)4);

    y = VectorInsertInt((ulong)topOfX, Sse.StaticCast<uint, float>(shiftedY), (byte)0, 2);
    x = VectorInsertInt((ulong)topOfY, Sse.StaticCast<uint, float>(shiftedX), (byte)0, 2);
}
// Ror(2) ^ Ror(13) ^ Ror(22) of x.
private static uint ShaHashSigma0(uint x) => Ror(x, 2) ^ Ror(x, 13) ^ Ror(x, 22);

// Ror(6) ^ Ror(11) ^ Ror(25) of x.
private static uint ShaHashSigma1(uint x) => Ror(x, 6) ^ Ror(x, 11) ^ Ror(x, 25);

// Rotates a 32-bit value right by "count" bits.
private static uint Ror(this uint value, int count)
{
    uint low  = value >> count;
    uint high = value << (32 - count);

    return low | high;
}

// Logical shift right of a 32-bit value.
private static uint Lsr(this uint value, int count) => value >> count;

// Returns the low 32 bits of "value" when part is 0, the high 32 bits otherwise.
private static uint ULongPart(this ulong value, int part)
{
    if (part == 0)
    {
        return (uint)(value & 0xFFFFFFFFUL);
    }

    return (uint)(value >> 32);
}
#endregion
#region "Reverse"
// Reverses the bit order of the low 8 bits of "value"; higher bits are discarded.
public static uint ReverseBits8(uint value)
{
    // Swap neighbouring bits, then neighbouring bit pairs, then the two nibbles.
    value = ((value >> 1) & 0x55) | ((value & 0x55) << 1);
    value = ((value >> 2) & 0x33) | ((value & 0x33) << 2);

    return ((value & 0x0f) << 4) | (value >> 4);
}

// Reverses the bit order of a 32-bit value.
public static uint ReverseBits32(uint value)
{
    // Swap progressively larger groups: bits, pairs, nibbles, bytes, halfwords.
    value = ((value >> 1) & 0x55555555) | ((value & 0x55555555) << 1);
    value = ((value >> 2) & 0x33333333) | ((value & 0x33333333) << 2);
    value = ((value >> 4) & 0x0f0f0f0f) | ((value & 0x0f0f0f0f) << 4);
    value = ((value >> 8) & 0x00ff00ff) | ((value & 0x00ff00ff) << 8);

    return (value << 16) | (value >> 16);
}

// Reverses the bit order of a 64-bit value.
public static ulong ReverseBits64(ulong value)
{
    // Swap progressively larger groups: bits, pairs, nibbles, bytes, halfwords, words.
    value = ((value >> 1 ) & 0x5555555555555555) | ((value & 0x5555555555555555) << 1 );
    value = ((value >> 2 ) & 0x3333333333333333) | ((value & 0x3333333333333333) << 2 );
    value = ((value >> 4 ) & 0x0f0f0f0f0f0f0f0f) | ((value & 0x0f0f0f0f0f0f0f0f) << 4 );
    value = ((value >> 8 ) & 0x00ff00ff00ff00ff) | ((value & 0x00ff00ff00ff00ff) << 8 );
    value = ((value >> 16) & 0x0000ffff0000ffff) | ((value & 0x0000ffff0000ffff) << 16);

    return (value << 32) | (value >> 32);
}
// Byte reversal entry points. The first number in the name is the width of the
// groups whose bytes are reversed, the second is the operand width.
public static uint ReverseBytes16_32(uint value) => (uint)ReverseBytes16_64(value);
public static uint ReverseBytes32_32(uint value) => (uint)ReverseBytes32_64(value);

public static ulong ReverseBytes16_64(ulong value) => ReverseBytes(value, RevSize.Rev16);
public static ulong ReverseBytes32_64(ulong value) => ReverseBytes(value, RevSize.Rev32);
public static ulong ReverseBytes64(ulong value)    => ReverseBytes(value, RevSize.Rev64);

// Width of the groups inside which ReverseBytes reverses the byte order.
private enum RevSize
{
    Rev16,
    Rev32,
    Rev64
}

// Reverses the byte order inside every 16, 32 or 64-bit group of "value".
// The swaps are staged: bytes within halfwords, then halfwords within words,
// then words within the doubleword; the requested size decides where to stop.
// Throws ArgumentOutOfRangeException for a size that is not a RevSize member.
private static ulong ReverseBytes(ulong value, RevSize size)
{
    value = ((value & 0xff00ff00ff00ff00) >> 8) | ((value & 0x00ff00ff00ff00ff) << 8);

    if (size == RevSize.Rev16)
    {
        return value;
    }

    value = ((value & 0xffff0000ffff0000) >> 16) | ((value & 0x0000ffff0000ffff) << 16);

    if (size == RevSize.Rev32)
    {
        return value;
    }

    value = ((value & 0xffffffff00000000) >> 32) | ((value & 0x00000000ffffffff) << 32);

    if (size == RevSize.Rev64)
    {
        return value;
    }

    // The single-string ArgumentException constructor takes a message, not a parameter
    // name; ArgumentOutOfRangeException(string) records the name in ParamName instead.
    throw new ArgumentOutOfRangeException(nameof(size));
}
#endregion
#region "MultiplyHigh"
// Upper 64 bits of the signed 128-bit product left * right.
// Computed from the unsigned high half, corrected for each negative operand.
public static long SMulHi128(long left, long right)
{
    long high = (long)UMulHi128((ulong)left, (ulong)right);

    long correction = 0;

    if (left < 0)
    {
        correction += right;
    }

    if (right < 0)
    {
        correction += left;
    }

    return high - correction;
}

// Upper 64 bits of the unsigned 128-bit product left * right, built from
// 32 x 32 -> 64 bit partial products.
public static ulong UMulHi128(ulong left, ulong right)
{
    const ulong LowMask = 0xFFFFFFFF;

    ulong leftHi  = left  >> 32;
    ulong leftLo  = left  & LowMask;
    ulong rightHi = right >> 32;
    ulong rightLo = right & LowMask;

    ulong lowProduct = leftLo * rightLo;

    // First cross product plus the carry out of the low product.
    ulong cross = leftHi * rightLo + (lowProduct >> 32);

    ulong crossLow  = cross & LowMask;
    ulong crossHigh = cross >> 32;

    // Second cross product accumulated onto the low part of the first.
    crossLow += leftLo * rightHi;

    return leftHi * rightHi + crossHigh + (crossLow >> 32);
}
#endregion
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,790 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class VectorHelper
{
// Vector constant initialised once by the static constructor below.
// NOTE(review): not referenced by any code visible in this part of the file.
private static readonly Vector128<float> Zero32128Mask;
// Type initialiser: requires SSE2 and throws PlatformNotSupportedException otherwise.
static VectorHelper()
{
if (!Sse2.IsSupported)
{
throw new PlatformNotSupportedException();
}
// Built from the four 32-bit values (0, 0, 0, 0xffffffff) and reinterpreted as floats.
Zero32128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff));
}
// Emits a call to one of two VectorHelper methods: "name64" when the current
// instruction operates on a 64-bit SIMD register, "name128" otherwise.
public static void EmitCall(ILEmitterCtx context, string name64, string name128)
{
    string chosen = name128;

    if (context.CurrOp.RegisterSize == RegisterSize.Simd64)
    {
        chosen = name64;
    }

    context.EmitCall(typeof(VectorHelper), chosen);
}

// Emits a call to the VectorHelper method named "mthdName".
public static void EmitCall(ILEmitterCtx context, string mthdName)
    => context.EmitCall(typeof(VectorHelper), mthdName);
// Saturating float -> integer conversions. NaN converts to 0, values at or beyond
// a bound of the target type convert to that bound, everything else is truncated.
//
// The comparisons are inclusive on purpose: the integer maximum is converted to
// float for the comparison and is not exactly representable, so it rounds up to
// the next power of two (2^31, 2^32, 2^63, 2^64). With a strict ">" an input equal
// to that power of two skipped the saturating branch and reached a cast whose
// operand is outside the target range.

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float value)
{
    if (float.IsNaN(value)) return 0;

    return value >= int.MaxValue ? int.MaxValue :
           value <= int.MinValue ? int.MinValue : (int)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float value)
{
    if (float.IsNaN(value)) return 0;

    return value >= long.MaxValue ? long.MaxValue :
           value <= long.MinValue ? long.MinValue : (long)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float value)
{
    if (float.IsNaN(value)) return 0;

    return value >= uint.MaxValue ? uint.MaxValue :
           value <= uint.MinValue ? uint.MinValue : (uint)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float value)
{
    if (float.IsNaN(value)) return 0;

    return value >= ulong.MaxValue ? ulong.MaxValue :
           value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
}
// Saturating double -> integer conversions. NaN converts to 0, values at or beyond
// a bound of the target type convert to that bound, everything else is truncated.
//
// The comparisons are inclusive on purpose: long.MaxValue and ulong.MaxValue are
// not exactly representable as double and round up to 2^63 and 2^64 when converted
// for the comparison. With a strict ">" an input equal to that power of two
// skipped the saturating branch and reached a cast whose operand is outside the
// target range. The 32-bit variants use the same form for consistency.

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double value)
{
    if (double.IsNaN(value)) return 0;

    return value >= int.MaxValue ? int.MaxValue :
           value <= int.MinValue ? int.MinValue : (int)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double value)
{
    if (double.IsNaN(value)) return 0;

    return value >= long.MaxValue ? long.MaxValue :
           value <= long.MinValue ? long.MinValue : (long)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double value)
{
    if (double.IsNaN(value)) return 0;

    return value >= uint.MaxValue ? uint.MaxValue :
           value <= uint.MinValue ? uint.MinValue : (uint)value;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double value)
{
    if (double.IsNaN(value)) return 0;

    return value >= ulong.MaxValue ? ulong.MaxValue :
           value <= ulong.MinValue ? ulong.MinValue : (ulong)value;
}
// Rounds "value" to an integral double according to state.FPRoundingMode():
// Math.Round, Math.Ceiling, Math.Floor or Math.Truncate.
// Throws InvalidOperationException for any other mode.
public static double Round(double value, CpuThreadState state)
{
    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return Math.Round(value);
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return Math.Ceiling(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return Math.Floor(value);
    }

    if (mode == RoundMode.TowardsZero)
    {
        return Math.Truncate(value);
    }

    throw new InvalidOperationException();
}

// Single precision counterpart of Round, using the MathF functions.
public static float RoundF(float value, CpuThreadState state)
{
    RoundMode mode = state.FPRoundingMode();

    if (mode == RoundMode.ToNearest)
    {
        return MathF.Round(value);
    }

    if (mode == RoundMode.TowardsPlusInfinity)
    {
        return MathF.Ceiling(value);
    }

    if (mode == RoundMode.TowardsMinusInfinity)
    {
        return MathF.Floor(value);
    }

    if (mode == RoundMode.TowardsZero)
    {
        return MathF.Truncate(value);
    }

    throw new InvalidOperationException();
}
// Table lookup entry points. The digit is the number of 16-byte table vectors,
// the suffix selects how many index bytes of "vector" are processed (8 or 16).
// All of them forward to Tbl.

public static Vector128<float> Tbl1_V64(Vector128<float> vector, Vector128<float> tb0)
    => Tbl(vector, 8, tb0);

public static Vector128<float> Tbl1_V128(Vector128<float> vector, Vector128<float> tb0)
    => Tbl(vector, 16, tb0);

public static Vector128<float> Tbl2_V64(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
    => Tbl(vector, 8, tb0, tb1);

public static Vector128<float> Tbl2_V128(Vector128<float> vector, Vector128<float> tb0, Vector128<float> tb1)
    => Tbl(vector, 16, tb0, tb1);

public static Vector128<float> Tbl3_V64(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2)
    => Tbl(vector, 8, tb0, tb1, tb2);

public static Vector128<float> Tbl3_V128(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2)
    => Tbl(vector, 16, tb0, tb1, tb2);

public static Vector128<float> Tbl4_V64(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2,
    Vector128<float> tb3)
    => Tbl(vector, 8, tb0, tb1, tb2, tb3);

public static Vector128<float> Tbl4_V128(
    Vector128<float> vector,
    Vector128<float> tb0,
    Vector128<float> tb1,
    Vector128<float> tb2,
    Vector128<float> tb3)
    => Tbl(vector, 16, tb0, tb1, tb2, tb3);
// Byte table lookup. The table vectors are flattened into one byte array; each of
// the first "bytes" bytes of "vector" is used as an index into it. Indices past
// the end of the table leave the corresponding result byte at zero.
private static Vector128<float> Tbl(Vector128<float> vector, int bytes, params Vector128<float>[] tb)
{
    int tableLength = tb.Length * 16;

    byte[] table = new byte[tableLength];

    for (int pos = 0; pos < tableLength; pos++)
    {
        // pos / 16 selects the table vector, pos % 16 the byte inside it.
        table[pos] = (byte)VectorExtractIntZx(tb[pos >> 4], (byte)(pos & 15), 0);
    }

    Vector128<float> res = new Vector128<float>();

    for (byte lane = 0; lane < bytes; lane++)
    {
        byte tblIdx = (byte)VectorExtractIntZx(vector, lane, 0);

        if (tblIdx >= table.Length)
        {
            continue;
        }

        res = VectorInsertInt(table[tblIdx], res, lane, 0);
    }

    return res;
}
// Reads the 64-bit element "index" of the vector and reinterprets its bits as a double.
// Uses the SSE4.1 extract when available, the SSE2 based VectorExtractIntZx otherwise;
// throws PlatformNotSupportedException when neither is supported.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> vector, byte index)
{
    long bits;

    if (Sse41.IsSupported)
    {
        bits = Sse41.Extract(Sse.StaticCast<float, long>(vector), index);
    }
    else if (Sse2.IsSupported)
    {
        bits = (long)VectorExtractIntZx(vector, index, 3);
    }
    else
    {
        throw new PlatformNotSupportedException();
    }

    return BitConverter.Int64BitsToDouble(bits);
}
// Extracts element "index" of the vector as a sign-extended integer.
// "size" is the log2 of the element width in bytes (0 = 8 bits ... 3 = 64 bits);
// any other size throws ArgumentOutOfRangeException.
// Throws PlatformNotSupportedException when neither SSE4.1 nor SSE2 is supported.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long VectorExtractIntSx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        switch (size)
        {
            case 0: return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
            case 1: return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
            case 2: return Sse41.Extract(Sse.StaticCast<float, int>(vector), index);
            case 3: return Sse41.Extract(Sse.StaticCast<float, long>(vector), index);

            default: throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // SSE2 only: take the zero-extended element and reinterpret it at its own width.
    switch (size)
    {
        case 0: return (sbyte)VectorExtractIntZx(vector, index, size);
        case 1: return (short)VectorExtractIntZx(vector, index, size);
        case 2: return (int)VectorExtractIntZx(vector, index, size);
        case 3: return (long)VectorExtractIntZx(vector, index, size);

        default: throw new ArgumentOutOfRangeException(nameof(size));
    }
}
// Extracts element "index" of the vector as a zero-extended integer.
// "size" is the log2 of the element width in bytes (0 = 8 bits ... 3 = 64 bits);
// any other size throws ArgumentOutOfRangeException.
// With SSE4.1 the matching extract is used directly. With only SSE2 the element
// is assembled from 16-bit extracts.
// Throws PlatformNotSupportedException when neither is supported.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong VectorExtractIntZx(Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        switch (size)
        {
            case 0: return Sse41.Extract(Sse.StaticCast<float, byte>(vector), index);
            case 1: return Sse2.Extract(Sse.StaticCast<float, ushort>(vector), index);
            case 2: return Sse41.Extract(Sse.StaticCast<float, uint>(vector), index);
            case 3: return Sse41.Extract(Sse.StaticCast<float, ulong>(vector), index);

            default: throw new ArgumentOutOfRangeException(nameof(size));
        }
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // Index of the first 16-bit word that holds (part of) the requested element.
    int wordIdx = size == 0
        ? index >> 1
        : index << (size - 1);

    ushort word0 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)wordIdx);

    switch (size)
    {
        case 0:
        {
            // Odd byte indices live in the upper half of the word.
            return (byte)(word0 >> (index & 1) * 8);
        }

        case 1:
        {
            return word0;
        }

        case 2:
        {
            ushort word1 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 1));

            return (uint)(word0 | (word1 << 16));
        }

        case 3:
        {
            ushort word1 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 1));
            ushort word2 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 2));
            ushort word3 = Sse2.Extract(Sse.StaticCast<float, ushort>(vector), (byte)(wordIdx + 3));

            return ((ulong)word0 << 0)  |
                   ((ulong)word1 << 16) |
                   ((ulong)word2 << 32) |
                   ((ulong)word3 << 48);
        }

        default:
        {
            throw new ArgumentOutOfRangeException(nameof(size));
        }
    }
}
// Reads the 32-bit float element "index" of the vector.
// With SSE4.1 the float extract is used; with only SSE2 the value is rebuilt from
// its two 16-bit halves. Throws PlatformNotSupportedException when neither is supported.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float VectorExtractSingle(Vector128<float> vector, byte index)
{
    if (Sse41.IsSupported)
    {
        return Sse41.Extract(vector, index);
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    Vector128<ushort> words = Sse.StaticCast<float, ushort>(vector);

    int lowHalf  = Sse2.Extract(words, (byte)(index * 2 + 0));
    int highHalf = Sse2.Extract(words, (byte)(index * 2 + 1));

    int bits = lowHalf | (highHalf << 16);

    return BitConverter.Int32BitsToSingle(bits);
}
/// <summary>
/// Stores a double-precision value into one 64-bit element of a vector.
/// </summary>
/// <param name="value">Value whose raw 64-bit pattern is stored.</param>
/// <param name="vector">Source vector.</param>
/// <param name="index">Index of the 64-bit element to replace.</param>
/// <returns>The vector returned by <c>VectorInsertInt</c> for a 64-bit element (size 3).</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertDouble(double value, Vector128<float> vector, byte index)
{
    //The double is forwarded as its bit pattern, not as a converted integer.
    long rawBits = BitConverter.DoubleToInt64Bits(value);

    return VectorInsertInt((ulong)rawBits, vector, index, 3);
}
/// <summary>
/// Stores the low bits of <paramref name="value"/> into one integer element of a vector.
/// </summary>
/// <param name="value">Value to store; it is truncated to the element width.</param>
/// <param name="vector">Source vector, typed as float but handled as raw bits.</param>
/// <param name="index">Destination element index, counted in units of the selected element width.</param>
/// <param name="size">Log2 of the element width in bytes: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.</param>
/// <returns>A vector with the selected element replaced.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is not 0, 1, 2 or 3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertInt(ulong value, Vector128<float> vector, byte index, int size)
{
    if (Sse41.IsSupported)
    {
        //One insert per element width; the 16-bit case uses the SSE2 insert.
        if (size == 0)
        {
            Vector128<byte> updated = Sse41.Insert(Sse.StaticCast<float, byte>(vector), (byte)value, index);

            return Sse.StaticCast<byte, float>(updated);
        }
        else if (size == 1)
        {
            Vector128<ushort> updated = Sse2.Insert(Sse.StaticCast<float, ushort>(vector), (ushort)value, index);

            return Sse.StaticCast<ushort, float>(updated);
        }
        else if (size == 2)
        {
            Vector128<uint> updated = Sse41.Insert(Sse.StaticCast<float, uint>(vector), (uint)value, index);

            return Sse.StaticCast<uint, float>(updated);
        }
        else if (size == 3)
        {
            Vector128<ulong> updated = Sse41.Insert(Sse.StaticCast<float, ulong>(vector), value, index);

            return Sse.StaticCast<ulong, float>(updated);
        }

        throw new ArgumentOutOfRangeException(nameof(size));
    }

    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    //SSE2 fallback: only 16-bit extract/insert are used, so every width is written lane by lane.
    Vector128<ushort> lanes = Sse.StaticCast<float, ushort>(vector);

    //Translate the element index into the index of the first 16-bit lane it touches.
    int firstLane;

    if (size == 0)
    {
        firstLane = index >> 1;
    }
    else
    {
        firstLane = index << (size - 1);
    }

    if (size == 0)
    {
        //Read-modify-write of the containing lane: keep the other byte, replace the addressed one.
        int bitOffset = (index & 1) * 8;

        ushort merged = Sse2.Extract(lanes, (byte)firstLane);

        merged &= (ushort)(0xff00 >> bitOffset);
        merged |= (ushort)((byte)value << bitOffset);

        return Sse.StaticCast<ushort, float>(Sse2.Insert(lanes, merged, (byte)firstLane));
    }
    else if (size == 1)
    {
        return Sse.StaticCast<ushort, float>(Sse2.Insert(lanes, (ushort)value, index));
    }
    else if (size == 2 || size == 3)
    {
        //Two lanes make a 32-bit element, four lanes a 64-bit one.
        int laneCount = 1 << (size - 1);

        for (int lane = 0; lane < laneCount; lane++)
        {
            ushort piece = (ushort)(value >> (16 * lane));

            lanes = Sse2.Insert(lanes, piece, (byte)(firstLane + lane));
        }

        return Sse.StaticCast<ushort, float>(lanes);
    }

    throw new ArgumentOutOfRangeException(nameof(size));
}
/// <summary>
/// Stores a single-precision value into one 32-bit element of a vector.
/// </summary>
/// <param name="value">Value to store.</param>
/// <param name="vector">Source vector.</param>
/// <param name="index">Destination element; the SSE4.1 path accepts only 0 to 3.</param>
/// <returns>A vector with the selected element replaced by <paramref name="value"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">SSE4.1 path only: <paramref name="index"/> is greater than 3.</exception>
/// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertSingle(float value, Vector128<float> vector, byte index)
{
if (Sse41.IsSupported)
{
//Note: The if/else if is necessary to enable the JIT to
//produce a single INSERTPS instruction instead of the
//jump table fallback.
//Each branch passes a literal immediate equal to index * 0x10
//(0x00, 0x10, 0x20, 0x30); the low four bits are left at zero.
if (index == 0)
{
return Sse41.Insert(vector, value, 0x00);
}
else if (index == 1)
{
return Sse41.Insert(vector, value, 0x10);
}
else if (index == 2)
{
return Sse41.Insert(vector, value, 0x20);
}
else if (index == 3)
{
return Sse41.Insert(vector, value, 0x30);
}
else
{
throw new ArgumentOutOfRangeException(nameof(index));
}
}
else if (Sse2.IsSupported)
{
//SSE2 fallback: split the raw 32-bit pattern into two 16-bit halves
//and write them into lanes index * 2 and index * 2 + 1.
//NOTE(review): unlike the SSE4.1 path, index is not range-checked here.
int intValue = BitConverter.SingleToInt32Bits(value);
ushort low = (ushort)(intValue >> 0);
ushort high = (ushort)(intValue >> 16);
Vector128<ushort> shortVector = Sse.StaticCast<float, ushort>(vector);
shortVector = Sse2.Insert(shortVector, low, (byte)(index * 2 + 0));
shortVector = Sse2.Insert(shortVector, high, (byte)(index * 2 + 1));
return Sse.StaticCast<ushort, float>(shortVector);
}
throw new PlatformNotSupportedException();
}
/// <summary>
/// Inserts <paramref name="value"/> into <paramref name="vector"/> through the SSE4.1 insert, using the immediate 0b1110.
/// </summary>
/// <remarks>
/// There is no <c>Sse41.IsSupported</c> check here; the caller has to guarantee SSE4.1 is available.
/// </remarks>
/// <param name="value">Scalar to insert.</param>
/// <param name="vector">Source vector.</param>
/// <returns>The vector produced by the insert.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> Sse41VectorInsertScalarSingle(float value, Vector128<float> vector)
{
    //Note: 0b1110 is the mask to zero the upper bits.
    const byte ZeroUpperMask = 0b1110;

    return Sse41.Insert(vector, value, ZeroUpperMask);
}
/// <summary>
/// Returns an all-zero vector of signed bytes.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSByteZero()
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse2.SetZeroVector128<sbyte>();
}
/// <summary>
/// Returns an all-zero vector of 16-bit signed integers.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorInt16Zero()
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse2.SetZeroVector128<short>();
}
/// <summary>
/// Returns an all-zero vector of 32-bit signed integers.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorInt32Zero()
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse2.SetZeroVector128<int>();
}
/// <summary>
/// Returns an all-zero vector of 64-bit signed integers.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorInt64Zero()
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse2.SetZeroVector128<long>();
}
/// <summary>
/// Returns an all-zero vector of single-precision floats.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSingleZero()
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.SetZeroVector128();
}
/// <summary>
/// Returns an all-zero vector of double-precision floats.
/// </summary>
/// <returns>A zeroed 128-bit vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE2 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorDoubleZero()
{
    if (!Sse2.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse2.SetZeroVector128<double>();
}
/// <summary>
/// Returns the bitwise AND of <paramref name="vector"/> with <c>Zero32128Mask</c>.
/// </summary>
/// <remarks>
/// NOTE(review): judging by the name, the mask presumably keeps the lowest 32 bits and clears
/// bits 32 to 127; confirm against the definition of <c>Zero32128Mask</c>.
/// </remarks>
/// <param name="vector">Vector to mask.</param>
/// <returns>The masked vector.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorZero32_128(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.And(vector, Zero32128Mask);
}
/// <summary>
/// Reinterprets a float vector as a vector of signed bytes; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as signed bytes.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, sbyte>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 16-bit signed integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 16-bit signed integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorSingleToInt16(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, short>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 32-bit signed integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 32-bit signed integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorSingleToInt32(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, int>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 64-bit signed integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 64-bit signed integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorSingleToInt64(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, long>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of unsigned bytes; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as unsigned bytes.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> VectorSingleToByte(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, byte>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 16-bit unsigned integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 16-bit unsigned integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, ushort>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 32-bit unsigned integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 32-bit unsigned integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<uint> VectorSingleToUInt32(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, uint>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of 64-bit unsigned integers; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as 64-bit unsigned integers.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, ulong>(vector);
}
/// <summary>
/// Reinterprets a float vector as a vector of doubles; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as double-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<float, double>(vector);
}
/// <summary>
/// Reinterprets a vector of signed bytes as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<sbyte, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 16-bit signed integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt16ToSingle(Vector128<short> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<short, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 32-bit signed integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt32ToSingle(Vector128<int> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<int, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 64-bit signed integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt64ToSingle(Vector128<long> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<long, float>(vector);
}
/// <summary>
/// Reinterprets a vector of unsigned bytes as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorByteToSingle(Vector128<byte> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<byte, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 16-bit unsigned integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<ushort, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 32-bit unsigned integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<uint, float>(vector);
}
/// <summary>
/// Reinterprets a vector of 64-bit unsigned integers as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<ulong, float>(vector);
}
/// <summary>
/// Reinterprets a vector of doubles as a float vector; the bits are not converted.
/// </summary>
/// <param name="vector">Vector to reinterpret.</param>
/// <returns>The same 128 bits, typed as single-precision floats.</returns>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.StaticCast<double, float>(vector);
}
}
}