ryujinx/ChocolArm64/State/CpuThreadState.cs
gdkchan a731ab3a2a Add a new JIT compiler for CPU code (#693)
* Start of the ARMeilleure project

* Refactoring around the old IRAdapter, now renamed to PreAllocator

* Optimize the LowestBitSet method

* Add CLZ support and fix CLS implementation

* Add missing Equals and GetHashCode overrides on some structs, misc small tweaks

* Implement the ByteSwap IR instruction, and some refactoring on the assembler

* Implement the DivideUI IR instruction and fix 64-bits IDIV

* Correct constant operand type on CSINC

* Move division instructions implementation to InstEmitDiv

* Fix destination type for the ConditionalSelect IR instruction

* Implement UMULH and SMULH, with new IR instructions

* Fix some issues with shift instructions

* Fix constant types for BFM instructions

* Fix up new tests using the new V128 struct

* Update tests

* Move DIV tests to a separate file

* Add support for calls, and some instructions that depends on them

* Start adding support for SIMD & FP types, along with some of the related ARM instructions

* Fix some typos and the divide instruction with FP operands

* Fix wrong method call on Clz_V

* Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes

* Implement SIMD logical instructions and more misc. fixes

* Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations

* Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. fixes

* Implement SIMD shift instruction and fix Dup_V

* Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table

* Fix check with tolerance on tester

* Implement FP & SIMD comparison instructions, and some fixes

* Update FCVT (Scalar) encoding on the table to support the Half-float variants

* Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes

* Use old memory access methods, made a start on SIMD memory insts support, some fixes

* Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes

* Fix arguments count with struct return values, other fixes

* More instructions

* Misc. fixes and integrate LDj3SNuD fixes

* Update tests

* Add a faster linear scan allocator, unwinding support on windows, and other changes

* Update Ryujinx.HLE

* Update Ryujinx.Graphics

* Fix V128 return pointer passing, RCX is clobbered

* Update Ryujinx.Tests

* Update ITimeZoneService

* Stop using GetFunctionPointer as that can't be called from native code, misc. fixes and tweaks

* Use generic GetFunctionPointerForDelegate method and other tweaks

* Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics

* Remove some unused code on the assembler

* Fix REX.W prefix regression on float conversion instructions, add some sort of profiler

* Add hardware capability detection

* Fix regression on Sha1h and revert Fcm** changes

* Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator

* Fix silly mistake introduced on last commit on CpuId

* Generate inline stack probes when the stack allocation is too large

* Initial support for the System-V ABI

* Support multiple destination operands

* Fix SSE2 VectorInsert8 path, and other fixes

* Change placement of XMM callee save and restore code to match other compilers

* Rename Dest to Destination and Inst to Instruction

* Fix a regression related to calls and the V128 type

* Add an extra space on comments to match code style

* Some refactoring

* Fix vector insert FP32 SSE2 path

* Port over the ARM32 instructions

* Avoid memory protection races on JIT Cache

* Another fix on VectorInsert FP32 (thanks to LDj3SNuD

* Float operands don't need to use the same register when VEX is supported

* Add a new register allocator, higher quality code for hot code (tier up), and other tweaks

* Some nits, small improvements on the pre allocator

* CpuThreadState is gone

* Allow changing CPU emulators with a config entry

* Add runtime identifiers on the ARMeilleure project

* Allow switching between CPUs through a config entry (pt. 2)

* Change win10-x64 to win-x64 on projects

* Update the Ryujinx project to use ARMeilleure

* Ensure that the selected register is valid on the hybrid allocator

* Allow exiting on returns to 0 (should fix test regression)

* Remove register assignments for most used variables on the hybrid allocator

* Do not use fixed registers as spill temp

* Add missing namespace and remove unneeded using

* Address PR feedback

* Fix types, etc

* Enable AssumeStrictAbiCompliance by default

* Ensure that Spill and Fill don't load or store any more than necessary
2019-08-08 21:56:22 +03:00

354 lines
13 KiB
C#

using ChocolArm64.Translation;
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using static ChocolArm64.Instructions.VectorHelper;
namespace ChocolArm64.State
{
public class CpuThreadState : ARMeilleure.State.IExecutionContext
{
private const int MinCountForCheck = 40000;
internal const int ErgSizeLog2 = 4;
internal const int DczSizeLog2 = 4;
public ulong X0, X1, X2, X3, X4, X5, X6, X7,
X8, X9, X10, X11, X12, X13, X14, X15,
X16, X17, X18, X19, X20, X21, X22, X23,
X24, X25, X26, X27, X28, X29, X30, X31;
public Vector128<float> V0, V1, V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13, V14, V15,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, V31;
public bool IsAarch32 { get; set; }
public bool Thumb;
public bool BigEndian;
public bool Overflow;
public bool Carry;
public bool Zero;
public bool Negative;
public int ElrHyp;
public bool Running { get; set; }
private bool _interrupted;
private int _syncCount;
public long TpidrEl0 { get; set; }
public long Tpidr { get; set; }
public int CFpcr { get; set; }
public int CFpsr { get; set; }
public ARMeilleure.State.FPCR Fpcr
{
get => (ARMeilleure.State.FPCR)CFpcr;
set => CFpcr = (int)value;
}
public ARMeilleure.State.FPSR Fpsr
{
get => (ARMeilleure.State.FPSR)CFpsr;
set => CFpsr = (int)value;
}
public int Psr
{
get
{
return (Negative ? (int)PState.NMask : 0) |
(Zero ? (int)PState.ZMask : 0) |
(Carry ? (int)PState.CMask : 0) |
(Overflow ? (int)PState.VMask : 0);
}
}
public uint CtrEl0 => 0x8444c004;
public uint DczidEl0 => 0x00000004;
public ulong CntfrqEl0 { get; set; }
public ulong CntpctEl0
{
get
{
double ticks = _tickCounter.ElapsedTicks * _hostTickFreq;
return (ulong)(ticks * CntfrqEl0);
}
}
public event EventHandler<EventArgs> Interrupt;
public event EventHandler<ARMeilleure.State.InstExceptionEventArgs> Break;
public event EventHandler<ARMeilleure.State.InstExceptionEventArgs> SupervisorCall;
public event EventHandler<ARMeilleure.State.InstUndefinedEventArgs> Undefined;
private static Stopwatch _tickCounter;
private static double _hostTickFreq;
internal Translator CurrentTranslator;
private ulong _exclusiveAddress;
internal ulong ExclusiveValueLow { get; set; }
internal ulong ExclusiveValueHigh { get; set; }
public CpuThreadState()
{
ClearExclusiveAddress();
Running = true;
}
static CpuThreadState()
{
_hostTickFreq = 1.0 / Stopwatch.Frequency;
_tickCounter = new Stopwatch();
_tickCounter.Start();
}
internal void SetExclusiveAddress(ulong address)
{
_exclusiveAddress = GetMaskedExclusiveAddress(address);
}
internal bool CheckExclusiveAddress(ulong address)
{
return GetMaskedExclusiveAddress(address) == _exclusiveAddress;
}
internal void ClearExclusiveAddress()
{
_exclusiveAddress = ulong.MaxValue;
}
private ulong GetMaskedExclusiveAddress(ulong address)
{
return address & ~((4UL << ErgSizeLog2) - 1);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool Synchronize()
{
// Firing a interrupt frequently is expensive, so we only
// do it after a given number of instructions has executed.
_syncCount++;
if (_syncCount >= MinCountForCheck)
{
CheckInterrupt();
}
return Running;
}
[MethodImpl(MethodImplOptions.NoInlining)]
private void CheckInterrupt()
{
_syncCount = 0;
if (_interrupted)
{
_interrupted = false;
Interrupt?.Invoke(this, EventArgs.Empty);
}
}
public ulong GetX(int index)
{
switch (index)
{
case 0: return X0;
case 1: return X1;
case 2: return X2;
case 3: return X3;
case 4: return X4;
case 5: return X5;
case 6: return X6;
case 7: return X7;
case 8: return X8;
case 9: return X9;
case 10: return X10;
case 11: return X11;
case 12: return X12;
case 13: return X13;
case 14: return X14;
case 15: return X15;
case 16: return X16;
case 17: return X17;
case 18: return X18;
case 19: return X19;
case 20: return X20;
case 21: return X21;
case 22: return X22;
case 23: return X23;
case 24: return X24;
case 25: return X25;
case 26: return X26;
case 27: return X27;
case 28: return X28;
case 29: return X29;
case 30: return X30;
case 31: return X31;
default: throw new ArgumentOutOfRangeException(nameof(index));
}
}
public void SetX(int index, ulong value)
{
switch (index)
{
case 0: X0 = value; break;
case 1: X1 = value; break;
case 2: X2 = value; break;
case 3: X3 = value; break;
case 4: X4 = value; break;
case 5: X5 = value; break;
case 6: X6 = value; break;
case 7: X7 = value; break;
case 8: X8 = value; break;
case 9: X9 = value; break;
case 10: X10 = value; break;
case 11: X11 = value; break;
case 12: X12 = value; break;
case 13: X13 = value; break;
case 14: X14 = value; break;
case 15: X15 = value; break;
case 16: X16 = value; break;
case 17: X17 = value; break;
case 18: X18 = value; break;
case 19: X19 = value; break;
case 20: X20 = value; break;
case 21: X21 = value; break;
case 22: X22 = value; break;
case 23: X23 = value; break;
case 24: X24 = value; break;
case 25: X25 = value; break;
case 26: X26 = value; break;
case 27: X27 = value; break;
case 28: X28 = value; break;
case 29: X29 = value; break;
case 30: X30 = value; break;
case 31: X31 = value; break;
default: throw new ArgumentOutOfRangeException(nameof(index));
}
}
public ARMeilleure.State.V128 GetV(int index)
{
switch (index)
{
case 0: return new ARMeilleure.State.V128(VectorExtractIntZx(V0, 0, 3), VectorExtractIntZx(V0, 1, 3));
case 1: return new ARMeilleure.State.V128(VectorExtractIntZx(V1, 0, 3), VectorExtractIntZx(V1, 1, 3));
case 2: return new ARMeilleure.State.V128(VectorExtractIntZx(V2, 0, 3), VectorExtractIntZx(V2, 1, 3));
case 3: return new ARMeilleure.State.V128(VectorExtractIntZx(V3, 0, 3), VectorExtractIntZx(V3, 1, 3));
case 4: return new ARMeilleure.State.V128(VectorExtractIntZx(V4, 0, 3), VectorExtractIntZx(V4, 1, 3));
case 5: return new ARMeilleure.State.V128(VectorExtractIntZx(V5, 0, 3), VectorExtractIntZx(V5, 1, 3));
case 6: return new ARMeilleure.State.V128(VectorExtractIntZx(V6, 0, 3), VectorExtractIntZx(V6, 1, 3));
case 7: return new ARMeilleure.State.V128(VectorExtractIntZx(V7, 0, 3), VectorExtractIntZx(V7, 1, 3));
case 8: return new ARMeilleure.State.V128(VectorExtractIntZx(V8, 0, 3), VectorExtractIntZx(V8, 1, 3));
case 9: return new ARMeilleure.State.V128(VectorExtractIntZx(V9, 0, 3), VectorExtractIntZx(V9, 1, 3));
case 10: return new ARMeilleure.State.V128(VectorExtractIntZx(V10, 0, 3), VectorExtractIntZx(V10, 1, 3));
case 11: return new ARMeilleure.State.V128(VectorExtractIntZx(V11, 0, 3), VectorExtractIntZx(V11, 1, 3));
case 12: return new ARMeilleure.State.V128(VectorExtractIntZx(V12, 0, 3), VectorExtractIntZx(V12, 1, 3));
case 13: return new ARMeilleure.State.V128(VectorExtractIntZx(V13, 0, 3), VectorExtractIntZx(V13, 1, 3));
case 14: return new ARMeilleure.State.V128(VectorExtractIntZx(V14, 0, 3), VectorExtractIntZx(V14, 1, 3));
case 15: return new ARMeilleure.State.V128(VectorExtractIntZx(V15, 0, 3), VectorExtractIntZx(V15, 1, 3));
case 16: return new ARMeilleure.State.V128(VectorExtractIntZx(V16, 0, 3), VectorExtractIntZx(V16, 1, 3));
case 17: return new ARMeilleure.State.V128(VectorExtractIntZx(V17, 0, 3), VectorExtractIntZx(V17, 1, 3));
case 18: return new ARMeilleure.State.V128(VectorExtractIntZx(V18, 0, 3), VectorExtractIntZx(V18, 1, 3));
case 19: return new ARMeilleure.State.V128(VectorExtractIntZx(V19, 0, 3), VectorExtractIntZx(V19, 1, 3));
case 20: return new ARMeilleure.State.V128(VectorExtractIntZx(V20, 0, 3), VectorExtractIntZx(V20, 1, 3));
case 21: return new ARMeilleure.State.V128(VectorExtractIntZx(V21, 0, 3), VectorExtractIntZx(V21, 1, 3));
case 22: return new ARMeilleure.State.V128(VectorExtractIntZx(V22, 0, 3), VectorExtractIntZx(V22, 1, 3));
case 23: return new ARMeilleure.State.V128(VectorExtractIntZx(V23, 0, 3), VectorExtractIntZx(V23, 1, 3));
case 24: return new ARMeilleure.State.V128(VectorExtractIntZx(V24, 0, 3), VectorExtractIntZx(V24, 1, 3));
case 25: return new ARMeilleure.State.V128(VectorExtractIntZx(V25, 0, 3), VectorExtractIntZx(V25, 1, 3));
case 26: return new ARMeilleure.State.V128(VectorExtractIntZx(V26, 0, 3), VectorExtractIntZx(V26, 1, 3));
case 27: return new ARMeilleure.State.V128(VectorExtractIntZx(V27, 0, 3), VectorExtractIntZx(V27, 1, 3));
case 28: return new ARMeilleure.State.V128(VectorExtractIntZx(V28, 0, 3), VectorExtractIntZx(V28, 1, 3));
case 29: return new ARMeilleure.State.V128(VectorExtractIntZx(V29, 0, 3), VectorExtractIntZx(V29, 1, 3));
case 30: return new ARMeilleure.State.V128(VectorExtractIntZx(V30, 0, 3), VectorExtractIntZx(V30, 1, 3));
case 31: return new ARMeilleure.State.V128(VectorExtractIntZx(V31, 0, 3), VectorExtractIntZx(V31, 1, 3));
default: throw new ArgumentOutOfRangeException(nameof(index));
}
}
public bool GetPstateFlag(ARMeilleure.State.PState flag)
{
switch (flag)
{
case ARMeilleure.State.PState.NFlag: return Negative;
case ARMeilleure.State.PState.ZFlag: return Zero;
case ARMeilleure.State.PState.CFlag: return Carry;
case ARMeilleure.State.PState.VFlag: return Overflow;
default: throw new ArgumentOutOfRangeException(nameof(flag));
}
}
public void RequestInterrupt()
{
_interrupted = true;
}
internal void OnBreak(long position, int imm)
{
Break?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm));
}
internal void OnSvcCall(long position, int imm)
{
SupervisorCall?.Invoke(this, new ARMeilleure.State.InstExceptionEventArgs((ulong)position, imm));
}
internal void OnUndefined(long position, int rawOpCode)
{
Undefined?.Invoke(this, new ARMeilleure.State.InstUndefinedEventArgs((ulong)position, rawOpCode));
}
internal ExecutionMode GetExecutionMode()
{
if (!IsAarch32)
{
return ExecutionMode.Aarch64;
}
else
{
return Thumb ? ExecutionMode.Aarch32Thumb : ExecutionMode.Aarch32Arm;
}
}
internal bool GetFpcrFlag(Fpcr flag)
{
return (CFpcr & (1 << (int)flag)) != 0;
}
internal void SetFpsrFlag(Fpsr flag)
{
CFpsr |= 1 << (int)flag;
}
internal RoundMode FPRoundingMode()
{
return (RoundMode)((CFpcr >> (int)State.Fpcr.RMode) & 3);
}
public void Dispose() { }
}
}