Refactoring and optimization on CPU translation (#661)

* Refactoring and optimization on CPU translation

* Remove now unused property

* Rename ilBlock -> block (local)

* Change equality comparison on RegisterMask for consistency

Co-Authored-By: gdkchan <gab.dark.100@gmail.com>

* Add back the aggressive inlining attribute to the Synchronize method

* Implement IEquatable on the Register struct

* Fix indentation
This commit is contained in:
gdkchan 2019-04-26 01:55:12 -03:00 committed by jduncanator
parent 2b8eac1bce
commit 8a7d99cdea
48 changed files with 1257 additions and 1280 deletions

View file

@ -1,7 +0,0 @@
namespace ChocolArm64.Translation
{
// Common contract for every element that can be stored in an IL block
// (opcodes, labels, barriers, nested blocks) and later written out.
interface IILEmit
{
// Writes this element out using the supplied method builder.
void Emit(ILMethodBuilder context);
}
}

View file

@ -1,7 +0,0 @@
namespace ChocolArm64.Translation
{
// Marker element that produces no IL of its own.
struct ILBarrier : IILEmit
{
// Intentionally empty: nothing is written to the generator for a barrier.
public void Emit(ILMethodBuilder context) { }
}
}

View file

@ -1,74 +0,0 @@
using System.Collections.Generic;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Ordered list of IL emitters, plus the register input/output bit masks
    /// that are accumulated while emitters are appended through <see cref="Add"/>.
    /// </summary>
    /// <remarks>
    /// In the integer masks, bits 0-31 track integer registers and bits 32-63
    /// track flags. The vector masks use bits 0-31 only.
    /// </remarks>
    class ILBlock : IILEmit
    {
        public long IntInputs  { get; private set; }
        public long IntOutputs { get; private set; }

        // Snapshot of IntOutputs taken at the last barrier.
        private long _intAwOutputs;

        public long VecInputs  { get; private set; }
        public long VecOutputs { get; private set; }

        // Snapshot of VecOutputs taken at the last barrier.
        private long _vecAwOutputs;

        public bool HasStateStore { get; private set; }

        private List<IILEmit> _emitters;

        public int Count => _emitters.Count;

        public ILBlock Next   { get; set; }
        public ILBlock Branch { get; set; }

        public ILBlock()
        {
            _emitters = new List<IILEmit>();
        }

        /// <summary>
        /// Appends an emitter to the block and updates the register usage masks
        /// according to the kind of emitter being added.
        /// </summary>
        /// <param name="emitter">Emitter to append</param>
        public void Add(IILEmit emitter)
        {
            switch (emitter)
            {
                case ILBarrier _:
                    //Barriers separate the groups of CIL opcodes emitted by each
                    //ARM instruction. Outputs written by an instruction only count
                    //for input elimination once the whole instruction was emitted,
                    //so the "already written" snapshots are refreshed only here.
                    _intAwOutputs = IntOutputs;
                    _vecAwOutputs = VecOutputs;
                    break;

                case ILOpCodeLoad ld when ILMethodBuilder.IsRegIndex(ld.Index):
                {
                    long loadBit = 1L << ld.Index;

                    //A load is an input only when the register was not
                    //already written before the current instruction.
                    if (ld.VarType == VarType.Flag)
                    {
                        IntInputs |= (loadBit << 32) & ~_intAwOutputs;
                    }
                    else if (ld.VarType == VarType.Int)
                    {
                        IntInputs |= loadBit & ~_intAwOutputs;
                    }
                    else if (ld.VarType == VarType.Vector)
                    {
                        VecInputs |= loadBit & ~_vecAwOutputs;
                    }

                    break;
                }

                case ILOpCodeStore st when ILMethodBuilder.IsRegIndex(st.Index):
                {
                    long storeBit = 1L << st.Index;

                    if (st.VarType == VarType.Flag)
                    {
                        IntOutputs |= storeBit << 32;
                    }
                    else if (st.VarType == VarType.Int)
                    {
                        IntOutputs |= storeBit;
                    }
                    else if (st.VarType == VarType.Vector)
                    {
                        VecOutputs |= storeBit;
                    }

                    break;
                }

                case ILOpCodeStoreState _:
                    HasStateStore = true;
                    break;
            }

            _emitters.Add(emitter);
        }

        /// <summary>
        /// Emits every stored emitter, in the order they were added.
        /// </summary>
        /// <param name="context">Method builder handed to each emitter</param>
        public void Emit(ILMethodBuilder context)
        {
            foreach (IILEmit element in _emitters)
            {
                element.Emit(context);
            }
        }
    }
}

View file

@ -1,5 +1,6 @@
using ChocolArm64.Decoders;
using ChocolArm64.Instructions;
using ChocolArm64.IntermediateRepresentation;
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
@ -16,16 +17,23 @@ namespace ChocolArm64.Translation
private TranslatorCache _cache;
private TranslatorQueue _queue;
private Dictionary<long, ILLabel> _labels;
private long _subPosition;
private int _opcIndex;
private Block _currBlock;
public Block CurrBlock => _currBlock;
public OpCode64 CurrOp => _currBlock?.OpCodes[_opcIndex];
public Block CurrBlock
{
get
{
return _currBlock;
}
set
{
_currBlock = value;
ResetBlockState();
}
}
public OpCode64 CurrOp { get; set; }
public TranslationTier Tier { get; }
@ -35,13 +43,15 @@ namespace ChocolArm64.Translation
public bool HasSlowCall { get; set; }
private Dictionary<Block, ILBlock> _visitedBlocks;
private Dictionary<long, ILLabel> _labels;
private Queue<Block> _branchTargets;
private Dictionary<ILLabel, BasicBlock> _irLabels;
private List<ILBlock> _ilBlocks;
private List<BasicBlock> _irBlocks;
private ILBlock _ilBlock;
private BasicBlock _irBlock;
private bool _needsNewBlock;
private OpCode64 _optOpLastCompare;
private OpCode64 _optOpLastFlagSet;
@ -72,36 +82,25 @@ namespace ChocolArm64.Translation
MemoryManager memory,
TranslatorCache cache,
TranslatorQueue queue,
TranslationTier tier,
Block graph)
TranslationTier tier)
{
Memory = memory ?? throw new ArgumentNullException(nameof(memory));
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
_currBlock = graph ?? throw new ArgumentNullException(nameof(graph));
Memory = memory ?? throw new ArgumentNullException(nameof(memory));
_cache = cache ?? throw new ArgumentNullException(nameof(cache));
_queue = queue ?? throw new ArgumentNullException(nameof(queue));
Tier = tier;
_labels = new Dictionary<long, ILLabel>();
_visitedBlocks = new Dictionary<Block, ILBlock>();
_irLabels = new Dictionary<ILLabel, BasicBlock>();
_visitedBlocks.Add(graph, new ILBlock());
_irBlocks = new List<BasicBlock>();
_branchTargets = new Queue<Block>();
NewNextBlock();
_ilBlocks = new List<ILBlock>();
EmitSynchronization();
_subPosition = graph.Position;
ResetBlockState();
if (AdvanceOpCode())
{
EmitSynchronization();
_ilBlock.Add(new ILOpCodeLoadState(_ilBlock, isSubEntry: true));
}
EmitLoadContext();
}
public static int GetIntTempIndex()
@ -114,96 +113,15 @@ namespace ChocolArm64.Translation
return UserVecTempStart + _userVecTempCount++;
}
public ILBlock[] GetILBlocks()
public BasicBlock[] GetBlocks()
{
EmitAllOpCodes();
return _ilBlocks.ToArray();
return _irBlocks.ToArray();
}
private void EmitAllOpCodes()
{
do
{
EmitOpCode();
}
while (AdvanceOpCode());
}
private void EmitOpCode()
{
if (_currBlock == null)
{
return;
}
int opcIndex = _opcIndex;
if (opcIndex == 0)
{
MarkLabel(GetLabel(_currBlock.Position));
}
bool isLastOp = opcIndex == CurrBlock.OpCodes.Count - 1;
if (isLastOp && CurrBlock.Branch != null &&
(ulong)CurrBlock.Branch.Position <= (ulong)CurrBlock.Position)
{
EmitSynchronization();
}
//On AARCH32 mode, (almost) all instruction can be conditionally
//executed, and the required condition is encoded on the opcode.
//We handle that here, skipping the instruction if the condition
//is not met. We can just ignore it when the condition is "Always",
//because in this case the instruction is always going to be executed.
//Condition "Never" is also ignored because this is a special encoding
//used by some unconditional instructions.
ILLabel lblSkip = null;
if (CurrOp is OpCode32 op && op.Cond < Condition.Al)
{
lblSkip = new ILLabel();
EmitCondBranch(lblSkip, GetInverseCond(op.Cond));
}
CurrOp.Emitter(this);
if (lblSkip != null)
{
MarkLabel(lblSkip);
//If this is the last op on the block, and there's no "next" block
//after this one, then we have to return right now, with the address
//of the next instruction to be executed (in the case that the condition
//is false, and the branch was not taken, as all basic blocks should end with
//some kind of branch).
if (isLastOp && CurrBlock.Next == null)
{
EmitStoreState();
EmitLdc_I8(CurrOp.Position + CurrOp.OpCodeSizeInBytes);
Emit(OpCodes.Ret);
}
}
_ilBlock.Add(new ILBarrier());
}
private static Condition GetInverseCond(Condition cond)
{
//Bit 0 of all conditions is basically a negation bit, so
//inverting this bit has the effect of inverting the condition.
return (Condition)((int)cond ^ 1);
}
private void EmitSynchronization()
public void EmitSynchronization()
{
EmitLdarg(TranslatedSub.StateArgIdx);
EmitLdc_I4(_currBlock.OpCodes.Count);
EmitPrivateCall(typeof(CpuThreadState), nameof(CpuThreadState.Synchronize));
EmitLdc_I4(0);
@ -219,83 +137,24 @@ namespace ChocolArm64.Translation
MarkLabel(lblContinue);
}
private bool AdvanceOpCode()
public void ResetBlockStateForPredicatedOp()
{
if (_currBlock == null)
//Check if this is a predicated instruction that modifies flags,
//in this case the value of the flags is unknown as we don't know
//in advance if the instruction is going to be executed or not.
//So, we reset the block state to prevent an invalid optimization.
if (CurrOp == _optOpLastFlagSet)
{
return false;
}
while (++_opcIndex >= _currBlock.OpCodes.Count)
{
if (!AdvanceBlock())
{
return false;
}
ResetBlockState();
}
return true;
}
private bool AdvanceBlock()
{
if (_currBlock.Branch != null)
{
if (_visitedBlocks.TryAdd(_currBlock.Branch, _ilBlock.Branch))
{
_branchTargets.Enqueue(_currBlock.Branch);
}
}
if (_currBlock.Next != null)
{
if (_visitedBlocks.TryAdd(_currBlock.Next, _ilBlock.Next))
{
_currBlock = _currBlock.Next;
return true;
}
else
{
Emit(OpCodes.Br, GetLabel(_currBlock.Next.Position));
}
}
return _branchTargets.TryDequeue(out _currBlock);
}
private void ResetBlockState()
{
_ilBlock = _visitedBlocks[_currBlock];
_ilBlocks.Add(_ilBlock);
_ilBlock.Next = GetOrCreateILBlock(_currBlock.Next);
_ilBlock.Branch = GetOrCreateILBlock(_currBlock.Branch);
_opcIndex = -1;
_optOpLastFlagSet = null;
_optOpLastCompare = null;
}
private ILBlock GetOrCreateILBlock(Block block)
{
if (block == null)
{
return null;
}
if (_visitedBlocks.TryGetValue(block, out ILBlock ilBlock))
{
return ilBlock;
}
return new ILBlock();
}
public void TranslateAhead(long position, ExecutionMode mode = ExecutionMode.Aarch64)
{
if (_cache.TryGetSubroutine(position, out TranslatedSub sub) && sub.Tier != TranslationTier.Tier0)
@ -320,19 +179,12 @@ namespace ChocolArm64.Translation
return false;
}
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub))
if (!_cache.TryGetSubroutine(op.Imm, out TranslatedSub sub) || sub.Tier != TranslationTier.Tier0)
{
return false;
}
//It's not worth to call a Tier0 method, because
//it contains slow code, rather than the entire function.
if (sub.Tier == TranslationTier.Tier0)
{
return false;
}
EmitStoreState(sub);
EmitStoreContext();
for (int index = 0; index < TranslatedSub.FixedArgTypes.Length; index++)
{
@ -350,8 +202,8 @@ namespace ChocolArm64.Translation
InstEmitAluHelper.EmitAluLoadOpers(this);
Stloc(CmpOptTmp2Index, VarType.Int);
Stloc(CmpOptTmp1Index, VarType.Int);
Stloc(CmpOptTmp2Index, RegisterType.Int);
Stloc(CmpOptTmp1Index, RegisterType.Int);
}
private Dictionary<Condition, OpCode> _branchOps = new Dictionary<Condition, OpCode>()
@ -375,8 +227,8 @@ namespace ChocolArm64.Translation
{
if (_optOpLastCompare.Emitter == InstEmit.Subs)
{
Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp2Index, VarType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp1Index, RegisterType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp2Index, RegisterType.Int, _optOpLastCompare.RegisterSize);
Emit(_branchOps[cond], target);
@ -388,17 +240,17 @@ namespace ChocolArm64.Translation
&& cond != Condition.LeUn)
{
//There are several limitations that need to be taken into account for CMN comparisons:
//* The unsigned comparisons are not valid, as they depend on the
//- The unsigned comparisons are not valid, as they depend on the
//carry flag value, and they will have different values for addition and
//subtraction. For addition, it's carry, and for subtraction, it's borrow.
//So, we need to make sure we're not doing an unsigned compare for the CMN case.
//* We can only do the optimization for the immediate variants,
//- We can only do the optimization for the immediate variants,
//because when the second operand value is exactly INT_MIN, we can't
//negate the value as there's no positive counterpart.
//Such invalid values can't be encoded on the immediate encodings.
if (_optOpLastCompare is IOpCodeAluImm64 op)
{
Ldloc(CmpOptTmp1Index, VarType.Int, _optOpLastCompare.RegisterSize);
Ldloc(CmpOptTmp1Index, RegisterType.Int, _optOpLastCompare.RegisterSize);
if (_optOpLastCompare.RegisterSize == RegisterSize.Int32)
{
@ -456,9 +308,7 @@ namespace ChocolArm64.Translation
break;
}
ilOp = (intCond & 1) != 0
? OpCodes.Brfalse
: OpCodes.Brtrue;
ilOp = (intCond & 1) != 0 ? OpCodes.Brfalse : OpCodes.Brtrue;
}
else
{
@ -484,17 +334,14 @@ namespace ChocolArm64.Translation
bool sz64 = CurrOp.RegisterSize != RegisterSize.Int32;
if (sz64 == (intType == IntType.UInt64 ||
intType == IntType.Int64))
if (sz64 == (intType == IntType.UInt64 || intType == IntType.Int64))
{
return;
}
if (sz64)
{
Emit(intType >= IntType.Int8
? OpCodes.Conv_I8
: OpCodes.Conv_U8);
Emit(intType >= IntType.Int8 ? OpCodes.Conv_I8 : OpCodes.Conv_U8);
}
else
{
@ -520,14 +367,14 @@ namespace ChocolArm64.Translation
{
if (amount > 0)
{
Stloc(RorTmpIndex, VarType.Int);
Ldloc(RorTmpIndex, VarType.Int);
Stloc(RorTmpIndex, RegisterType.Int);
Ldloc(RorTmpIndex, RegisterType.Int);
EmitLdc_I4(amount);
Emit(OpCodes.Shr_Un);
Ldloc(RorTmpIndex, VarType.Int);
Ldloc(RorTmpIndex, RegisterType.Int);
EmitLdc_I4(CurrOp.GetBitsCount() - amount);
@ -550,32 +397,60 @@ namespace ChocolArm64.Translation
public void MarkLabel(ILLabel label)
{
_ilBlock.Add(label);
if (_irLabels.TryGetValue(label, out BasicBlock nextBlock))
{
nextBlock.Index = _irBlocks.Count;
_irBlocks.Add(nextBlock);
NextBlock(nextBlock);
}
else
{
NewNextBlock();
_irLabels.Add(label, _irBlock);
}
AddOperation(Operation.MarkLabel(label));
}
public void Emit(OpCode ilOp)
{
_ilBlock.Add(new ILOpCode(ilOp));
AddOperation(Operation.IL(ilOp));
if (ilOp == OpCodes.Ret)
{
NextBlock(null);
_needsNewBlock = true;
}
}
public void Emit(OpCode ilOp, ILLabel label)
{
_ilBlock.Add(new ILOpCodeBranch(ilOp, label));
AddOperation(Operation.ILBranch(ilOp, label));
_needsNewBlock = true;
if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock))
{
branchBlock = new BasicBlock();
_irLabels.Add(label, branchBlock);
}
_irBlock.Branch = branchBlock;
}
public void EmitFieldLoad(FieldInfo info)
public void EmitLdfld(FieldInfo info)
{
_ilBlock.Add(new ILOpCodeLoadField(info));
}
public void EmitPrint(string text)
{
_ilBlock.Add(new ILOpCodeLog(text));
AddOperation(Operation.LoadField(info));
}
public void EmitLdarg(int index)
{
_ilBlock.Add(new ILOpCodeLoad(index, VarType.Arg));
AddOperation(Operation.LoadArgument(index));
}
public void EmitLdintzr(int index)
@ -602,24 +477,16 @@ namespace ChocolArm64.Translation
}
}
public void EmitLoadState()
public void EmitLoadContext()
{
if (_ilBlock.Next == null)
{
throw new InvalidOperationException("Can't load state for next block, because there's no next block.");
}
_needsNewBlock = true;
_ilBlock.Add(new ILOpCodeLoadState(_ilBlock.Next));
AddOperation(Operation.LoadContext());
}
public void EmitStoreState()
public void EmitStoreContext()
{
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock));
}
private void EmitStoreState(TranslatedSub callSub)
{
_ilBlock.Add(new ILOpCodeStoreState(_ilBlock, callSub));
AddOperation(Operation.StoreContext());
}
public void EmitLdtmp() => EmitLdint(IntGpTmp1Index);
@ -637,17 +504,17 @@ namespace ChocolArm64.Translation
public void EmitLdvectmp3() => EmitLdvec(VecGpTmp3Index);
public void EmitStvectmp3() => EmitStvec(VecGpTmp3Index);
public void EmitLdint(int index) => Ldloc(index, VarType.Int);
public void EmitStint(int index) => Stloc(index, VarType.Int);
public void EmitLdint(int index) => Ldloc(index, RegisterType.Int);
public void EmitStint(int index) => Stloc(index, RegisterType.Int);
public void EmitLdvec(int index) => Ldloc(index, VarType.Vector);
public void EmitStvec(int index) => Stloc(index, VarType.Vector);
public void EmitLdvec(int index) => Ldloc(index, RegisterType.Vector);
public void EmitStvec(int index) => Stloc(index, RegisterType.Vector);
public void EmitLdflg(int index) => Ldloc(index, VarType.Flag);
public void EmitLdflg(int index) => Ldloc(index, RegisterType.Flag);
public void EmitStflg(int index)
{
//Set this only if any of the NZCV flag bits were modified.
//This is used to ensure that, when emiting a direct IL branch
//This is used to ensure that when emitting a direct IL branch
//instruction for compare + branch sequences, we're not expecting
//to use comparison values from an old instruction, when in fact
//the flags were already overwritten by another instruction further along.
@ -656,22 +523,22 @@ namespace ChocolArm64.Translation
_optOpLastFlagSet = CurrOp;
}
Stloc(index, VarType.Flag);
Stloc(index, RegisterType.Flag);
}
private void Ldloc(int index, VarType varType)
private void Ldloc(int index, RegisterType type)
{
_ilBlock.Add(new ILOpCodeLoad(index, varType, CurrOp.RegisterSize));
AddOperation(Operation.LoadLocal(index, type, CurrOp.RegisterSize));
}
private void Ldloc(int index, VarType varType, RegisterSize registerSize)
private void Ldloc(int index, RegisterType type, RegisterSize size)
{
_ilBlock.Add(new ILOpCodeLoad(index, varType, registerSize));
AddOperation(Operation.LoadLocal(index, type, size));
}
private void Stloc(int index, VarType varType)
private void Stloc(int index, RegisterType type)
{
_ilBlock.Add(new ILOpCodeStore(index, varType, CurrOp.RegisterSize));
AddOperation(Operation.StoreLocal(index, type, CurrOp.RegisterSize));
}
public void EmitCallPropGet(Type objType, string propName)
@ -726,7 +593,19 @@ namespace ChocolArm64.Translation
public void EmitCall(MethodInfo mthdInfo, bool isVirtual = false)
{
_ilBlock.Add(new ILOpCodeCall(mthdInfo ?? throw new ArgumentNullException(nameof(mthdInfo)), isVirtual));
if (mthdInfo == null)
{
throw new ArgumentNullException(nameof(mthdInfo));
}
if (isVirtual)
{
AddOperation(Operation.CallVirtual(mthdInfo));
}
else
{
AddOperation(Operation.Call(mthdInfo));
}
}
public void EmitLdc_I(long value)
@ -743,22 +622,22 @@ namespace ChocolArm64.Translation
public void EmitLdc_I4(int value)
{
_ilBlock.Add(new ILOpCodeConst(value));
AddOperation(Operation.LoadConstant(value));
}
public void EmitLdc_I8(long value)
{
_ilBlock.Add(new ILOpCodeConst(value));
AddOperation(Operation.LoadConstant(value));
}
public void EmitLdc_R4(float value)
{
_ilBlock.Add(new ILOpCodeConst(value));
AddOperation(Operation.LoadConstant(value));
}
public void EmitLdc_R8(double value)
{
_ilBlock.Add(new ILOpCodeConst(value));
AddOperation(Operation.LoadConstant(value));
}
public void EmitZnFlagCheck()
@ -781,5 +660,50 @@ namespace ChocolArm64.Translation
EmitStflg(flag);
}
// Appends an operation to the current IR block, starting a new block first
// when a previous emit requested one through the _needsNewBlock flag.
private void AddOperation(Operation operation)
{
if (_needsNewBlock)
{
// NewNextBlock replaces _irBlock; the flag is cleared inside NextBlock.
NewNextBlock();
}
_irBlock.Add(operation);
}
// Creates a basic block whose index is its position on the block list,
// registers it on that list, and makes it the current block.
private void NewNextBlock()
{
    int newIndex = _irBlocks.Count;

    BasicBlock created = new BasicBlock(newIndex);

    _irBlocks.Add(created);

    NextBlock(created);
}
// Makes nextBlock (which may be null) the current block. The previous block
// is linked to it through Next, unless there is no previous block or the
// previous block ends with an unconditional branch.
private void NextBlock(BasicBlock nextBlock)
{
    BasicBlock previous = _irBlock;

    bool canFallThrough = previous != null && !EndsWithUnconditional(previous);

    if (canFallThrough)
    {
        previous.Next = nextBlock;
    }

    _irBlock = nextBlock;

    //A block switch just happened, so any pending request is satisfied.
    _needsNewBlock = false;
}
// Returns true when the last operation of the block is an IL branch
// using the Br or Br_S opcode, false in every other case (including
// a block without a last operation).
private static bool EndsWithUnconditional(BasicBlock block)
{
    Operation tail = block.GetLastOp();

    bool isBranch = !(tail == null || tail.Type != OperationType.ILBranch);

    if (!isBranch)
    {
        return false;
    }

    //The branch opcode is stored as the first argument of the operation.
    OpCode branchOp = tail.GetArg<OpCode>(0);

    if (branchOp == OpCodes.Br)
    {
        return true;
    }

    return branchOp == OpCodes.Br_S;
}
}
}

View file

@ -117,13 +117,5 @@ namespace ChocolArm64
break;
}
}
// Emits an argument load for each argument index from 0 up to count - 1,
// in ascending order. Nothing is emitted when count is zero or negative.
public static void EmitLdargSeq(this ILGenerator generator, int count)
{
    int argIndex = 0;

    while (argIndex < count)
    {
        generator.EmitLdarg(argIndex);

        argIndex++;
    }
}
}
}

View file

@ -1,28 +0,0 @@
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Label whose underlying <see cref="Label"/> is defined on the generator
    /// the first time it is requested, and reused afterwards.
    /// </summary>
    class ILLabel : IILEmit
    {
        private bool  _hasLabel;
        private Label _label;

        /// <summary>
        /// Marks the current position of the generator with this label.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            Label target = GetLabel(context);

            context.Generator.MarkLabel(target);
        }

        /// <summary>
        /// Gets the generator label, defining it on first use.
        /// </summary>
        public Label GetLabel(ILMethodBuilder context)
        {
            if (_hasLabel)
            {
                return _label;
            }

            _label    = context.Generator.DefineLabel();
            _hasLabel = true;

            return _label;
        }
    }
}

View file

@ -1,123 +0,0 @@
using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Builds a <see cref="DynamicMethod"/> from a list of IL blocks, and keeps
    /// track of the IL local variable assigned to each guest register.
    /// </summary>
    class ILMethodBuilder
    {
        private const int RegsCount = 32;
        private const int RegsMask  = RegsCount - 1;

        public RegisterUsage RegUsage { get; private set; }

        public ILGenerator Generator { get; private set; }

        //Maps each register to the index of the IL local that holds it.
        private Dictionary<Register, int> _locals;

        private ILBlock[] _ilBlocks;

        private string _subName;

        public bool IsAarch64     { get; }
        public bool IsSubComplete { get; }

        private int _localsCount;

        /// <summary>
        /// Creates a new builder for the given blocks.
        /// </summary>
        /// <param name="ilBlocks">Blocks to emit; the first one is used as the entry for register usage analysis</param>
        /// <param name="subName">Name given to the generated dynamic method</param>
        /// <param name="isAarch64">Stored on <see cref="IsAarch64"/></param>
        /// <param name="isSubComplete">Stored on <see cref="IsSubComplete"/></param>
        public ILMethodBuilder(
            ILBlock[] ilBlocks,
            string    subName,
            bool      isAarch64,
            bool      isSubComplete = false)
        {
            _ilBlocks     = ilBlocks;
            _subName      = subName;
            IsAarch64     = isAarch64;
            IsSubComplete = isSubComplete;
        }

        /// <summary>
        /// Runs the register usage analysis, emits all blocks into a new
        /// dynamic method, and returns the prepared subroutine.
        /// </summary>
        /// <param name="tier">Translation tier forwarded to the subroutine</param>
        /// <param name="isWorthOptimizing">Forwarded to the subroutine</param>
        /// <returns>The subroutine wrapping the generated method</returns>
        public TranslatedSub GetSubroutine(TranslationTier tier, bool isWorthOptimizing)
        {
            RegUsage = new RegisterUsage();

            RegUsage.BuildUses(_ilBlocks[0]);

            DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes);

            long intNiRegsMask = RegUsage.GetIntNotInputs(_ilBlocks[0]);
            long vecNiRegsMask = RegUsage.GetVecNotInputs(_ilBlocks[0]);

            TranslatedSub subroutine = new TranslatedSub(
                method,
                intNiRegsMask,
                vecNiRegsMask,
                tier,
                isWorthOptimizing);

            //Locals are allocated lazily by GetLocalIndex while the blocks are emitted.
            _locals = new Dictionary<Register, int>();

            _localsCount = 0;

            Generator = method.GetILGenerator();

            foreach (ILBlock ilBlock in _ilBlocks)
            {
                ilBlock.Emit(this);
            }

            subroutine.PrepareMethod();

            return subroutine;
        }

        /// <summary>
        /// Gets the index of the IL local assigned to a register, declaring
        /// a new local on the generator the first time the register is seen.
        /// </summary>
        /// <param name="reg">Register to look up</param>
        /// <returns>Index of the local variable</returns>
        public int GetLocalIndex(Register reg)
        {
            if (!_locals.TryGetValue(reg, out int index))
            {
                Generator.DeclareLocal(GetFieldType(reg.Type));

                index = _localsCount++;

                _locals.Add(reg, index);
            }

            return index;
        }

        /// <summary>
        /// Gets the CLR type of the local used to hold a register of the given type.
        /// </summary>
        /// <exception cref="ArgumentException">The register type is not Flag, Int or Vector</exception>
        private static Type GetFieldType(RegisterType regType)
        {
            switch (regType)
            {
                case RegisterType.Flag:   return typeof(bool);
                case RegisterType.Int:    return typeof(ulong);
                case RegisterType.Vector: return typeof(Vector128<float>);
            }

            //The single-string constructor takes the *message*, not the parameter
            //name, so both are supplied explicitly here.
            throw new ArgumentException($"Invalid register type \"{regType}\".", nameof(regType));
        }

        /// <summary>
        /// Converts a bit position of a register mask into a register.
        /// Bits below 32 map to a register of the base type. Higher bits are
        /// only valid for the integer base type, where they map to flags.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">Bit is 32 or higher and the base type is not Int</exception>
        public static Register GetRegFromBit(int bit, RegisterType baseType)
        {
            if (bit < RegsCount)
            {
                return new Register(bit, baseType);
            }
            else if (baseType == RegisterType.Int)
            {
                return new Register(bit & RegsMask, RegisterType.Flag);
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(bit));
            }
        }

        /// <summary>
        /// Checks whether the index is in the range 0 to 31 (negative values are rejected by the unsigned compare).
        /// </summary>
        public static bool IsRegIndex(int index)
        {
            return (uint)index < RegsCount;
        }
    }
}

View file

@ -1,19 +0,0 @@
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for an IL opcode that takes no operand.
    /// </summary>
    struct ILOpCode : IILEmit
    {
        public OpCode ILOp { get; }

        public ILOpCode(OpCode ilOp)
        {
            ILOp = ilOp;
        }

        /// <summary>
        /// Writes the stored opcode to the generator of the method builder.
        /// </summary>
        public void Emit(ILMethodBuilder context) => context.Generator.Emit(ILOp);
    }
}

View file

@ -1,21 +0,0 @@
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for an IL opcode whose operand is a label.
    /// </summary>
    struct ILOpCodeBranch : IILEmit
    {
        public OpCode  ILOp  { get; }
        public ILLabel Label { get; }

        public ILOpCodeBranch(OpCode ilOp, ILLabel label)
        {
            ILOp  = ilOp;
            Label = label;
        }

        /// <summary>
        /// Resolves the label against the method builder and writes
        /// the opcode with that label as operand.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            ILGenerator generator = context.Generator;

            generator.Emit(ILOp, Label.GetLabel(context));
        }
    }
}

View file

@ -1,23 +0,0 @@
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for a method call, either direct or virtual.
    /// </summary>
    struct ILOpCodeCall : IILEmit
    {
        public MethodInfo Info { get; }

        public bool IsVirtual { get; }

        public ILOpCodeCall(MethodInfo info, bool isVirtual)
        {
            Info      = info;
            IsVirtual = isVirtual;
        }

        /// <summary>
        /// Writes a Callvirt when the call is virtual, and a Call otherwise.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            OpCode callOp = OpCodes.Call;

            if (IsVirtual)
            {
                callOp = OpCodes.Callvirt;
            }

            context.Generator.Emit(callOp, Info);
        }
    }
}

View file

@ -1,67 +0,0 @@
using System.Reflection.Emit;
using System.Runtime.InteropServices;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for a constant load of a 32/64-bit integer or a
    /// single/double precision floating point value.
    /// </summary>
    class ILOpCodeConst : IILEmit
    {
        //All members share offset 0, so the raw bits of any of the
        //supported constant types can be read back through I8.
        [StructLayout(LayoutKind.Explicit, Size = 8)]
        private struct ImmVal
        {
            [FieldOffset(0)] public int    I4;
            [FieldOffset(0)] public long   I8;
            [FieldOffset(0)] public float  R4;
            [FieldOffset(0)] public double R8;
        }

        private enum ConstType
        {
            Int32,
            Int64,
            Single,
            Double
        }

        private ImmVal _value;

        private ConstType _type;

        /// <summary>
        /// Raw 64-bit view of the stored constant.
        /// </summary>
        public long Value => _value.I8;

        private ILOpCodeConst(ConstType type)
        {
            _type = type;
        }

        public ILOpCodeConst(int value) : this(ConstType.Int32)
        {
            _value = new ImmVal { I4 = value };
        }

        public ILOpCodeConst(long value) : this(ConstType.Int64)
        {
            _value = new ImmVal { I8 = value };
        }

        public ILOpCodeConst(float value) : this(ConstType.Single)
        {
            _value = new ImmVal { R4 = value };
        }

        public ILOpCodeConst(double value) : this(ConstType.Double)
        {
            _value = new ImmVal { R8 = value };
        }

        /// <summary>
        /// Writes the load constant instruction that matches the type
        /// the constant was created with.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            if (_type == ConstType.Int32)
            {
                context.Generator.EmitLdc_I4(_value.I4);
            }
            else if (_type == ConstType.Int64)
            {
                context.Generator.Emit(OpCodes.Ldc_I8, _value.I8);
            }
            else if (_type == ConstType.Single)
            {
                context.Generator.Emit(OpCodes.Ldc_R4, _value.R4);
            }
            else if (_type == ConstType.Double)
            {
                context.Generator.Emit(OpCodes.Ldc_R8, _value.R8);
            }
        }
    }
}

View file

@ -1,46 +0,0 @@
using ChocolArm64.State;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for a load of a method argument, or of the local
    /// variable assigned to a flag, integer or vector register.
    /// </summary>
    struct ILOpCodeLoad : IILEmit
    {
        public int Index { get; }

        public VarType VarType { get; }

        public RegisterSize RegisterSize { get; }

        public ILOpCodeLoad(int index, VarType varType, RegisterSize registerSize = 0)
        {
            Index        = index;
            VarType      = varType;
            RegisterSize = registerSize;
        }

        /// <summary>
        /// Writes the load that matches the variable type.
        /// Nothing is written for any other variable type.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            if (VarType == VarType.Arg)
            {
                context.Generator.EmitLdarg(Index);
            }
            else if (VarType == VarType.Flag)
            {
                EmitLdloc(context, Index, RegisterType.Flag);
            }
            else if (VarType == VarType.Int)
            {
                EmitLdloc(context, Index, RegisterType.Int);
            }
            else if (VarType == VarType.Vector)
            {
                EmitLdloc(context, Index, RegisterType.Vector);
            }
        }

        private void EmitLdloc(ILMethodBuilder context, int index, RegisterType registerType)
        {
            int localIndex = context.GetLocalIndex(new Register(index, registerType));

            context.Generator.EmitLdloc(localIndex);

            bool isInt32 = registerType == RegisterType.Int &&
                           RegisterSize == RegisterSize.Int32;

            if (isInt32)
            {
                //32-bit integer loads get a Conv_U4 after the local is pushed.
                context.Generator.Emit(OpCodes.Conv_U4);
            }
        }
    }
}

View file

@ -1,20 +0,0 @@
using System.Reflection;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for a field load (Ldfld).
    /// </summary>
    struct ILOpCodeLoadField : IILEmit
    {
        public FieldInfo Info { get; }

        public ILOpCodeLoadField(FieldInfo info)
        {
            Info = info;
        }

        /// <summary>
        /// Writes a Ldfld for the stored field.
        /// </summary>
        public void Emit(ILMethodBuilder context) => context.Generator.Emit(OpCodes.Ldfld, Info);
    }
}

View file

@ -1,51 +0,0 @@
using ChocolArm64.State;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter that copies the registers used as input by a block
    /// from the state argument into their local variables.
    /// </summary>
    struct ILOpCodeLoadState : IILEmit
    {
        private ILBlock _block;

        //Stored by the constructor; not read anywhere within this type.
        private bool _isSubEntry;

        public ILOpCodeLoadState(ILBlock block, bool isSubEntry = false)
        {
            _block      = block;
            _isSubEntry = isSubEntry;
        }

        /// <summary>
        /// Queries the input masks of the block, optionally removes the
        /// caller saved registers, and emits one load per remaining register.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            long intMask = context.RegUsage.GetIntInputs(_block);
            long vecMask = context.RegUsage.GetVecInputs(_block);

            if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
            {
                intMask = RegisterUsage.ClearCallerSavedIntRegs(intMask, context.IsAarch64);
                vecMask = RegisterUsage.ClearCallerSavedVecRegs(vecMask, context.IsAarch64);
            }

            LoadLocals(context, intMask, RegisterType.Int);
            LoadLocals(context, vecMask, RegisterType.Vector);
        }

        private void LoadLocals(ILMethodBuilder context, long inputs, RegisterType baseType)
        {
            for (int bit = 0; bit < 64; bit++)
            {
                //Skip the registers that are not set on the mask.
                if (((inputs >> bit) & 1L) == 0)
                {
                    continue;
                }

                Register reg = ILMethodBuilder.GetRegFromBit(bit, baseType);

                //local = state.field
                context.Generator.EmitLdarg(TranslatedSub.StateArgIdx);
                context.Generator.Emit(OpCodes.Ldfld, reg.GetField());

                context.Generator.EmitStloc(context.GetLocalIndex(reg));
            }
        }
    }
}

View file

@ -1,17 +0,0 @@
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter that makes the generated code write a line of text.
    /// </summary>
    struct ILOpCodeLog : IILEmit
    {
        public string Text { get; }

        public ILOpCodeLog(string text)
        {
            Text = text;
        }

        /// <summary>
        /// Writes the IL that prints the stored text through EmitWriteLine.
        /// </summary>
        public void Emit(ILMethodBuilder context) => context.Generator.EmitWriteLine(Text);
    }
}

View file

@ -1,46 +0,0 @@
using ChocolArm64.State;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter for a store to a method argument, or to the local
    /// variable assigned to a flag, integer or vector register.
    /// </summary>
    struct ILOpCodeStore : IILEmit
    {
        public int Index { get; }

        public VarType VarType { get; }

        public RegisterSize RegisterSize { get; }

        public ILOpCodeStore(int index, VarType varType, RegisterSize registerSize = 0)
        {
            Index        = index;
            VarType      = varType;
            RegisterSize = registerSize;
        }

        /// <summary>
        /// Writes the store that matches the variable type.
        /// Nothing is written for any other variable type.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            if (VarType == VarType.Arg)
            {
                context.Generator.EmitStarg(Index);
            }
            else if (VarType == VarType.Flag)
            {
                EmitStloc(context, Index, RegisterType.Flag);
            }
            else if (VarType == VarType.Int)
            {
                EmitStloc(context, Index, RegisterType.Int);
            }
            else if (VarType == VarType.Vector)
            {
                EmitStloc(context, Index, RegisterType.Vector);
            }
        }

        private void EmitStloc(ILMethodBuilder context, int index, RegisterType registerType)
        {
            Register target = new Register(index, registerType);

            bool isInt32 = registerType == RegisterType.Int &&
                           RegisterSize == RegisterSize.Int32;

            if (isInt32)
            {
                //32-bit integer stores get a Conv_U8 before the local is written.
                context.Generator.Emit(OpCodes.Conv_U8);
            }

            int localIndex = context.GetLocalIndex(target);

            context.Generator.EmitStloc(localIndex);
        }
    }
}

View file

@ -1,60 +0,0 @@
using ChocolArm64.State;
using System.Reflection.Emit;
namespace ChocolArm64.Translation
{
    /// <summary>
    /// Emitter that copies the registers written by a block from
    /// their local variables back into the state argument.
    /// </summary>
    struct ILOpCodeStoreState : IILEmit
    {
        private ILBlock _block;

        private TranslatedSub _callSub;

        public ILOpCodeStoreState(ILBlock block, TranslatedSub callSub = null)
        {
            _block   = block;
            _callSub = callSub;
        }

        /// <summary>
        /// Queries the output masks of the block, removes the registers that
        /// do not need to be written, and emits one store per remaining register.
        /// </summary>
        public void Emit(ILMethodBuilder context)
        {
            long intMask = context.RegUsage.GetIntOutputs(_block);
            long vecMask = context.RegUsage.GetVecOutputs(_block);

            if (Optimizations.AssumeStrictAbiCompliance && context.IsSubComplete)
            {
                intMask = RegisterUsage.ClearCallerSavedIntRegs(intMask, context.IsAarch64);
                vecMask = RegisterUsage.ClearCallerSavedVecRegs(vecMask, context.IsAarch64);
            }

            if (_callSub != null)
            {
                //Registers on those masks are assigned by the callee without
                //being read first, so the callee never observes the values
                //and the stores can be left out.
                intMask &= ~_callSub.IntNiRegsMask;
                vecMask &= ~_callSub.VecNiRegsMask;
            }

            StoreLocals(context, intMask, RegisterType.Int);
            StoreLocals(context, vecMask, RegisterType.Vector);
        }

        private void StoreLocals(ILMethodBuilder context, long outputs, RegisterType baseType)
        {
            for (int bit = 0; bit < 64; bit++)
            {
                //Skip the registers that are not set on the mask.
                if (((outputs >> bit) & 1L) == 0)
                {
                    continue;
                }

                Register reg = ILMethodBuilder.GetRegFromBit(bit, baseType);

                //state.field = local
                context.Generator.EmitLdarg(TranslatedSub.StateArgIdx);
                context.Generator.EmitLdloc(context.GetLocalIndex(reg));

                context.Generator.Emit(OpCodes.Stfld, reg.GetField());
            }
        }
    }
}

View file

@ -1,269 +1,160 @@
using System;
using ChocolArm64.IntermediateRepresentation;
using ChocolArm64.State;
using System.Collections.Generic;
namespace ChocolArm64.Translation
{
class RegisterUsage
{
public const long CallerSavedIntRegistersMask = 0x7fL << 9;
public const long PStateNzcvFlagsMask = 0xfL << 60;
private const long CallerSavedIntRegistersMask = 0x7fL << 9;
private const long PStateNzcvFlagsMask = 0xfL << 60;
public const long CallerSavedVecRegistersMask = 0xffffL << 16;
private const long CallerSavedVecRegistersMask = 0xffffL << 16;
private class PathIo
private RegisterMask[] _inputs;
private RegisterMask[] _outputs;
public RegisterUsage(BasicBlock entryBlock, int blocksCount)
{
private Dictionary<ILBlock, long> _allInputs;
private Dictionary<ILBlock, long> _cmnOutputs;
_inputs = new RegisterMask[blocksCount];
_outputs = new RegisterMask[blocksCount];
private long _allOutputs;
HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
public PathIo()
Stack<BasicBlock> blockStack = new Stack<BasicBlock>();
List<BasicBlock> postOrderBlocks = new List<BasicBlock>(blocksCount);
visited.Add(entryBlock);
blockStack.Push(entryBlock);
while (blockStack.TryPop(out BasicBlock block))
{
_allInputs = new Dictionary<ILBlock, long>();
_cmnOutputs = new Dictionary<ILBlock, long>();
}
public void Set(ILBlock entry, long inputs, long outputs)
{
if (!_allInputs.TryAdd(entry, inputs))
if (block.Next != null && visited.Add(block.Next))
{
_allInputs[entry] |= inputs;
blockStack.Push(block);
blockStack.Push(block.Next);
}
if (!_cmnOutputs.TryAdd(entry, outputs))
else if (block.Branch != null && visited.Add(block.Branch))
{
_cmnOutputs[entry] &= outputs;
blockStack.Push(block);
blockStack.Push(block.Branch);
}
_allOutputs |= outputs;
}
public long GetInputs(ILBlock entry)
{
if (_allInputs.TryGetValue(entry, out long inputs))
else
{
//We also need to read the registers that may not be written
//by all paths that can reach a exit point, to ensure that
//the local variable will not remain uninitialized depending
//on the flow path taken.
return inputs | (_allOutputs & ~_cmnOutputs[entry]);
}
return 0;
}
public long GetOutputs()
{
return _allOutputs;
}
}
private Dictionary<ILBlock, PathIo> _intPaths;
private Dictionary<ILBlock, PathIo> _vecPaths;
//Pairs a block with the entry it is associated with, together with the integer and
//vector input/output masks carried along with it. Equality and hashing take all six
//members into account.
private struct BlockIo : IEquatable<BlockIo>
{
    public ILBlock Block { get; }
    public ILBlock Entry { get; }

    public long IntInputs  { get; set; }
    public long VecInputs  { get; set; }
    public long IntOutputs { get; set; }
    public long VecOutputs { get; set; }

    //Creates a value for the given block/entry pair with every mask set to zero.
    public BlockIo(ILBlock block, ILBlock entry) : this(block, entry, 0, 0, 0, 0) { }

    //Creates a value for the given block/entry pair with the supplied masks.
    public BlockIo(
        ILBlock block,
        ILBlock entry,
        long    intInputs,
        long    vecInputs,
        long    intOutputs,
        long    vecOutputs)
    {
        Block      = block;
        Entry      = entry;
        IntInputs  = intInputs;
        VecInputs  = vecInputs;
        IntOutputs = intOutputs;
        VecOutputs = vecOutputs;
    }

    //Only another BlockIo can compare equal; anything else (including null) does not.
    public override bool Equals(object obj) => obj is BlockIo other && Equals(other);

    //Member-wise comparison of the block, the entry and the four masks.
    public bool Equals(BlockIo other)
    {
        bool sameNodes   = other.Block      == Block      && other.Entry      == Entry;
        bool sameInputs  = other.IntInputs  == IntInputs  && other.VecInputs  == VecInputs;
        bool sameOutputs = other.IntOutputs == IntOutputs && other.VecOutputs == VecOutputs;

        return sameNodes && sameInputs && sameOutputs;
    }

    //Hash built from the same six members that Equals compares.
    public override int GetHashCode() =>
        HashCode.Combine(Block, Entry, IntInputs, VecInputs, IntOutputs, VecOutputs);

    public static bool operator ==(BlockIo lhs, BlockIo rhs) => lhs.Equals(rhs);

    public static bool operator !=(BlockIo lhs, BlockIo rhs) => !lhs.Equals(rhs);
}
//Creates the (initially empty) per-block path tables for the integer
//and for the vector registers.
public RegisterUsage()
{
    _vecPaths = new Dictionary<ILBlock, PathIo>();
    _intPaths = new Dictionary<ILBlock, PathIo>();
}
public void BuildUses(ILBlock entry)
{
//This will go through all possible paths on the graph,
//and store all inputs/outputs for each block. A register
//that was previously written to already is not considered an input.
//When a block can be reached by more than one path, then the
//output from all paths needs to be set for this block, and
//only outputs present in all of the parent blocks can be considered
//when doing input elimination. Each block chain has a entry, that's where
//the code starts executing. They are present on the subroutine start point,
//and on call return points too (address written to X30 by BL).
HashSet<BlockIo> visited = new HashSet<BlockIo>();
Queue<BlockIo> unvisited = new Queue<BlockIo>();
void Enqueue(BlockIo block)
{
if (visited.Add(block))
{
unvisited.Enqueue(block);
postOrderBlocks.Add(block);
}
}
Enqueue(new BlockIo(entry, entry));
RegisterMask[] cmnOutputMasks = new RegisterMask[blocksCount];
while (unvisited.Count > 0)
bool modified;
bool firstPass = true;
do
{
BlockIo current = unvisited.Dequeue();
modified = false;
current.IntInputs |= current.Block.IntInputs & ~current.IntOutputs;
current.VecInputs |= current.Block.VecInputs & ~current.VecOutputs;
current.IntOutputs |= current.Block.IntOutputs;
current.VecOutputs |= current.Block.VecOutputs;
//Check if this is a exit block
//(a block that returns or calls another sub).
if ((current.Block.Next == null &&
current.Block.Branch == null) || current.Block.HasStateStore)
for (int blkIndex = postOrderBlocks.Count - 1; blkIndex >= 0; blkIndex--)
{
if (!_intPaths.TryGetValue(current.Block, out PathIo intPath))
BasicBlock block = postOrderBlocks[blkIndex];
if (block.Predecessors.Count != 0 && !block.HasStateLoad)
{
_intPaths.Add(current.Block, intPath = new PathIo());
BasicBlock predecessor = block.Predecessors[0];
RegisterMask cmnOutputs = predecessor.RegOutputs | cmnOutputMasks[predecessor.Index];
RegisterMask outputs = _outputs[predecessor.Index];
for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
{
predecessor = block.Predecessors[pIndex];
cmnOutputs &= predecessor.RegOutputs | cmnOutputMasks[predecessor.Index];
outputs |= _outputs[predecessor.Index];
}
_inputs[block.Index] |= outputs & ~cmnOutputs;
if (!firstPass)
{
cmnOutputs &= cmnOutputMasks[block.Index];
}
if (Exchange(cmnOutputMasks, block.Index, cmnOutputs))
{
modified = true;
}
outputs |= block.RegOutputs;
if (Exchange(_outputs, block.Index, _outputs[block.Index] | outputs))
{
modified = true;
}
}
else if (Exchange(_outputs, block.Index, block.RegOutputs))
{
modified = true;
}
}
firstPass = false;
}
while (modified);
do
{
modified = false;
for (int blkIndex = 0; blkIndex < postOrderBlocks.Count; blkIndex++)
{
BasicBlock block = postOrderBlocks[blkIndex];
RegisterMask inputs = block.RegInputs;
if (block.Next != null)
{
inputs |= _inputs[block.Next.Index];
}
if (!_vecPaths.TryGetValue(current.Block, out PathIo vecPath))
if (block.Branch != null)
{
_vecPaths.Add(current.Block, vecPath = new PathIo());
inputs |= _inputs[block.Branch.Index];
}
intPath.Set(current.Entry, current.IntInputs, current.IntOutputs);
vecPath.Set(current.Entry, current.VecInputs, current.VecOutputs);
}
inputs &= ~cmnOutputMasks[block.Index];
void EnqueueFromCurrent(ILBlock block, bool retTarget)
{
BlockIo blockIo;
if (retTarget)
if (Exchange(_inputs, block.Index, _inputs[block.Index] | inputs))
{
blockIo = new BlockIo(block, block);
modified = true;
}
else
{
blockIo = new BlockIo(
block,
current.Entry,
current.IntInputs,
current.VecInputs,
current.IntOutputs,
current.VecOutputs);
}
Enqueue(blockIo);
}
if (current.Block.Next != null)
{
EnqueueFromCurrent(current.Block.Next, current.Block.HasStateStore);
}
if (current.Block.Branch != null)
{
EnqueueFromCurrent(current.Block.Branch, false);
}
}
while (modified);
}
public long GetIntInputs(ILBlock entry) => GetInputsImpl(entry, _intPaths.Values);
public long GetVecInputs(ILBlock entry) => GetInputsImpl(entry, _vecPaths.Values);
private long GetInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
{
long inputs = 0;
RegisterMask oldValue = masks[blkIndex];
foreach (PathIo path in values)
{
inputs |= path.GetInputs(entry);
}
masks[blkIndex] = value;
return inputs;
return oldValue != value;
}
public long GetIntNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _intPaths.Values);
public long GetVecNotInputs(ILBlock entry) => GetNotInputsImpl(entry, _vecPaths.Values);
public RegisterMask GetInputs(BasicBlock entryBlock) => _inputs[entryBlock.Index];
private long GetNotInputsImpl(ILBlock entry, IEnumerable<PathIo> values)
{
//Returns a mask with registers that are written to
//before being read. Only those registers that are
//written in all paths, and is not read before being
//written to on those paths, should be set on the mask.
long mask = -1L;
public RegisterMask GetOutputs(BasicBlock block) => _outputs[block.Index];
foreach (PathIo path in values)
{
mask &= path.GetOutputs() & ~path.GetInputs(entry);
}
return mask;
}
public long GetIntOutputs(ILBlock block) => _intPaths[block].GetOutputs();
public long GetVecOutputs(ILBlock block) => _vecPaths[block].GetOutputs();
public static long ClearCallerSavedIntRegs(long mask, bool isAarch64)
public static long ClearCallerSavedIntRegs(long mask, ExecutionMode mode)
{
//TODO: ARM32 support.
if (isAarch64)
if (mode == ExecutionMode.Aarch64)
{
mask &= ~(CallerSavedIntRegistersMask | PStateNzcvFlagsMask);
}
@ -271,10 +162,10 @@ namespace ChocolArm64.Translation
return mask;
}
public static long ClearCallerSavedVecRegs(long mask, bool isAarch64)
public static long ClearCallerSavedVecRegs(long mask, ExecutionMode mode)
{
//TODO: ARM32 support.
if (isAarch64)
if (mode == ExecutionMode.Aarch64)
{
mask &= ~CallerSavedVecRegistersMask;
}

View file

@ -26,25 +26,15 @@ namespace ChocolArm64.Translation
public TranslationTier Tier { get; }
public long IntNiRegsMask { get; }
public long VecNiRegsMask { get; }
private bool _isWorthOptimizing;
private bool _rejit;
private int _callCount;
public TranslatedSub(
DynamicMethod method,
long intNiRegsMask,
long vecNiRegsMask,
TranslationTier tier,
bool isWorthOptimizing)
public TranslatedSub(DynamicMethod method, TranslationTier tier, bool rejit)
{
Method = method ?? throw new ArgumentNullException(nameof(method));;
IntNiRegsMask = intNiRegsMask;
VecNiRegsMask = vecNiRegsMask;
_isWorthOptimizing = isWorthOptimizing;
Tier = tier;
Method = method ?? throw new ArgumentNullException(nameof(method));;
Tier = tier;
_rejit = rejit;
}
static TranslatedSub()
@ -82,9 +72,9 @@ namespace ChocolArm64.Translation
return Delegate(threadState, memory);
}
public bool IsWorthOptimizing()
public bool Rejit()
{
if (!_isWorthOptimizing)
if (!_rejit)
{
return false;
}
@ -94,9 +84,8 @@ namespace ChocolArm64.Translation
return false;
}
//Only return true once, so that it is
//added to the queue only once.
_isWorthOptimizing = false;
//Only return true once, so that it is added to the queue only once.
_rejit = false;
return true;
}

View file

@ -0,0 +1,274 @@
using ChocolArm64.IntermediateRepresentation;
using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using static ChocolArm64.State.RegisterConsts;
namespace ChocolArm64.Translation
{
class TranslatedSubBuilder
{
//Execution mode passed to RegisterUsage.ClearCallerSavedIntRegs/ClearCallerSavedVecRegs
//when the context load/store masks are trimmed. Set once by the constructor.
private readonly ExecutionMode _mode;

//When true (and Optimizations.AssumeStrictAbiCompliance is also set), the caller saved
//registers are removed from the context load/store masks. Set once by the constructor.
private readonly bool _isComplete;

//Maps a register to the index of the IL local that holds it. Recreated on each Build call.
private Dictionary<Register, int> _locals;

//Register usage information queried for the context load/store masks.
//Recreated on each Build call.
private RegisterUsage _regUsage;

//Creates a builder for the given execution mode. isComplete defaults to false.
public TranslatedSubBuilder(ExecutionMode mode, bool isComplete = false)
{
    _mode       = mode;
    _isComplete = isComplete;
}
//Emits the IL for every operation of the supplied blocks into a new DynamicMethod and
//returns the TranslatedSub wrapping it. The first element of blocks is handed to
//RegisterUsage as the entry block, so the array must contain at least one block.
public TranslatedSub Build(BasicBlock[] blocks, string name, TranslationTier tier, bool rejit = true)
{
    _regUsage = new RegisterUsage(blocks[0], blocks.Length);

    DynamicMethod dynMethod = new DynamicMethod(name, typeof(long), TranslatedSub.FixedArgTypes);

    TranslatedSub result = new TranslatedSub(dynMethod, tier, rejit);

    _locals = new Dictionary<Register, int>();

    Dictionary<ILLabel, Label> labelMap = new Dictionary<ILLabel, Label>();

    ILGenerator il = dynMethod.GetILGenerator();

    //Every ILLabel is bound to a single IL label, defined the first time it is requested.
    Label ResolveLabel(ILLabel irLabel)
    {
        if (labelMap.TryGetValue(irLabel, out Label known))
        {
            return known;
        }

        Label created = il.DefineLabel();

        labelMap.Add(irLabel, created);

        return created;
    }

    foreach (BasicBlock basicBlock in blocks)
    {
        for (int opIndex = 0; opIndex < basicBlock.Count; opIndex++)
        {
            Operation op = basicBlock.GetOperation(opIndex);

            //Operation types without a matching case emit nothing.
            switch (op.Type)
            {
                case OperationType.Call:
                {
                    il.Emit(OpCodes.Call, op.GetArg<MethodInfo>(0));

                    break;
                }

                case OperationType.CallVirtual:
                {
                    il.Emit(OpCodes.Callvirt, op.GetArg<MethodInfo>(0));

                    break;
                }

                case OperationType.IL:
                {
                    il.Emit(op.GetArg<OpCode>(0));

                    break;
                }

                case OperationType.ILBranch:
                {
                    OpCode branchOp = op.GetArg<OpCode>(0);
                    Label  target   = ResolveLabel(op.GetArg<ILLabel>(1));

                    il.Emit(branchOp, target);

                    break;
                }

                case OperationType.LoadArgument:
                {
                    il.EmitLdarg(op.GetArg<int>(0));

                    break;
                }

                case OperationType.LoadConstant:
                {
                    EmitLoadConstant(il, op.GetArg(0));

                    break;
                }

                case OperationType.LoadContext:
                {
                    EmitLoadContext(il, op.Parent);

                    break;
                }

                case OperationType.LoadField:
                {
                    il.Emit(OpCodes.Ldfld, op.GetArg<FieldInfo>(0));

                    break;
                }

                case OperationType.LoadLocal:
                {
                    int          regIndex = op.GetArg<int>(0);
                    RegisterType regType  = op.GetArg<RegisterType>(1);
                    RegisterSize regSize  = op.GetArg<RegisterSize>(2);

                    EmitLoadLocal(il, regIndex, regType, regSize);

                    break;
                }

                case OperationType.MarkLabel:
                {
                    il.MarkLabel(ResolveLabel(op.GetArg<ILLabel>(0)));

                    break;
                }

                case OperationType.StoreContext:
                {
                    EmitStoreContext(il, op.Parent);

                    break;
                }

                case OperationType.StoreLocal:
                {
                    int          regIndex = op.GetArg<int>(0);
                    RegisterType regType  = op.GetArg<RegisterType>(1);
                    RegisterSize regSize  = op.GetArg<RegisterSize>(2);

                    EmitStoreLocal(il, regIndex, regType, regSize);

                    break;
                }
            }
        }
    }

    result.PrepareMethod();

    return result;
}
//Emits the constant load matching the runtime type of value:
//int (EmitLdc_I4), long (Ldc_I8), float (Ldc_R4) or double (Ldc_R8).
//Any other value, including null, throws an ArgumentException. Emitting nothing
//would leave the IL evaluation stack one value short of what the following
//operations expect.
private static void EmitLoadConstant(ILGenerator generator, object value)
{
    switch (value)
    {
        case int    valI4: generator.EmitLdc_I4(valI4);           break;
        case long   valI8: generator.Emit(OpCodes.Ldc_I8, valI8); break;
        case float  valR4: generator.Emit(OpCodes.Ldc_R4, valR4); break;
        case double valR8: generator.Emit(OpCodes.Ldc_R8, valR8); break;

        default:
        {
            string typeName = value == null ? "null" : value.GetType().FullName;

            throw new ArgumentException($"Unsupported constant type \"{typeName}\".", nameof(value));
        }
    }
}
//Emits the code that copies the registers reported as inputs of the block
//from the state argument into their IL locals.
private void EmitLoadContext(ILGenerator generator, BasicBlock block)
{
    RegisterMask usage = _regUsage.GetInputs(block);

    long intMask = usage.IntMask;
    long vecMask = usage.VecMask;

    bool dropCallerSaved = Optimizations.AssumeStrictAbiCompliance && _isComplete;

    if (dropCallerSaved)
    {
        intMask = RegisterUsage.ClearCallerSavedIntRegs(intMask, _mode);
        vecMask = RegisterUsage.ClearCallerSavedVecRegs(vecMask, _mode);
    }

    //Integer (and flag) registers are loaded first, then the vector ones.
    LoadLocals(generator, intMask, RegisterType.Int);
    LoadLocals(generator, vecMask, RegisterType.Vector);
}
//For every bit set on inputs, emits "local = state.field" for the register
//that GetRegFromBit associates with that bit.
private void LoadLocals(ILGenerator generator, long inputs, RegisterType baseType)
{
    for (int bit = 0; bit < 64; bit++)
    {
        bool isSet = ((inputs >> bit) & 1L) != 0;

        if (!isSet)
        {
            continue;
        }

        Register reg = GetRegFromBit(bit, baseType);

        generator.EmitLdarg(TranslatedSub.StateArgIdx);
        generator.Emit(OpCodes.Ldfld, reg.GetField());

        generator.EmitStloc(GetLocalIndex(generator, reg));
    }
}
//Emits the code that copies the registers reported as outputs of the block
//from their IL locals back into the state argument.
private void EmitStoreContext(ILGenerator generator, BasicBlock block)
{
    RegisterMask usage = _regUsage.GetOutputs(block);

    long intMask = usage.IntMask;
    long vecMask = usage.VecMask;

    bool dropCallerSaved = Optimizations.AssumeStrictAbiCompliance && _isComplete;

    if (dropCallerSaved)
    {
        intMask = RegisterUsage.ClearCallerSavedIntRegs(intMask, _mode);
        vecMask = RegisterUsage.ClearCallerSavedVecRegs(vecMask, _mode);
    }

    //Integer (and flag) registers are stored first, then the vector ones.
    StoreLocals(generator, intMask, RegisterType.Int);
    StoreLocals(generator, vecMask, RegisterType.Vector);
}
//For every bit set on outputs, emits "state.field = local" for the register
//that GetRegFromBit associates with that bit.
private void StoreLocals(ILGenerator generator, long outputs, RegisterType baseType)
{
    for (int bit = 0; bit < 64; bit++)
    {
        bool isSet = ((outputs >> bit) & 1L) != 0;

        if (!isSet)
        {
            continue;
        }

        Register reg = GetRegFromBit(bit, baseType);

        generator.EmitLdarg(TranslatedSub.StateArgIdx);
        generator.EmitLdloc(GetLocalIndex(generator, reg));

        generator.Emit(OpCodes.Stfld, reg.GetField());
    }
}
//Pushes the local that holds the given register. A 32-bit access to
//an integer register is followed by a Conv_U4 on the loaded value.
private void EmitLoadLocal(ILGenerator generator, int index, RegisterType type, RegisterSize size)
{
    Register reg = new Register(index, type);

    int localIndex = GetLocalIndex(generator, reg);

    generator.EmitLdloc(localIndex);

    bool is32BitsInt = type == RegisterType.Int && size == RegisterSize.Int32;

    if (is32BitsInt)
    {
        generator.Emit(OpCodes.Conv_U4);
    }
}
//Pops the value on the top of the stack into the local that holds the given register.
//A 32-bit store to an integer register is preceded by a Conv_U8 on the value.
private void EmitStoreLocal(ILGenerator generator, int index, RegisterType type, RegisterSize size)
{
    Register reg = new Register(index, type);

    bool is32BitsInt = type == RegisterType.Int && size == RegisterSize.Int32;

    if (is32BitsInt)
    {
        generator.Emit(OpCodes.Conv_U8);
    }

    int localIndex = GetLocalIndex(generator, reg);

    generator.EmitStloc(localIndex);
}
//Returns the index of the IL local that holds the given register, declaring
//the local (with the type returned by GetFieldType) the first time it is requested.
private int GetLocalIndex(ILGenerator generator, Register reg)
{
    if (_locals.TryGetValue(reg, out int index))
    {
        return index;
    }

    //Use the index reported by the declared local itself, rather than assuming that
    //it matches the number of registers cached so far. The two only agree as long as
    //no other local is ever declared on the same generator.
    index = generator.DeclareLocal(GetFieldType(reg.Type)).LocalIndex;

    _locals.Add(reg, index);

    return index;
}
//Returns the CLR type used for the local of a register of the given type:
//bool for flags, ulong for integer registers and Vector128<float> for vector registers.
//Throws ArgumentException for any other register type.
private static Type GetFieldType(RegisterType regType)
{
    switch (regType)
    {
        case RegisterType.Flag:   return typeof(bool);
        case RegisterType.Int:    return typeof(ulong);
        case RegisterType.Vector: return typeof(Vector128<float>);
    }

    //The single string constructor of ArgumentException takes the message, not the
    //parameter name, so the (message, paramName) overload is used here.
    throw new ArgumentException($"Invalid register type \"{regType}\".", nameof(regType));
}
//Maps a bit position of a register mask to the register it stands for.
//Bits below RegsCount denote the register of type baseType with that index.
//The remaining bits are only accepted for RegisterType.Int, where they denote
//the flag with index (bit & RegsMask); otherwise ArgumentOutOfRangeException is thrown.
private static Register GetRegFromBit(int bit, RegisterType baseType)
{
    if (bit < RegsCount)
    {
        return new Register(bit, baseType);
    }

    if (baseType != RegisterType.Int)
    {
        throw new ArgumentOutOfRangeException(nameof(bit));
    }

    return new Register(bit & RegsMask, RegisterType.Flag);
}
}
}

View file

@ -1,8 +1,10 @@
using ChocolArm64.Decoders;
using ChocolArm64.Events;
using ChocolArm64.IntermediateRepresentation;
using ChocolArm64.Memory;
using ChocolArm64.State;
using System;
using System.Reflection.Emit;
using System.Threading;
namespace ChocolArm64.Translation
@ -82,7 +84,7 @@ namespace ChocolArm64.Translation
sub = TranslateLowCq(position, state.GetExecutionMode());
}
if (sub.IsWorthOptimizing())
if (sub.Rejit())
{
bool isComplete = cs == CallType.Call ||
cs == CallType.VirtualCall;
@ -124,58 +126,125 @@ namespace ChocolArm64.Translation
private TranslatedSub TranslateLowCq(long position, ExecutionMode mode)
{
Block block = Decoder.DecodeBasicBlock(_memory, position, mode);
Block[] blocks = Decoder.DecodeBasicBlock(_memory, (ulong)position, mode);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier0, block);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier0);
string subName = GetSubroutineName(position);
BasicBlock[] bbs = EmitAndGetBlocks(context, blocks);
bool isAarch64 = mode == ExecutionMode.Aarch64;
TranslatedSubBuilder builder = new TranslatedSubBuilder(mode);
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName, isAarch64);
string name = GetSubroutineName(position);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0, isWorthOptimizing: true);
TranslatedSub subroutine = builder.Build(bbs, name, TranslationTier.Tier0);
return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count);
return _cache.GetOrAdd(position, subroutine, GetOpsCount(bbs));
}
private TranslatedSub TranslateHighCq(long position, ExecutionMode mode, bool isComplete)
{
Block graph = Decoder.DecodeSubroutine(_memory, position, mode);
Block[] blocks = Decoder.DecodeSubroutine(_memory, (ulong)position, mode);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier1, graph);
ILEmitterCtx context = new ILEmitterCtx(_memory, _cache, _queue, TranslationTier.Tier1);
ILBlock[] ilBlocks = context.GetILBlocks();
if (blocks[0].Address != (ulong)position)
{
context.Emit(OpCodes.Br, context.GetLabel(position));
}
string subName = GetSubroutineName(position);
bool isAarch64 = mode == ExecutionMode.Aarch64;
BasicBlock[] bbs = EmitAndGetBlocks(context, blocks);
isComplete &= !context.HasIndirectJump;
ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName, isAarch64, isComplete);
TranslatedSubBuilder builder = new TranslatedSubBuilder(mode, isComplete);
TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1, context.HasSlowCall);
string name = GetSubroutineName(position);
int ilOpCount = 0;
foreach (ILBlock ilBlock in ilBlocks)
{
ilOpCount += ilBlock.Count;
}
TranslatedSub subroutine = builder.Build(bbs, name, TranslationTier.Tier1, context.HasSlowCall);
ForceAheadOfTimeCompilation(subroutine);
_cache.AddOrUpdate(position, subroutine, ilOpCount);
_cache.AddOrUpdate(position, subroutine, GetOpsCount(bbs));
return subroutine;
}
private string GetSubroutineName(long position)
//Runs the emitter of every decoded instruction of every block against the context,
//in order, and returns the blocks produced by the context afterwards.
private static BasicBlock[] EmitAndGetBlocks(ILEmitterCtx context, Block[] blocks)
{
    foreach (Block block in blocks)
    {
        context.CurrBlock = block;

        //Each decoded block starts with the label of its own address.
        context.MarkLabel(context.GetLabel((long)block.Address));

        for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
        {
            OpCode64 opCode = block.OpCodes[opIndex];

            context.CurrOp = opCode;

            bool isLastOp = opIndex + 1 == block.OpCodes.Count;

            //A block whose branch target is not located after the block itself
            //gets a synchronization emitted before its last instruction.
            bool needsSync = isLastOp && block.Branch != null && block.Branch.Address <= block.Address;

            if (needsSync)
            {
                context.EmitSynchronization();
            }

            ILLabel lblPredicateSkip = null;

            //Conditional (predicated) instruction: jump over its code
            //when the inverted condition holds.
            if (opCode is OpCode32 op && op.Cond < Condition.Al)
            {
                lblPredicateSkip = new ILLabel();

                context.EmitCondBranch(lblPredicateSkip, op.Cond.Invert());
            }

            opCode.Emitter(context);

            if (lblPredicateSkip == null)
            {
                continue;
            }

            context.MarkLabel(lblPredicateSkip);

            context.ResetBlockStateForPredicatedOp();

            //When the predicated instruction is the last one of a block that has
            //no "next" block, the skipped path has nowhere to fall through to.
            //In that case, store the context and return the address of the
            //instruction that follows it, as every basic block is expected
            //to end with some kind of branch.
            if (isLastOp && block.Next == null)
            {
                context.EmitStoreContext();
                context.EmitLdc_I8(opCode.Position + opCode.OpCodeSizeInBytes);

                context.Emit(OpCodes.Ret);
            }
        }
    }

    return context.GetBlocks();
}
//Builds the subroutine name: "Sub" followed by the position
//formatted as 16 lowercase hexadecimal digits.
private static string GetSubroutineName(long position) => $"Sub{position:x16}";
//Returns the sum of the Count of all the supplied blocks.
private static int GetOpsCount(BasicBlock[] blocks)
{
    int total = 0;

    for (int index = 0; index < blocks.Length; index++)
    {
        total += blocks[index].Count;
    }

    return total;
}
private void ForceAheadOfTimeCompilation(TranslatedSub subroutine)
{
subroutine.Execute(_dummyThreadState, null);

View file

@ -1,10 +0,0 @@
namespace ChocolArm64.Translation
{
    //Kinds of variable, numbered consecutively starting from zero.
    enum VarType
    {
        Arg    = 0,
        Flag   = 1,
        Int    = 2,
        Vector = 3
    }
}