From a694420d11ef74e4f0bf473be2b6f64635bc89c7 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Mon, 4 Feb 2019 18:26:05 -0300 Subject: [PATCH] Implement speculative translation on the CPU (#515) * Implement speculative translation on the CPU, and change how branches to unknown or untranslated addresses work * Port t0opt changes and other cleanups * Change namespace of translation-related classes to ChocolArm64.Translation, other minor tweaks * Fix typo * Translate higher quality code for indirect jumps as well, and in some cases that were missed when lower quality (tier 0) code was available * Remove debug print * Remove direct argument passing optimization, and enable tail calls for BR instructions * Call delegates directly with Callvirt rather than calling Execute, do not emit calls for tier 0 code * Remove unused property * Rename argument on ArmSubroutine delegate --- ChocolArm64/CpuThread.cs | 1 + ChocolArm64/Decoders/Decoder.cs | 73 +++++-- ChocolArm64/Instructions/InstEmitFlow.cs | 30 ++- .../Instructions/InstEmitFlowHelper.cs | 122 +++++++++++- ChocolArm64/State/CpuThreadState.cs | 3 + ChocolArm64/TranslatedSub.cs | 140 ------------- ChocolArm64/TranslatedSubType.cs | 8 - ChocolArm64/Translation/ILEmitterCtx.cs | 46 +++-- ChocolArm64/Translation/ILMethodBuilder.cs | 54 +---- ChocolArm64/Translation/ILOpCodeCall.cs | 11 +- ChocolArm64/Translation/ILOpCodeLoadField.cs | 20 ++ ChocolArm64/Translation/TranslatedSub.cs | 65 ++++++ ChocolArm64/Translation/TranslationTier.cs | 11 + ChocolArm64/Translation/Translator.cs | 188 ++++++++++++++++++ .../{ => Translation}/TranslatorCache.cs | 27 ++- ChocolArm64/Translation/TranslatorQueue.cs | 83 ++++++++ .../Translation/TranslatorQueueItem.cs | 20 ++ ChocolArm64/Translator.cs | 120 ----------- Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs | 2 +- Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs | 3 +- Ryujinx.Tests/Cpu/CpuTest.cs | 5 +- 21 files changed, 656 insertions(+), 376 deletions(-) delete mode 100644 
ChocolArm64/TranslatedSub.cs delete mode 100644 ChocolArm64/TranslatedSubType.cs create mode 100644 ChocolArm64/Translation/ILOpCodeLoadField.cs create mode 100644 ChocolArm64/Translation/TranslatedSub.cs create mode 100644 ChocolArm64/Translation/TranslationTier.cs create mode 100644 ChocolArm64/Translation/Translator.cs rename ChocolArm64/{ => Translation}/TranslatorCache.cs (87%) create mode 100644 ChocolArm64/Translation/TranslatorQueue.cs create mode 100644 ChocolArm64/Translation/TranslatorQueueItem.cs delete mode 100644 ChocolArm64/Translator.cs diff --git a/ChocolArm64/CpuThread.cs b/ChocolArm64/CpuThread.cs index 87b21395..6cd34f81 100644 --- a/ChocolArm64/CpuThread.cs +++ b/ChocolArm64/CpuThread.cs @@ -1,5 +1,6 @@ using ChocolArm64.Memory; using ChocolArm64.State; +using ChocolArm64.Translation; using System; using System.Threading; diff --git a/ChocolArm64/Decoders/Decoder.cs b/ChocolArm64/Decoders/Decoder.cs index 2b195412..6b5d79f0 100644 --- a/ChocolArm64/Decoders/Decoder.cs +++ b/ChocolArm64/Decoders/Decoder.cs @@ -25,14 +25,53 @@ namespace ChocolArm64.Decoders FillBlock(memory, mode, block); + OpCode64 lastOp = block.GetLastOp(); + + if (IsBranch(lastOp) && !IsCall(lastOp) && lastOp is IOpCodeBImm op) + { + //It's possible that the branch on this block lands on the middle of the block. + //This is more common on tight loops. In this case, we can improve the codegen + //a bit by changing the CFG and either making the branch point to the same block + //(which indicates that the block is a loop that jumps back to the start), and the + //other possible case is a jump somewhere on the middle of the block, which is + //also a loop, but in this case we need to split the block in half. 
+ if (op.Imm == start) + { + block.Branch = block; + } + else if ((ulong)op.Imm > (ulong)start && + (ulong)op.Imm < (ulong)block.EndPosition) + { + Block botBlock = new Block(op.Imm); + + int botBlockIndex = 0; + + long currPosition = start; + + while ((ulong)currPosition < (ulong)op.Imm) + { + currPosition += block.OpCodes[botBlockIndex++].OpCodeSizeInBytes; + } + + botBlock.OpCodes.AddRange(block.OpCodes); + + botBlock.OpCodes.RemoveRange(0, botBlockIndex); + + block.OpCodes.RemoveRange(botBlockIndex, block.OpCodes.Count - botBlockIndex); + + botBlock.EndPosition = block.EndPosition; + + block.EndPosition = op.Imm; + + botBlock.Branch = botBlock; + block.Next = botBlock; + } + } + return block; } - public static Block DecodeSubroutine( - TranslatorCache cache, - MemoryManager memory, - long start, - ExecutionMode mode) + public static Block DecodeSubroutine(MemoryManager memory, long start, ExecutionMode mode) { Dictionary visited = new Dictionary(); Dictionary visitedEnd = new Dictionary(); @@ -67,23 +106,16 @@ namespace ChocolArm64.Decoders //(except BL/BLR that are sub calls) or end of executable, Next is null. if (current.OpCodes.Count > 0) { - bool hasCachedSub = false; - OpCode64 lastOp = current.GetLastOp(); - if (lastOp is IOpCodeBImm op) + bool isCall = IsCall(lastOp); + + if (lastOp is IOpCodeBImm op && !isCall) { - if (op.Emitter == InstEmit.Bl) - { - hasCachedSub = cache.HasSubroutine(op.Imm); - } - else - { - current.Branch = Enqueue(op.Imm); - } + current.Branch = Enqueue(op.Imm); } - if (!IsUnconditionalBranch(lastOp) || hasCachedSub) + if (!IsUnconditionalBranch(lastOp) || isCall) { current.Next = Enqueue(current.EndPosition); } @@ -223,6 +255,13 @@ namespace ChocolArm64.Decoders opCode is IOpCode32BReg; } + private static bool IsCall(OpCode64 opCode) + { + //TODO (CQ): ARM32 support. 
+ return opCode.Emitter == InstEmit.Bl || + opCode.Emitter == InstEmit.Blr; + } + private static bool IsException(OpCode64 opCode) { return opCode.Emitter == InstEmit.Brk || diff --git a/ChocolArm64/Instructions/InstEmitFlow.cs b/ChocolArm64/Instructions/InstEmitFlow.cs index 181c6a04..a842dca9 100644 --- a/ChocolArm64/Instructions/InstEmitFlow.cs +++ b/ChocolArm64/Instructions/InstEmitFlow.cs @@ -3,6 +3,8 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System.Reflection.Emit; +using static ChocolArm64.Instructions.InstEmitFlowHelper; + namespace ChocolArm64.Instructions { static partial class InstEmit @@ -39,7 +41,7 @@ namespace ChocolArm64.Instructions context.EmitStint(RegisterAlias.Lr); context.EmitStoreState(); - InstEmitFlowHelper.EmitCall(context, op.Imm); + EmitCall(context, op.Imm); } public static void Blr(ILEmitterCtx context) @@ -51,7 +53,7 @@ namespace ChocolArm64.Instructions context.EmitStint(RegisterAlias.Lr); context.EmitStoreState(); - context.Emit(OpCodes.Ret); + EmitVirtualCall(context); } public static void Br(ILEmitterCtx context) @@ -61,7 +63,7 @@ namespace ChocolArm64.Instructions context.EmitStoreState(); context.EmitLdintzr(op.Rn); - context.Emit(OpCodes.Ret); + EmitVirtualJump(context); } public static void Cbnz(ILEmitterCtx context) => EmitCb(context, OpCodes.Bne_Un); @@ -106,10 +108,17 @@ namespace ChocolArm64.Instructions { OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; - if (context.CurrBlock.Next != null && - context.CurrBlock.Branch != null) + if (context.CurrBlock.Branch != null) { context.EmitCondBranch(context.GetLabel(op.Imm), cond); + + if (context.CurrBlock.Next == null) + { + context.EmitStoreState(); + context.EmitLdc_I8(op.Position + 4); + + context.Emit(OpCodes.Ret); + } } else { @@ -135,10 +144,17 @@ namespace ChocolArm64.Instructions { OpCodeBImm64 op = (OpCodeBImm64)context.CurrOp; - if (context.CurrBlock.Next != null && - context.CurrBlock.Branch != null) + if (context.CurrBlock.Branch != null) { 
context.Emit(ilOp, context.GetLabel(op.Imm)); + + if (context.CurrBlock.Next == null) + { + context.EmitStoreState(); + context.EmitLdc_I8(op.Position + 4); + + context.Emit(OpCodes.Ret); + } } else { diff --git a/ChocolArm64/Instructions/InstEmitFlowHelper.cs b/ChocolArm64/Instructions/InstEmitFlowHelper.cs index cf093bb3..e93ef426 100644 --- a/ChocolArm64/Instructions/InstEmitFlowHelper.cs +++ b/ChocolArm64/Instructions/InstEmitFlowHelper.cs @@ -1,4 +1,6 @@ +using ChocolArm64.State; using ChocolArm64.Translation; +using System.Reflection; using System.Reflection.Emit; namespace ChocolArm64.Instructions @@ -7,12 +9,120 @@ namespace ChocolArm64.Instructions { public static void EmitCall(ILEmitterCtx context, long imm) { - if (context.TryOptEmitSubroutineCall()) + if (context.Tier == TranslationTier.Tier0) + { + context.TranslateAhead(imm); + + context.EmitLdc_I8(imm); + + context.Emit(OpCodes.Ret); + + return; + } + + if (!context.TryOptEmitSubroutineCall()) + { + context.TranslateAhead(imm); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdc_I8(imm); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateSubroutine)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + context.EmitCall(typeof(TranslatedSub), nameof(TranslatedSub.Execute)); + } + + EmitContinueOrReturnCheck(context); + } + + public static void EmitVirtualCall(ILEmitterCtx context) + { + EmitVirtualCallOrJump(context, isJump: false); + } + + public static void EmitVirtualJump(ILEmitterCtx context) + { + EmitVirtualCallOrJump(context, isJump: true); + } + + private static void EmitVirtualCallOrJump(ILEmitterCtx context, bool isJump) + { + if (context.Tier == TranslationTier.Tier0) + { + 
context.Emit(OpCodes.Dup); + + context.EmitSttmp(); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp(); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.TranslateVirtualSubroutine)); + + context.Emit(OpCodes.Ret); + } + else + { + context.EmitSttmp(); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitFieldLoad(typeof(CpuThreadState).GetField(nameof(CpuThreadState.CurrentTranslator), + BindingFlags.Instance | + BindingFlags.NonPublic)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdtmp(); + + context.EmitPrivateCall(typeof(Translator), nameof(Translator.GetOrTranslateVirtualSubroutine)); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + context.EmitLdarg(TranslatedSub.MemoryArgIdx); + + if (isJump) + { + //The tail prefix allows the JIT to jump to the next function, + //while releasing the stack space used by the current one. + //This is ideal for BR ARM instructions, which are + //basically indirect tail calls. + context.Emit(OpCodes.Tailcall); + } + + MethodInfo mthdInfo = typeof(ArmSubroutine).GetMethod("Invoke"); + + context.EmitCall(mthdInfo, isVirtual: true); + + if (!isJump) + { + EmitContinueOrReturnCheck(context); + } + else + { + context.Emit(OpCodes.Ret); + } + } + } + + private static void EmitContinueOrReturnCheck(ILEmitterCtx context) + { + //Note: The return value of the called method will be placed + //at the Stack, the return value is always a Int64 with the + //return address of the function. We check if the address is + //correct, if it isn't we keep returning until we reach the dispatcher. 
+ if (context.CurrBlock.Next != null) { - //Note: the return value of the called method will be placed - //at the Stack, the return value is always a Int64 with the - //return address of the function. We check if the address is - //correct, if it isn't we keep returning until we reach the dispatcher. context.Emit(OpCodes.Dup); context.EmitLdc_I8(context.CurrOp.Position + 4); @@ -30,8 +140,6 @@ namespace ChocolArm64.Instructions } else { - context.EmitLdc_I8(imm); - context.Emit(OpCodes.Ret); } } diff --git a/ChocolArm64/State/CpuThreadState.cs b/ChocolArm64/State/CpuThreadState.cs index 12edc429..abec60bb 100644 --- a/ChocolArm64/State/CpuThreadState.cs +++ b/ChocolArm64/State/CpuThreadState.cs @@ -1,4 +1,5 @@ using ChocolArm64.Events; +using ChocolArm64.Translation; using System; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -82,6 +83,8 @@ namespace ChocolArm64.State private static double _hostTickFreq; + internal Translator CurrentTranslator; + static CpuThreadState() { _hostTickFreq = 1.0 / Stopwatch.Frequency; diff --git a/ChocolArm64/TranslatedSub.cs b/ChocolArm64/TranslatedSub.cs deleted file mode 100644 index 653abcca..00000000 --- a/ChocolArm64/TranslatedSub.cs +++ /dev/null @@ -1,140 +0,0 @@ -using ChocolArm64.Memory; -using ChocolArm64.State; -using System; -using System.Collections.Generic; -using System.Collections.ObjectModel; -using System.Linq; -using System.Reflection; -using System.Reflection.Emit; - -namespace ChocolArm64 -{ - class TranslatedSub - { - private delegate long Aa64Subroutine(CpuThreadState register, MemoryManager memory); - - private const int MinCallCountForReJit = 250; - - private Aa64Subroutine _execDelegate; - - public static int StateArgIdx { get; private set; } - public static int MemoryArgIdx { get; private set; } - - public static Type[] FixedArgTypes { get; private set; } - - public DynamicMethod Method { get; private set; } - - public ReadOnlyCollection SubArgs { get; private set; } - - private HashSet 
_callers; - - private TranslatedSubType _type; - - private int _callCount; - - private bool _needsReJit; - - public TranslatedSub(DynamicMethod method, List subArgs) - { - Method = method ?? throw new ArgumentNullException(nameof(method));; - SubArgs = subArgs?.AsReadOnly() ?? throw new ArgumentNullException(nameof(subArgs)); - - _callers = new HashSet(); - - PrepareDelegate(); - } - - static TranslatedSub() - { - MethodInfo mthdInfo = typeof(Aa64Subroutine).GetMethod("Invoke"); - - ParameterInfo[] Params = mthdInfo.GetParameters(); - - FixedArgTypes = new Type[Params.Length]; - - for (int index = 0; index < Params.Length; index++) - { - Type paramType = Params[index].ParameterType; - - FixedArgTypes[index] = paramType; - - if (paramType == typeof(CpuThreadState)) - { - StateArgIdx = index; - } - else if (paramType == typeof(MemoryManager)) - { - MemoryArgIdx = index; - } - } - } - - private void PrepareDelegate() - { - string name = $"{Method.Name}_Dispatch"; - - DynamicMethod mthd = new DynamicMethod(name, typeof(long), FixedArgTypes); - - ILGenerator generator = mthd.GetILGenerator(); - - generator.EmitLdargSeq(FixedArgTypes.Length); - - foreach (Register reg in SubArgs) - { - generator.EmitLdarg(StateArgIdx); - - generator.Emit(OpCodes.Ldfld, reg.GetField()); - } - - generator.Emit(OpCodes.Call, Method); - generator.Emit(OpCodes.Ret); - - _execDelegate = (Aa64Subroutine)mthd.CreateDelegate(typeof(Aa64Subroutine)); - } - - public bool ShouldReJit() - { - if (_needsReJit && _callCount < MinCallCountForReJit) - { - _callCount++; - - return false; - } - - return _needsReJit; - } - - public long Execute(CpuThreadState threadState, MemoryManager memory) - { - return _execDelegate(threadState, memory); - } - - public void AddCaller(long position) - { - lock (_callers) - { - _callers.Add(position); - } - } - - public long[] GetCallerPositions() - { - lock (_callers) - { - return _callers.ToArray(); - } - } - - public void SetType(TranslatedSubType type) - { - _type = 
type; - - if (type == TranslatedSubType.SubTier0) - { - _needsReJit = true; - } - } - - public void MarkForReJit() => _needsReJit = true; - } -} \ No newline at end of file diff --git a/ChocolArm64/TranslatedSubType.cs b/ChocolArm64/TranslatedSubType.cs deleted file mode 100644 index f57aea94..00000000 --- a/ChocolArm64/TranslatedSubType.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace ChocolArm64 -{ - enum TranslatedSubType - { - SubTier0, - SubTier1 - } -} \ No newline at end of file diff --git a/ChocolArm64/Translation/ILEmitterCtx.cs b/ChocolArm64/Translation/ILEmitterCtx.cs index b5ebff75..ef63e60c 100644 --- a/ChocolArm64/Translation/ILEmitterCtx.cs +++ b/ChocolArm64/Translation/ILEmitterCtx.cs @@ -11,6 +11,7 @@ namespace ChocolArm64.Translation class ILEmitterCtx { private TranslatorCache _cache; + private TranslatorQueue _queue; private Dictionary _labels; @@ -23,6 +24,8 @@ namespace ChocolArm64.Translation public Block CurrBlock => _currBlock; public OpCode64 CurrOp => _currBlock?.OpCodes[_opcIndex]; + public TranslationTier Tier { get; } + public Aarch32Mode Mode { get; } = Aarch32Mode.User; //TODO private Dictionary _visitedBlocks; @@ -47,11 +50,14 @@ namespace ChocolArm64.Translation private const int VecTmp1Index = -5; private const int VecTmp2Index = -6; - public ILEmitterCtx(TranslatorCache cache, Block graph) + public ILEmitterCtx(TranslatorCache cache, TranslatorQueue queue, TranslationTier tier, Block graph) { _cache = cache ?? throw new ArgumentNullException(nameof(cache)); + _queue = queue ?? throw new ArgumentNullException(nameof(queue)); _currBlock = graph ?? 
throw new ArgumentNullException(nameof(graph)); + Tier = tier; + _labels = new Dictionary(); _visitedBlocks = new Dictionary(); @@ -243,6 +249,16 @@ namespace ChocolArm64.Translation return new ILBlock(); } + public void TranslateAhead(long position, ExecutionMode mode = ExecutionMode.Aarch64) + { + if (_cache.TryGetSubroutine(position, out TranslatedSub sub) && sub.Tier != TranslationTier.Tier0) + { + return; + } + + _queue.Enqueue(new TranslatorQueueItem(position, mode, TranslationTier.Tier1)); + } + public bool TryOptEmitSubroutineCall() { if (_currBlock.Next == null) @@ -265,20 +281,8 @@ namespace ChocolArm64.Translation EmitLdarg(index); } - foreach (Register reg in subroutine.SubArgs) - { - switch (reg.Type) - { - case RegisterType.Flag: Ldloc(reg.Index, IoType.Flag); break; - case RegisterType.Int: Ldloc(reg.Index, IoType.Int); break; - case RegisterType.Vector: Ldloc(reg.Index, IoType.Vector); break; - } - } - EmitCall(subroutine.Method); - subroutine.AddCaller(_subPosition); - return true; } @@ -463,7 +467,12 @@ namespace ChocolArm64.Translation _ilBlock.Add(new ILOpCodeBranch(ilOp, label)); } - public void Emit(string text) + public void EmitFieldLoad(FieldInfo info) + { + _ilBlock.Add(new ILOpCodeLoadField(info)); + } + + public void EmitPrint(string text) { _ilBlock.Add(new ILOpCodeLog(text)); } @@ -618,14 +627,9 @@ namespace ChocolArm64.Translation EmitCall(objType.GetMethod(mthdName, BindingFlags.Instance | BindingFlags.NonPublic)); } - public void EmitCall(MethodInfo mthdInfo) + public void EmitCall(MethodInfo mthdInfo, bool isVirtual = false) { - if (mthdInfo == null) - { - throw new ArgumentNullException(nameof(mthdInfo)); - } - - _ilBlock.Add(new ILOpCodeCall(mthdInfo)); + _ilBlock.Add(new ILOpCodeCall(mthdInfo ?? 
throw new ArgumentNullException(nameof(mthdInfo)), isVirtual)); } public void EmitLdc_I(long value) diff --git a/ChocolArm64/Translation/ILMethodBuilder.cs b/ChocolArm64/Translation/ILMethodBuilder.cs index 70d9a2db..892f831b 100644 --- a/ChocolArm64/Translation/ILMethodBuilder.cs +++ b/ChocolArm64/Translation/ILMethodBuilder.cs @@ -26,74 +26,32 @@ namespace ChocolArm64.Translation _subName = subName; } - public TranslatedSub GetSubroutine() + public TranslatedSub GetSubroutine(TranslationTier tier) { LocalAlloc = new LocalAlloc(_ilBlocks, _ilBlocks[0]); - List subArgs = new List(); - - void SetArgs(long inputs, RegisterType baseType) - { - for (int bit = 0; bit < 64; bit++) - { - long mask = 1L << bit; - - if ((inputs & mask) != 0) - { - subArgs.Add(GetRegFromBit(bit, baseType)); - } - } - } - - SetArgs(LocalAlloc.GetIntInputs(_ilBlocks[0]), RegisterType.Int); - SetArgs(LocalAlloc.GetVecInputs(_ilBlocks[0]), RegisterType.Vector); - - DynamicMethod method = new DynamicMethod(_subName, typeof(long), GetArgumentTypes(subArgs)); + DynamicMethod method = new DynamicMethod(_subName, typeof(long), TranslatedSub.FixedArgTypes); Generator = method.GetILGenerator(); - TranslatedSub subroutine = new TranslatedSub(method, subArgs); - - int argsStart = TranslatedSub.FixedArgTypes.Length; + TranslatedSub subroutine = new TranslatedSub(method, tier); _locals = new Dictionary(); _localsCount = 0; - for (int index = 0; index < subroutine.SubArgs.Count; index++) - { - Register reg = subroutine.SubArgs[index]; - - Generator.EmitLdarg(index + argsStart); - Generator.EmitStloc(GetLocalIndex(reg)); - } + new ILOpCodeLoadState(_ilBlocks[0]).Emit(this); foreach (ILBlock ilBlock in _ilBlocks) { ilBlock.Emit(this); } + subroutine.PrepareMethod(); + return subroutine; } - private Type[] GetArgumentTypes(IList Params) - { - Type[] fixedArgs = TranslatedSub.FixedArgTypes; - - Type[] output = new Type[Params.Count + fixedArgs.Length]; - - fixedArgs.CopyTo(output, 0); - - int typeIdx = 
fixedArgs.Length; - - for (int index = 0; index < Params.Count; index++) - { - output[typeIdx++] = GetFieldType(Params[index].Type); - } - - return output; - } - public int GetLocalIndex(Register reg) { if (!_locals.TryGetValue(reg, out int index)) diff --git a/ChocolArm64/Translation/ILOpCodeCall.cs b/ChocolArm64/Translation/ILOpCodeCall.cs index 8486a791..c046aeeb 100644 --- a/ChocolArm64/Translation/ILOpCodeCall.cs +++ b/ChocolArm64/Translation/ILOpCodeCall.cs @@ -5,16 +5,19 @@ namespace ChocolArm64.Translation { struct ILOpCodeCall : IILEmit { - private MethodInfo _mthdInfo; + public MethodInfo Info { get; private set; } - public ILOpCodeCall(MethodInfo mthdInfo) + public bool IsVirtual { get; private set; } + + public ILOpCodeCall(MethodInfo info, bool isVirtual) { - _mthdInfo = mthdInfo; + Info = info; + IsVirtual = isVirtual; } public void Emit(ILMethodBuilder context) { - context.Generator.Emit(OpCodes.Call, _mthdInfo); + context.Generator.Emit(IsVirtual ? OpCodes.Callvirt : OpCodes.Call, Info); } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/ILOpCodeLoadField.cs b/ChocolArm64/Translation/ILOpCodeLoadField.cs new file mode 100644 index 00000000..abcd37c3 --- /dev/null +++ b/ChocolArm64/Translation/ILOpCodeLoadField.cs @@ -0,0 +1,20 @@ +using System.Reflection; +using System.Reflection.Emit; + +namespace ChocolArm64.Translation +{ + struct ILOpCodeLoadField : IILEmit + { + public FieldInfo Info { get; private set; } + + public ILOpCodeLoadField(FieldInfo info) + { + Info = info; + } + + public void Emit(ILMethodBuilder context) + { + context.Generator.Emit(OpCodes.Ldfld, Info); + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/TranslatedSub.cs b/ChocolArm64/Translation/TranslatedSub.cs new file mode 100644 index 00000000..65d70351 --- /dev/null +++ b/ChocolArm64/Translation/TranslatedSub.cs @@ -0,0 +1,65 @@ +using ChocolArm64.Memory; +using ChocolArm64.State; +using System; +using System.Reflection; +using 
System.Reflection.Emit; + +namespace ChocolArm64.Translation +{ + delegate long ArmSubroutine(CpuThreadState state, MemoryManager memory); + + class TranslatedSub + { + public ArmSubroutine Delegate { get; private set; } + + public static int StateArgIdx { get; private set; } + public static int MemoryArgIdx { get; private set; } + + public static Type[] FixedArgTypes { get; private set; } + + public DynamicMethod Method { get; private set; } + + public TranslationTier Tier { get; private set; } + + public TranslatedSub(DynamicMethod method, TranslationTier tier) + { + Method = method ?? throw new ArgumentNullException(nameof(method));; + Tier = tier; + } + + static TranslatedSub() + { + MethodInfo mthdInfo = typeof(ArmSubroutine).GetMethod("Invoke"); + + ParameterInfo[] Params = mthdInfo.GetParameters(); + + FixedArgTypes = new Type[Params.Length]; + + for (int index = 0; index < Params.Length; index++) + { + Type argType = Params[index].ParameterType; + + FixedArgTypes[index] = argType; + + if (argType == typeof(CpuThreadState)) + { + StateArgIdx = index; + } + else if (argType == typeof(MemoryManager)) + { + MemoryArgIdx = index; + } + } + } + + public void PrepareMethod() + { + Delegate = (ArmSubroutine)Method.CreateDelegate(typeof(ArmSubroutine)); + } + + public long Execute(CpuThreadState threadState, MemoryManager memory) + { + return Delegate(threadState, memory); + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/TranslationTier.cs b/ChocolArm64/Translation/TranslationTier.cs new file mode 100644 index 00000000..13afd9c5 --- /dev/null +++ b/ChocolArm64/Translation/TranslationTier.cs @@ -0,0 +1,11 @@ +namespace ChocolArm64.Translation +{ + enum TranslationTier + { + Tier0, + Tier1, + Tier2, + + Count + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/Translator.cs b/ChocolArm64/Translation/Translator.cs new file mode 100644 index 00000000..7f7df6e5 --- /dev/null +++ b/ChocolArm64/Translation/Translator.cs @@ 
-0,0 +1,188 @@ +using ChocolArm64.Decoders; +using ChocolArm64.Events; +using ChocolArm64.Memory; +using ChocolArm64.State; +using System; +using System.Threading; + +namespace ChocolArm64.Translation +{ + public class Translator + { + private MemoryManager _memory; + + private CpuThreadState _dummyThreadState; + + private TranslatorCache _cache; + private TranslatorQueue _queue; + + private Thread _backgroundTranslator; + + public event EventHandler CpuTrace; + + public bool EnableCpuTrace { get; set; } + + private volatile int _threadCount; + + public Translator(MemoryManager memory) + { + _memory = memory; + + _dummyThreadState = new CpuThreadState(); + + _dummyThreadState.Running = false; + + _cache = new TranslatorCache(); + _queue = new TranslatorQueue(); + } + + internal void ExecuteSubroutine(CpuThread thread, long position) + { + if (Interlocked.Increment(ref _threadCount) == 1) + { + _backgroundTranslator = new Thread(TranslateQueuedSubs); + _backgroundTranslator.Start(); + } + + ExecuteSubroutine(thread.ThreadState, position); + + if (Interlocked.Decrement(ref _threadCount) == 0) + { + _queue.ForceSignal(); + } + } + + private void ExecuteSubroutine(CpuThreadState state, long position) + { + state.CurrentTranslator = this; + + do + { + if (EnableCpuTrace) + { + CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); + } + + TranslatedSub subroutine = GetOrTranslateSubroutine(state, position); + + position = subroutine.Execute(state, _memory); + } + while (position != 0 && state.Running); + + state.CurrentTranslator = null; + } + + internal void TranslateVirtualSubroutine(CpuThreadState state, long position) + { + if (!_cache.TryGetSubroutine(position, out TranslatedSub sub) || sub.Tier == TranslationTier.Tier0) + { + _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); + } + } + + internal ArmSubroutine GetOrTranslateVirtualSubroutine(CpuThreadState state, long position) + { + if 
(!_cache.TryGetSubroutine(position, out TranslatedSub sub)) + { + sub = TranslateLowCq(position, state.GetExecutionMode()); + } + + if (sub.Tier == TranslationTier.Tier0) + { + _queue.Enqueue(new TranslatorQueueItem(position, state.GetExecutionMode(), TranslationTier.Tier1)); + } + + return sub.Delegate; + } + + internal TranslatedSub GetOrTranslateSubroutine(CpuThreadState state, long position) + { + if (!_cache.TryGetSubroutine(position, out TranslatedSub subroutine)) + { + subroutine = TranslateLowCq(position, state.GetExecutionMode()); + } + + return subroutine; + } + + private void TranslateQueuedSubs() + { + while (_threadCount != 0) + { + if (_queue.TryDequeue(out TranslatorQueueItem item)) + { + bool isCached = _cache.TryGetSubroutine(item.Position, out TranslatedSub sub); + + if (isCached && item.Tier <= sub.Tier) + { + continue; + } + + if (item.Tier == TranslationTier.Tier0) + { + TranslateLowCq(item.Position, item.Mode); + } + else + { + TranslateHighCq(item.Position, item.Mode); + } + } + else + { + _queue.WaitForItems(); + } + } + } + + private TranslatedSub TranslateLowCq(long position, ExecutionMode mode) + { + Block block = Decoder.DecodeBasicBlock(_memory, position, mode); + + ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier0, block); + + string subName = GetSubroutineName(position); + + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier0); + + return _cache.GetOrAdd(position, subroutine, block.OpCodes.Count); + } + + private void TranslateHighCq(long position, ExecutionMode mode) + { + Block graph = Decoder.DecodeSubroutine(_memory, position, mode); + + ILEmitterCtx context = new ILEmitterCtx(_cache, _queue, TranslationTier.Tier1, graph); + + ILBlock[] ilBlocks = context.GetILBlocks(); + + string subName = GetSubroutineName(position); + + ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, 
subName); + + TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(TranslationTier.Tier1); + + int ilOpCount = 0; + + foreach (ILBlock ilBlock in ilBlocks) + { + ilOpCount += ilBlock.Count; + } + + _cache.AddOrUpdate(position, subroutine, ilOpCount); + + ForceAheadOfTimeCompilation(subroutine); + } + + private string GetSubroutineName(long position) + { + return $"Sub{position:x16}"; + } + + private void ForceAheadOfTimeCompilation(TranslatedSub subroutine) + { + subroutine.Execute(_dummyThreadState, null); + } + } +} \ No newline at end of file diff --git a/ChocolArm64/TranslatorCache.cs b/ChocolArm64/Translation/TranslatorCache.cs similarity index 87% rename from ChocolArm64/TranslatorCache.cs rename to ChocolArm64/Translation/TranslatorCache.cs index 9903ccaa..d10d6757 100644 --- a/ChocolArm64/TranslatorCache.cs +++ b/ChocolArm64/Translation/TranslatorCache.cs @@ -4,7 +4,7 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Threading; -namespace ChocolArm64 +namespace ChocolArm64.Translation { class TranslatorCache { @@ -58,6 +58,31 @@ namespace ChocolArm64 _sortedCache = new LinkedList(); } + public TranslatedSub GetOrAdd(long position, TranslatedSub subroutine, int size) + { + ClearCacheIfNeeded(); + + lock (_sortedCache) + { + LinkedListNode node = _sortedCache.AddLast(position); + + CacheBucket bucket = new CacheBucket(subroutine, node, size); + + bucket = _cache.GetOrAdd(position, bucket); + + if (bucket.Node == node) + { + _totalSize += size; + } + else + { + _sortedCache.Remove(node); + } + + return bucket.Subroutine; + } + } + public void AddOrUpdate(long position, TranslatedSub subroutine, int size) { ClearCacheIfNeeded(); diff --git a/ChocolArm64/Translation/TranslatorQueue.cs b/ChocolArm64/Translation/TranslatorQueue.cs new file mode 100644 index 00000000..89d665bf --- /dev/null +++ b/ChocolArm64/Translation/TranslatorQueue.cs @@ -0,0 +1,83 @@ +using System.Collections.Concurrent; +using System.Threading; + +namespace 
ChocolArm64.Translation +{ + class TranslatorQueue + { + //This is the maximum number of functions to be translated that the queue can hold. + //The value may need some tuning to find the sweet spot. + private const int MaxQueueSize = 1024; + + private ConcurrentStack[] _translationQueue; + + private ManualResetEvent _queueDataReceivedEvent; + + private bool _signaled; + + public TranslatorQueue() + { + _translationQueue = new ConcurrentStack[(int)TranslationTier.Count]; + + for (int prio = 0; prio < _translationQueue.Length; prio++) + { + _translationQueue[prio] = new ConcurrentStack(); + } + + _queueDataReceivedEvent = new ManualResetEvent(false); + } + + public void Enqueue(TranslatorQueueItem item) + { + ConcurrentStack queue = _translationQueue[(int)item.Tier]; + + if (queue.Count >= MaxQueueSize) + { + queue.TryPop(out _); + } + + queue.Push(item); + + _queueDataReceivedEvent.Set(); + } + + public bool TryDequeue(out TranslatorQueueItem item) + { + for (int prio = 0; prio < _translationQueue.Length; prio++) + { + if (_translationQueue[prio].TryPop(out item)) + { + return true; + } + } + + item = default(TranslatorQueueItem); + + return false; + } + + public void WaitForItems() + { + _queueDataReceivedEvent.WaitOne(); + + lock (_queueDataReceivedEvent) + { + if (!_signaled) + { + _queueDataReceivedEvent.Reset(); + } + } + } + + public void ForceSignal() + { + lock (_queueDataReceivedEvent) + { + _signaled = true; + + _queueDataReceivedEvent.Set(); + _queueDataReceivedEvent.Close(); + } + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Translation/TranslatorQueueItem.cs b/ChocolArm64/Translation/TranslatorQueueItem.cs new file mode 100644 index 00000000..0988414a --- /dev/null +++ b/ChocolArm64/Translation/TranslatorQueueItem.cs @@ -0,0 +1,20 @@ +using ChocolArm64.State; + +namespace ChocolArm64.Translation +{ + struct TranslatorQueueItem + { + public long Position { get; } + + public ExecutionMode Mode { get; } + + public TranslationTier Tier { 
get; } + + public TranslatorQueueItem(long position, ExecutionMode mode, TranslationTier tier) + { + Position = position; + Mode = mode; + Tier = tier; + } + } +} \ No newline at end of file diff --git a/ChocolArm64/Translator.cs b/ChocolArm64/Translator.cs deleted file mode 100644 index af2586f4..00000000 --- a/ChocolArm64/Translator.cs +++ /dev/null @@ -1,120 +0,0 @@ -using ChocolArm64.Decoders; -using ChocolArm64.Events; -using ChocolArm64.Memory; -using ChocolArm64.State; -using ChocolArm64.Translation; -using System; - -namespace ChocolArm64 -{ - public class Translator - { - private TranslatorCache _cache; - - public event EventHandler CpuTrace; - - public bool EnableCpuTrace { get; set; } - - public Translator() - { - _cache = new TranslatorCache(); - } - - internal void ExecuteSubroutine(CpuThread thread, long position) - { - ExecuteSubroutine(thread.ThreadState, thread.Memory, position); - } - - private void ExecuteSubroutine(CpuThreadState state, MemoryManager memory, long position) - { - do - { - if (EnableCpuTrace) - { - CpuTrace?.Invoke(this, new CpuTraceEventArgs(position)); - } - - if (!_cache.TryGetSubroutine(position, out TranslatedSub sub)) - { - sub = TranslateTier0(memory, position, state.GetExecutionMode()); - } - - if (sub.ShouldReJit()) - { - TranslateTier1(memory, position, state.GetExecutionMode()); - } - - position = sub.Execute(state, memory); - } - while (position != 0 && state.Running); - } - - internal bool HasCachedSub(long position) - { - return _cache.HasSubroutine(position); - } - - private TranslatedSub TranslateTier0(MemoryManager memory, long position, ExecutionMode mode) - { - Block block = Decoder.DecodeBasicBlock(memory, position, mode); - - ILEmitterCtx context = new ILEmitterCtx(_cache, block); - - string subName = GetSubroutineName(position); - - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(context.GetILBlocks(), subName); - - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(); - - 
subroutine.SetType(TranslatedSubType.SubTier0); - - _cache.AddOrUpdate(position, subroutine, block.OpCodes.Count); - - return subroutine; - } - - private void TranslateTier1(MemoryManager memory, long position, ExecutionMode mode) - { - Block graph = Decoder.DecodeSubroutine(_cache, memory, position, mode); - - ILEmitterCtx context = new ILEmitterCtx(_cache, graph); - - ILBlock[] ilBlocks = context.GetILBlocks(); - - string subName = GetSubroutineName(position); - - ILMethodBuilder ilMthdBuilder = new ILMethodBuilder(ilBlocks, subName); - - TranslatedSub subroutine = ilMthdBuilder.GetSubroutine(); - - subroutine.SetType(TranslatedSubType.SubTier1); - - int ilOpCount = 0; - - foreach (ILBlock ilBlock in ilBlocks) - { - ilOpCount += ilBlock.Count; - } - - _cache.AddOrUpdate(position, subroutine, ilOpCount); - - //Mark all methods that calls this method for ReJiting, - //since we can now call it directly which is faster. - if (_cache.TryGetSubroutine(position, out TranslatedSub oldSub)) - { - foreach (long callerPos in oldSub.GetCallerPositions()) - { - if (_cache.TryGetSubroutine(position, out TranslatedSub callerSub)) - { - callerSub.MarkForReJit(); - } - } - } - } - - private string GetSubroutineName(long position) - { - return $"Sub{position:x16}"; - } - } -} \ No newline at end of file diff --git a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs index 749f5fdc..9ff3b36a 100644 --- a/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs +++ b/Ryujinx.Graphics/Graphics3d/NvGpuEngine3d.cs @@ -789,7 +789,7 @@ namespace Ryujinx.Graphics.Graphics3d GalVertexAttribType Type = (GalVertexAttribType)((Packed >> 27) & 0x7); bool IsRgba = ((Packed >> 31) & 1) != 0; - + // Check vertex array is enabled to avoid out of bounds exception when reading bytes bool Enable = (ReadRegister(NvGpuEngine3dReg.VertexArrayNControl + ArrayIndex * 4) & 0x1000) != 0; diff --git a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs 
b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs index fd473014..338e5543 100644 --- a/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs +++ b/Ryujinx.HLE/HOS/Kernel/Process/KProcess.cs @@ -1,6 +1,7 @@ using ChocolArm64; using ChocolArm64.Events; using ChocolArm64.Memory; +using ChocolArm64.Translation; using Ryujinx.Common; using Ryujinx.Common.Logging; using Ryujinx.HLE.Exceptions; @@ -109,7 +110,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Process _threads = new LinkedList(); - Translator = new Translator(); + Translator = new Translator(CpuMemory); Translator.CpuTrace += CpuTraceHandler; diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index b970e055..47feb573 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -1,6 +1,7 @@ using ChocolArm64; using ChocolArm64.Memory; using ChocolArm64.State; +using ChocolArm64.Translation; using NUnit.Framework; @@ -48,10 +49,12 @@ namespace Ryujinx.Tests.Cpu _entryPoint = Position; - Translator translator = new Translator(); _ramPointer = Marshal.AllocHGlobal(new IntPtr(_size)); _memory = new MemoryManager(_ramPointer); _memory.Map(Position, 0, _size); + + Translator translator = new Translator(_memory); + _thread = new CpuThread(translator, _memory, _entryPoint); if (_unicornAvailable)