Improve CPU initial translation speeds (#50)
* Add background translation to the CPU
* Do not use a separate thread for translation, implement 2 tiers translation
* Remove unnecessary usings
* Lower MinCallCountForReJit
* Remove unused variable
This commit is contained in:
parent
ee9df32e3e
commit
3edb66f389
10 changed files with 319 additions and 107 deletions
|
@ -58,11 +58,13 @@ namespace ChocolArm64.Translation
|
|||
this.Root = ILBlocks[Array.IndexOf(Graph, Root)];
|
||||
}
|
||||
|
||||
public ATranslatedSub GetSubroutine()
|
||||
public AILBlock GetILBlock(int Index) => ILBlocks[Index];
|
||||
|
||||
public ATranslatedSub GetSubroutine(HashSet<long> Callees)
|
||||
{
|
||||
LocalAlloc = new ALocalAlloc(ILBlocks, Root);
|
||||
|
||||
InitSubroutine();
|
||||
InitSubroutine(Callees);
|
||||
InitLocals();
|
||||
|
||||
foreach (AILBlock ILBlock in ILBlocks)
|
||||
|
@ -73,24 +75,7 @@ namespace ChocolArm64.Translation
|
|||
return Subroutine;
|
||||
}
|
||||
|
||||
public AILBlock GetILBlock(int Index) => ILBlocks[Index];
|
||||
|
||||
private void InitLocals()
|
||||
{
|
||||
int ParamsStart = ATranslatedSub.FixedArgTypes.Length;
|
||||
|
||||
Locals = new Dictionary<ARegister, int>();
|
||||
|
||||
for (int Index = 0; Index < Subroutine.Params.Count; Index++)
|
||||
{
|
||||
ARegister Reg = Subroutine.Params[Index];
|
||||
|
||||
Generator.EmitLdarg(Index + ParamsStart);
|
||||
Generator.EmitStloc(GetLocalIndex(Reg));
|
||||
}
|
||||
}
|
||||
|
||||
private void InitSubroutine()
|
||||
private void InitSubroutine(HashSet<long> Callees)
|
||||
{
|
||||
List<ARegister> Params = new List<ARegister>();
|
||||
|
||||
|
@ -114,9 +99,24 @@ namespace ChocolArm64.Translation
|
|||
|
||||
Generator = Mthd.GetILGenerator();
|
||||
|
||||
Subroutine = new ATranslatedSub(Mthd, Params);
|
||||
Subroutine = new ATranslatedSub(Mthd, Params, Callees);
|
||||
}
|
||||
|
||||
private void InitLocals()
|
||||
{
|
||||
int ParamsStart = ATranslatedSub.FixedArgTypes.Length;
|
||||
|
||||
Locals = new Dictionary<ARegister, int>();
|
||||
|
||||
for (int Index = 0; Index < Subroutine.Params.Count; Index++)
|
||||
{
|
||||
ARegister Reg = Subroutine.Params[Index];
|
||||
|
||||
Generator.EmitLdarg(Index + ParamsStart);
|
||||
Generator.EmitStloc(GetLocalIndex(Reg));
|
||||
}
|
||||
}
|
||||
|
||||
private Type[] GetParamTypes(IList<ARegister> Params)
|
||||
{
|
||||
Type[] FixedArgs = ATranslatedSub.FixedArgTypes;
|
||||
|
|
|
@ -12,14 +12,9 @@ namespace ChocolArm64.Translation
|
|||
{
|
||||
private ATranslator Translator;
|
||||
|
||||
private Dictionary<long, AILLabel> Labels;
|
||||
private HashSet<long> Callees;
|
||||
|
||||
private AILEmitter Emitter;
|
||||
|
||||
private AILBlock ILBlock;
|
||||
|
||||
private AOpCode OptOpLastCompare;
|
||||
private AOpCode OptOpLastFlagSet;
|
||||
private Dictionary<long, AILLabel> Labels;
|
||||
|
||||
private int BlkIndex;
|
||||
private int OpcIndex;
|
||||
|
@ -29,6 +24,13 @@ namespace ChocolArm64.Translation
|
|||
public ABlock CurrBlock => Graph[BlkIndex];
|
||||
public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex];
|
||||
|
||||
private AILEmitter Emitter;
|
||||
|
||||
private AILBlock ILBlock;
|
||||
|
||||
private AOpCode OptOpLastCompare;
|
||||
private AOpCode OptOpLastFlagSet;
|
||||
|
||||
//This is the index of the temporary register, used to store temporary
|
||||
//values needed by some functions, since IL doesn't have a swap instruction.
|
||||
//You can use any value here as long as it doesn't conflict with the indices
|
||||
|
@ -45,10 +47,27 @@ namespace ChocolArm64.Translation
|
|||
ABlock Root,
|
||||
string SubName)
|
||||
{
|
||||
if (Translator == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Translator));
|
||||
}
|
||||
|
||||
if (Graph == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Graph));
|
||||
}
|
||||
|
||||
if (Root == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Root));
|
||||
}
|
||||
|
||||
this.Translator = Translator;
|
||||
this.Graph = Graph;
|
||||
this.Root = Root;
|
||||
|
||||
Callees = new HashSet<long>();
|
||||
|
||||
Labels = new Dictionary<long, AILLabel>();
|
||||
|
||||
Emitter = new AILEmitter(Graph, Root, SubName);
|
||||
|
@ -57,23 +76,27 @@ namespace ChocolArm64.Translation
|
|||
|
||||
OpcIndex = -1;
|
||||
|
||||
if (!AdvanceOpCode())
|
||||
if (Graph.Length == 0 || !AdvanceOpCode())
|
||||
{
|
||||
throw new ArgumentException(nameof(Graph));
|
||||
}
|
||||
}
|
||||
|
||||
public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine();
|
||||
public ATranslatedSub GetSubroutine()
|
||||
{
|
||||
return Emitter.GetSubroutine(Callees);
|
||||
}
|
||||
|
||||
public bool AdvanceOpCode()
|
||||
{
|
||||
if (OpcIndex + 1 == CurrBlock.OpCodes.Count &&
|
||||
BlkIndex + 1 == Graph.Length)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0))
|
||||
{
|
||||
if (BlkIndex + 1 >= Graph.Length)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
BlkIndex++;
|
||||
OpcIndex = -1;
|
||||
|
||||
|
@ -100,6 +123,13 @@ namespace ChocolArm64.Translation
|
|||
|
||||
public bool TryOptEmitSubroutineCall()
|
||||
{
|
||||
Callees.Add(((AOpCodeBImm)CurrOp).Imm);
|
||||
|
||||
if (CurrBlock.Next == null)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
|
||||
{
|
||||
return false;
|
||||
|
|
|
@ -67,14 +67,15 @@ namespace ChocolArm64.Translation
|
|||
public long VecOutputs;
|
||||
}
|
||||
|
||||
private const int MaxOptGraphLength = 55;
|
||||
private const int MaxOptGraphLength = 40;
|
||||
|
||||
public ALocalAlloc(AILBlock[] Graph, AILBlock Root)
|
||||
{
|
||||
IntPaths = new Dictionary<AILBlock, PathIo>();
|
||||
VecPaths = new Dictionary<AILBlock, PathIo>();
|
||||
|
||||
if (Graph.Length < MaxOptGraphLength)
|
||||
if (Graph.Length > 1 &&
|
||||
Graph.Length < MaxOptGraphLength)
|
||||
{
|
||||
InitializeOptimal(Graph, Root);
|
||||
}
|
||||
|
@ -179,10 +180,8 @@ namespace ChocolArm64.Translation
|
|||
{
|
||||
//This is WAY faster than InitializeOptimal, but results in
|
||||
//unneeded loads and stores, so the resulting code will be slower.
|
||||
long IntInputs = 0;
|
||||
long IntOutputs = 0;
|
||||
long VecInputs = 0;
|
||||
long VecOutputs = 0;
|
||||
long IntInputs = 0, IntOutputs = 0;
|
||||
long VecInputs = 0, VecOutputs = 0;
|
||||
|
||||
foreach (AILBlock Block in Graph)
|
||||
{
|
||||
|
@ -196,8 +195,11 @@ namespace ChocolArm64.Translation
|
|||
//in those cases if we attempt to write an output register that was
|
||||
//not written, we will be just writing zero and messing up the old register value.
|
||||
//So we just need to ensure that all outputs are loaded.
|
||||
IntInputs |= IntOutputs;
|
||||
VecInputs |= VecOutputs;
|
||||
if (Graph.Length > 1)
|
||||
{
|
||||
IntInputs |= IntOutputs;
|
||||
VecInputs |= VecOutputs;
|
||||
}
|
||||
|
||||
foreach (AILBlock Block in Graph)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue