Improve CPU initial translation speeds (#50)

* Add background translation to the CPU

* Do not use a separate thread for translation, implement 2 tiers translation

* Remove unnecessary usings

* Lower MinCallCountForReJit

* Remove unused variable
This commit is contained in:
gdkchan 2018-03-04 14:09:59 -03:00 committed by GitHub
parent ee9df32e3e
commit 3edb66f389
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 319 additions and 107 deletions

View file

@ -58,11 +58,13 @@ namespace ChocolArm64.Translation
this.Root = ILBlocks[Array.IndexOf(Graph, Root)];
}
public ATranslatedSub GetSubroutine()
//Returns the IL block stored at the given position of the ILBlocks array.
public AILBlock GetILBlock(int Index)
{
    return ILBlocks[Index];
}
public ATranslatedSub GetSubroutine(HashSet<long> Callees)
{
LocalAlloc = new ALocalAlloc(ILBlocks, Root);
InitSubroutine();
InitSubroutine(Callees);
InitLocals();
foreach (AILBlock ILBlock in ILBlocks)
@ -73,24 +75,7 @@ namespace ChocolArm64.Translation
return Subroutine;
}
//Returns the IL block stored at the given position of the ILBlocks array.
public AILBlock GetILBlock(int Index)
{
    return ILBlocks[Index];
}
//Resets the register -> local index map and emits IL that copies every
//subroutine parameter from its argument slot into the local assigned to
//its register.
private void InitLocals()
{
    Locals = new Dictionary<ARegister, int>();

    //Register arguments are placed right after the fixed arguments.
    int FirstRegArg = ATranslatedSub.FixedArgTypes.Length;

    for (int ParamIdx = 0; ParamIdx < Subroutine.Params.Count; ParamIdx++)
    {
        Generator.EmitLdarg(FirstRegArg + ParamIdx);

        Generator.EmitStloc(GetLocalIndex(Subroutine.Params[ParamIdx]));
    }
}
private void InitSubroutine()
private void InitSubroutine(HashSet<long> Callees)
{
List<ARegister> Params = new List<ARegister>();
@ -114,9 +99,24 @@ namespace ChocolArm64.Translation
Generator = Mthd.GetILGenerator();
Subroutine = new ATranslatedSub(Mthd, Params);
Subroutine = new ATranslatedSub(Mthd, Params, Callees);
}
//Resets the register -> local index map and emits IL that copies every
//subroutine parameter from its argument slot into the local assigned to
//its register.
private void InitLocals()
{
    Locals = new Dictionary<ARegister, int>();

    //Register arguments are placed right after the fixed arguments.
    int FirstRegArg = ATranslatedSub.FixedArgTypes.Length;

    for (int ParamIdx = 0; ParamIdx < Subroutine.Params.Count; ParamIdx++)
    {
        Generator.EmitLdarg(FirstRegArg + ParamIdx);

        Generator.EmitStloc(GetLocalIndex(Subroutine.Params[ParamIdx]));
    }
}
private Type[] GetParamTypes(IList<ARegister> Params)
{
Type[] FixedArgs = ATranslatedSub.FixedArgTypes;

View file

@ -12,14 +12,9 @@ namespace ChocolArm64.Translation
{
private ATranslator Translator;
private Dictionary<long, AILLabel> Labels;
private HashSet<long> Callees;
private AILEmitter Emitter;
private AILBlock ILBlock;
private AOpCode OptOpLastCompare;
private AOpCode OptOpLastFlagSet;
private Dictionary<long, AILLabel> Labels;
private int BlkIndex;
private int OpcIndex;
@ -29,6 +24,13 @@ namespace ChocolArm64.Translation
public ABlock CurrBlock => Graph[BlkIndex];
public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex];
private AILEmitter Emitter;
private AILBlock ILBlock;
private AOpCode OptOpLastCompare;
private AOpCode OptOpLastFlagSet;
//This is the index of the temporary register, used to store temporary
//values needed by some functions, since IL doesn't have a swap instruction.
//You can use any value here as long as it doesn't conflict with the indices
@ -45,10 +47,27 @@ namespace ChocolArm64.Translation
ABlock Root,
string SubName)
{
if (Translator == null)
{
throw new ArgumentNullException(nameof(Translator));
}
if (Graph == null)
{
throw new ArgumentNullException(nameof(Graph));
}
if (Root == null)
{
throw new ArgumentNullException(nameof(Root));
}
this.Translator = Translator;
this.Graph = Graph;
this.Root = Root;
Callees = new HashSet<long>();
Labels = new Dictionary<long, AILLabel>();
Emitter = new AILEmitter(Graph, Root, SubName);
@ -57,23 +76,27 @@ namespace ChocolArm64.Translation
OpcIndex = -1;
if (!AdvanceOpCode())
if (Graph.Length == 0 || !AdvanceOpCode())
{
throw new ArgumentException(nameof(Graph));
}
}
public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine();
//Asks the emitter to build the translated subroutine, handing over the
//Callees set recorded by this context.
public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine(Callees);
public bool AdvanceOpCode()
{
if (OpcIndex + 1 == CurrBlock.OpCodes.Count &&
BlkIndex + 1 == Graph.Length)
{
return false;
}
while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0))
{
if (BlkIndex + 1 >= Graph.Length)
{
return false;
}
BlkIndex++;
OpcIndex = -1;
@ -100,6 +123,13 @@ namespace ChocolArm64.Translation
public bool TryOptEmitSubroutineCall()
{
Callees.Add(((AOpCodeBImm)CurrOp).Imm);
if (CurrBlock.Next == null)
{
return false;
}
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
{
return false;

View file

@ -67,14 +67,15 @@ namespace ChocolArm64.Translation
public long VecOutputs;
}
private const int MaxOptGraphLength = 55;
private const int MaxOptGraphLength = 40;
public ALocalAlloc(AILBlock[] Graph, AILBlock Root)
{
IntPaths = new Dictionary<AILBlock, PathIo>();
VecPaths = new Dictionary<AILBlock, PathIo>();
if (Graph.Length < MaxOptGraphLength)
if (Graph.Length > 1 &&
Graph.Length < MaxOptGraphLength)
{
InitializeOptimal(Graph, Root);
}
@ -179,10 +180,8 @@ namespace ChocolArm64.Translation
{
//This is WAY faster than InitializeOptimal, but results in
//unneeded loads and stores, so the resulting code will be slower.
long IntInputs = 0;
long IntOutputs = 0;
long VecInputs = 0;
long VecOutputs = 0;
long IntInputs = 0, IntOutputs = 0;
long VecInputs = 0, VecOutputs = 0;
foreach (AILBlock Block in Graph)
{
@ -196,8 +195,11 @@ namespace ChocolArm64.Translation
//in those cases, if we attempt to write an output register that was
//not written, we will just be writing zero and messing up the old register value.
//So we just need to ensure that all outputs are loaded.
IntInputs |= IntOutputs;
VecInputs |= VecOutputs;
if (Graph.Length > 1)
{
IntInputs |= IntOutputs;
VecInputs |= VecOutputs;
}
foreach (AILBlock Block in Graph)
{