ARMeilleure: Respect FZ/RM flags for all floating point operations (#4618)
* ARMeilleure: Respect Fz flag for all floating point operations. This is a change in strategy for emulating the Fz FPCR flag. Before, it was set before instructions that "needed it" and reset after. However, this missed a few hot instructions like the multiplication instruction, and the entirety of A32. The new strategy is to set the Fz flag only in the following circumstances: - Set to match FPCR before translated functions/loop are executed. - Reset when calling SoftFloat methods, set when returning. - Reset when exiting execution. This allows us to remove the code around the existing Fz aware instructions, and get the accuracy benefits on all floating point instructions executed while in translated code. Single step executions now need to be called with a context wrapper - right now it just contains the Fz flag initialization, and won't actually do anything on ARM. This fixes a bug in Breath of the Wild where some physics interactions could randomly crash the game due to subnormal values not flushing to zero. This is draft right now because I need to answer the questions: - Does dotnet avoid changing the value of Mxcsr? - Is it a good idea to assume that? Or should the flag set/restore be done on every managed method call, not just softfloat? - If we assume that, do we want a unit test to verify the behaviour? I recommend testing a bunch of games, especially games affected when this was originally added, such as #1611. * Remove unused method * Use FMA for Fmadd, Fmsub, Fnmadd, Fnmsub, Fmla, Fmls ...when available. Similar implementation to A32 * Use FMA for Frecps, Frsqrts * Don't set DAZ. * Add round mode to ARM FP mode * Fix mistakes * Add test for FP state when calling managed methods * Add explanatory comment to test. * Cleanup * Add A64 FPCR flags * Vrintx_S A32 fast path on A64 backend * Address feedback 1, re-enable DAZ * Fix FMA instructions By Elem * Address feedback
This commit is contained in:
parent
a1efd87c45
commit
9db73f74cf
22 changed files with 822 additions and 225 deletions
|
@ -188,6 +188,21 @@ namespace ARMeilleure.Translation
|
|||
}
|
||||
}
|
||||
|
||||
public void EnterArmFpMode()
|
||||
{
|
||||
InstEmitSimdHelper.EnterArmFpMode(this, InstEmitHelper.GetFpFlag);
|
||||
}
|
||||
|
||||
public void UpdateArmFpMode()
|
||||
{
|
||||
EnterArmFpMode();
|
||||
}
|
||||
|
||||
public void ExitArmFpMode()
|
||||
{
|
||||
InstEmitSimdHelper.ExitArmFpMode(this, (flag, value) => InstEmitHelper.SetFpFlag(this, flag, value));
|
||||
}
|
||||
|
||||
public Operand TryGetComparisonResult(Condition condition)
|
||||
{
|
||||
if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet)
|
||||
|
|
|
@ -3,4 +3,5 @@
|
|||
namespace ARMeilleure.Translation
|
||||
{
|
||||
delegate void DispatcherFunction(IntPtr nativeContext, ulong startAddress);
|
||||
delegate ulong WrapperFunction(IntPtr nativeContext, ulong startAddress);
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 4626; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
|
@ -25,5 +25,10 @@ namespace ARMeilleure.Translation
|
|||
{
|
||||
return _func(context.NativeContextPtr);
|
||||
}
|
||||
|
||||
public ulong Execute(WrapperFunction dispatcher, State.ExecutionContext context)
|
||||
{
|
||||
return dispatcher(context.NativeContextPtr, (ulong)FuncPointer);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -183,7 +183,7 @@ namespace ARMeilleure.Translation
|
|||
|
||||
Statistics.StartTimer();
|
||||
|
||||
ulong nextAddr = func.Execute(context);
|
||||
ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
|
||||
|
||||
Statistics.StopTimer(address);
|
||||
|
||||
|
@ -194,7 +194,7 @@ namespace ARMeilleure.Translation
|
|||
{
|
||||
TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
|
||||
|
||||
address = func.Execute(context);
|
||||
address = func.Execute(Stubs.ContextWrapper, context);
|
||||
|
||||
EnqueueForDeletion(address, func);
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ namespace ARMeilleure.Translation
|
|||
private readonly Translator _translator;
|
||||
private readonly Lazy<IntPtr> _dispatchStub;
|
||||
private readonly Lazy<DispatcherFunction> _dispatchLoop;
|
||||
private readonly Lazy<WrapperFunction> _contextWrapper;
|
||||
|
||||
/// <summary>
|
||||
/// Gets the dispatch stub.
|
||||
|
@ -64,6 +65,20 @@ namespace ARMeilleure.Translation
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Gets the context wrapper function.
|
||||
/// </summary>
|
||||
/// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception>
|
||||
public WrapperFunction ContextWrapper
|
||||
{
|
||||
get
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
|
||||
return _contextWrapper.Value;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified
|
||||
/// <see cref="Translator"/> instance.
|
||||
|
@ -77,6 +92,7 @@ namespace ARMeilleure.Translation
|
|||
_translator = translator;
|
||||
_dispatchStub = new(GenerateDispatchStub, isThreadSafe: true);
|
||||
_dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true);
|
||||
_contextWrapper = new(GenerateContextWrapper, isThreadSafe: true);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -202,6 +218,32 @@ namespace ARMeilleure.Translation
|
|||
return Marshal.GetFunctionPointerForDelegate(func);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Emits code that syncs FP state before executing guest code, or returns it to normal.
|
||||
/// </summary>
|
||||
/// <param name="context">Emitter context for the method</param>
|
||||
/// <param name="nativeContext">Pointer to the native context</param>
|
||||
/// <param name="enter">True if entering guest code, false otherwise</param>
|
||||
private void EmitSyncFpContext(EmitterContext context, Operand nativeContext, bool enter)
|
||||
{
|
||||
if (enter)
|
||||
{
|
||||
InstEmitSimdHelper.EnterArmFpMode(context, (flag) =>
|
||||
{
|
||||
Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
|
||||
return context.Load(OperandType.I32, flagAddress);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
InstEmitSimdHelper.ExitArmFpMode(context, (flag, value) =>
|
||||
{
|
||||
Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag))));
|
||||
context.Store(flagAddress, value);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates a <see cref="DispatchLoop"/> function.
|
||||
/// </summary>
|
||||
|
@ -221,6 +263,8 @@ namespace ARMeilleure.Translation
|
|||
Operand runningAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRunningOffset()));
|
||||
Operand dispatchAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()));
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, true);
|
||||
|
||||
context.MarkLabel(beginLbl);
|
||||
context.Store(dispatchAddress, guestAddress);
|
||||
context.Copy(guestAddress, context.Call(Const((ulong)DispatchStub), OperandType.I64, nativeContext));
|
||||
|
@ -229,6 +273,9 @@ namespace ARMeilleure.Translation
|
|||
context.Branch(beginLbl);
|
||||
|
||||
context.MarkLabel(endLbl);
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, false);
|
||||
|
||||
context.Return();
|
||||
|
||||
var cfg = context.GetControlFlowGraph();
|
||||
|
@ -237,5 +284,29 @@ namespace ARMeilleure.Translation
|
|||
|
||||
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Generates a <see cref="ContextWrapper"/> function.
|
||||
/// </summary>
|
||||
/// <returns><see cref="ContextWrapper"/> function</returns>
|
||||
private WrapperFunction GenerateContextWrapper()
|
||||
{
|
||||
var context = new EmitterContext();
|
||||
|
||||
Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
|
||||
Operand guestMethod = context.LoadArgument(OperandType.I64, 1);
|
||||
|
||||
EmitSyncFpContext(context, nativeContext, true);
|
||||
Operand returnValue = context.Call(guestMethod, OperandType.I64, nativeContext);
|
||||
EmitSyncFpContext(context, nativeContext, false);
|
||||
|
||||
context.Return(returnValue);
|
||||
|
||||
var cfg = context.GetControlFlowGraph();
|
||||
var retType = OperandType.I64;
|
||||
var argTypes = new[] { OperandType.I64, OperandType.I64 };
|
||||
|
||||
return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<WrapperFunction>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
148
ARMeilleure/Translation/TranslatorTestMethods.cs
Normal file
148
ARMeilleure/Translation/TranslatorTestMethods.cs
Normal file
|
@ -0,0 +1,148 @@
|
|||
using ARMeilleure.CodeGen.X86;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
||||
|
||||
namespace ARMeilleure.Translation
|
||||
{
|
||||
public static class TranslatorTestMethods
|
||||
{
|
||||
public delegate int FpFlagsPInvokeTest(IntPtr managedMethod);
|
||||
|
||||
private static bool SetPlatformFtz(EmitterContext context, bool ftz)
|
||||
{
|
||||
if (Optimizations.UseSse2)
|
||||
{
|
||||
Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr);
|
||||
|
||||
if (ftz)
|
||||
{
|
||||
mxcsr = context.BitwiseOr(mxcsr, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm)));
|
||||
}
|
||||
else
|
||||
{
|
||||
mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)Mxcsr.Ftz));
|
||||
}
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr);
|
||||
|
||||
return true;
|
||||
}
|
||||
else if (Optimizations.UseAdvSimd)
|
||||
{
|
||||
Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr);
|
||||
|
||||
if (ftz)
|
||||
{
|
||||
fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz));
|
||||
}
|
||||
else
|
||||
{
|
||||
fpcr = context.BitwiseAnd(fpcr, Const(~(int)FPCR.Fz));
|
||||
}
|
||||
|
||||
context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr);
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static Operand FpBitsToInt(EmitterContext context, Operand fp)
|
||||
{
|
||||
Operand vec = context.VectorInsert(context.VectorZero(), fp, 0);
|
||||
return context.VectorExtract(OperandType.I32, vec, 0);
|
||||
}
|
||||
|
||||
public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest()
|
||||
{
|
||||
EmitterContext context = new EmitterContext();
|
||||
|
||||
Operand methodAddress = context.Copy(context.LoadArgument(OperandType.I64, 0));
|
||||
|
||||
// Verify that default dotnet fp state does not flush to zero.
|
||||
// This is required for SoftFloat to function.
|
||||
|
||||
// Denormal + zero != 0
|
||||
|
||||
Operand denormal = ConstF(BitConverter.Int32BitsToSingle(1)); // 1.40129846432e-45
|
||||
Operand zeroF = ConstF(0f);
|
||||
Operand zero = Const(0);
|
||||
|
||||
Operand result = context.Add(zeroF, denormal);
|
||||
|
||||
// Must not be zero.
|
||||
|
||||
Operand correct1Label = Label();
|
||||
|
||||
context.BranchIfFalse(correct1Label, context.ICompareEqual(FpBitsToInt(context, result), zero));
|
||||
|
||||
context.Return(Const(1));
|
||||
|
||||
context.MarkLabel(correct1Label);
|
||||
|
||||
// Set flush to zero flag. If unsupported by the backend, just return true.
|
||||
|
||||
if (!SetPlatformFtz(context, true))
|
||||
{
|
||||
context.Return(Const(0));
|
||||
}
|
||||
|
||||
// Denormal + zero == 0
|
||||
|
||||
Operand resultFz = context.Add(zeroF, denormal);
|
||||
|
||||
// Must equal zero.
|
||||
|
||||
Operand correct2Label = Label();
|
||||
|
||||
context.BranchIfTrue(correct2Label, context.ICompareEqual(FpBitsToInt(context, resultFz), zero));
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(2));
|
||||
|
||||
context.MarkLabel(correct2Label);
|
||||
|
||||
// Call a managed method. This method should not change Fz state.
|
||||
|
||||
context.Call(methodAddress, OperandType.None);
|
||||
|
||||
// Denormal + zero == 0
|
||||
|
||||
Operand resultFz2 = context.Add(zeroF, denormal);
|
||||
|
||||
// Must equal zero.
|
||||
|
||||
Operand correct3Label = Label();
|
||||
|
||||
context.BranchIfTrue(correct3Label, context.ICompareEqual(FpBitsToInt(context, resultFz2), zero));
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(3));
|
||||
|
||||
context.MarkLabel(correct3Label);
|
||||
|
||||
// Success.
|
||||
|
||||
SetPlatformFtz(context, false);
|
||||
|
||||
context.Return(Const(0));
|
||||
|
||||
// Compile and return the function.
|
||||
|
||||
ControlFlowGraph cfg = context.GetControlFlowGraph();
|
||||
|
||||
OperandType[] argTypes = new OperandType[] { OperandType.I64 };
|
||||
|
||||
return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<FpFlagsPInvokeTest>();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue