Separate GPU engines and make state follow official docs (part 1/2) (#2422)

* Use DeviceState for compute and i2m

* Migrate 2D class, more comments

* Migrate DMA copy engine

* Remove now unused code

* Replace GpuState by GpuAccessorState on GpuAcessor, since compute no longer has a GpuState

* More comments

* Add logging (disabled)

* Add back i2m on 3D engine
This commit is contained in:
gdkchan 2021-07-07 20:56:06 -03:00 committed by GitHub
parent 31cbd09a75
commit 8b44eb1c98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
30 changed files with 2599 additions and 460 deletions

View file

@ -0,0 +1,283 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.State;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
namespace Ryujinx.Graphics.Gpu.Engine.Dma
{
/// <summary>
/// Represents a DMA copy engine class.
/// </summary>
class DmaClass : IDeviceState
{
private readonly GpuContext _context;
private readonly GpuChannel _channel;
private readonly DeviceState<DmaClassState> _state;
/// <summary>
/// Copy flags passed on DMA launch.
/// </summary>
[Flags]
private enum CopyFlags
{
SrcLinear = 1 << 7,
DstLinear = 1 << 8,
MultiLineEnable = 1 << 9,
RemapEnable = 1 << 10
}
/// <summary>
/// Creates a new instance of the DMA copy engine class.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="channel">GPU channel</param>
public DmaClass(GpuContext context, GpuChannel channel)
{
_context = context;
_channel = channel;
_state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback>
{
{ nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) }
});
}
/// <summary>
/// Reads data from the class registers.
/// </summary>
/// <param name="offset">Register byte offset</param>
/// <returns>Data at the specified offset</returns>
public int Read(int offset) => _state.Read(offset);
/// <summary>
/// Writes data to the class registers.
/// </summary>
/// <param name="offset">Register byte offset</param>
/// <param name="data">Data to be written</param>
public void Write(int offset, int data) => _state.Write(offset, data);
/// <summary>
/// Determine if a buffer-to-texture region covers the entirety of a texture.
/// </summary>
/// <param name="tex">Texture to compare</param>
/// <param name="linear">True if the texture is linear, false if block linear</param>
/// <param name="bpp">Texture bytes per pixel</param>
/// <param name="stride">Texture stride</param>
/// <param name="xCount">Number of pixels to be copied</param>
/// <param name="yCount">Number of lines to be copied</param>
/// <returns></returns>
private static bool IsTextureCopyComplete(CopyBufferTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
{
if (linear)
{
int alignWidth = Constants.StrideAlignment / bpp;
return tex.RegionX == 0 &&
tex.RegionY == 0 &&
stride / bpp == BitUtils.AlignUp(xCount, alignWidth);
}
else
{
int alignWidth = Constants.GobAlignment / bpp;
return tex.RegionX == 0 &&
tex.RegionY == 0 &&
tex.Width == BitUtils.AlignUp(xCount, alignWidth) &&
tex.Height == yCount;
}
}
/// <summary>
/// Performs a buffer to buffer, or buffer to texture copy.
/// </summary>
/// <param name="argument">Method call argument</param>
private void LaunchDma(int argument)
{
var memoryManager = _channel.MemoryManager;
CopyFlags copyFlags = (CopyFlags)argument;
bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);
uint size = _state.State.LineLengthIn;
if (size == 0)
{
return;
}
ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;
int xCount = (int)_state.State.LineLengthIn;
int yCount = (int)_state.State.LineCount;
_context.Methods.FlushUboDirty(memoryManager);
if (copy2D)
{
// Buffer to texture copy.
int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1;
int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1;
var dst = Unsafe.As<uint, CopyBufferTexture>(ref _state.State.SetDstBlockSize);
var src = Unsafe.As<uint, CopyBufferTexture>(ref _state.State.SetSrcBlockSize);
int srcStride = (int)_state.State.PitchIn;
int dstStride = (int)_state.State.PitchOut;
var srcCalculator = new OffsetCalculator(
src.Width,
src.Height,
srcStride,
srcLinear,
src.MemoryLayout.UnpackGobBlocksInY(),
src.MemoryLayout.UnpackGobBlocksInZ(),
srcBpp);
var dstCalculator = new OffsetCalculator(
dst.Width,
dst.Height,
dstStride,
dstLinear,
dst.MemoryLayout.UnpackGobBlocksInY(),
dst.MemoryLayout.UnpackGobBlocksInZ(),
dstBpp);
ulong srcBaseAddress = memoryManager.Translate(srcGpuVa);
ulong dstBaseAddress = memoryManager.Translate(dstGpuVa);
(int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
(int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);
ReadOnlySpan<byte> srcSpan = memoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
Span<byte> dstSpan = memoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);
if (completeSource && completeDest)
{
var target = memoryManager.Physical.TextureCache.FindTexture(
memoryManager,
dst,
dstGpuVa,
dstBpp,
dstStride,
xCount,
yCount,
dstLinear);
if (target != null)
{
ReadOnlySpan<byte> data;
if (srcLinear)
{
data = LayoutConverter.ConvertLinearStridedToLinear(
target.Info.Width,
target.Info.Height,
1,
1,
srcStride,
target.Info.FormatInfo.BytesPerPixel,
srcSpan);
}
else
{
data = LayoutConverter.ConvertBlockLinearToLinear(
src.Width,
src.Height,
1,
target.Info.Levels,
1,
1,
1,
srcBpp,
src.MemoryLayout.UnpackGobBlocksInY(),
src.MemoryLayout.UnpackGobBlocksInZ(),
1,
new SizeInfo((int)target.Size),
srcSpan);
}
target.SetData(data);
target.SignalModified();
return;
}
else if (srcCalculator.LayoutMatches(dstCalculator))
{
srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
return;
}
}
unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
{
fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
{
byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
byte* srcBase = srcPtr - srcBaseOffset;
for (int y = 0; y < yCount; y++)
{
srcCalculator.SetY(src.RegionY + y);
dstCalculator.SetY(dst.RegionY + y);
for (int x = 0; x < xCount; x++)
{
int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);
*(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
}
}
}
return true;
}
bool _ = srcBpp switch
{
1 => Convert<byte>(dstSpan, srcSpan),
2 => Convert<ushort>(dstSpan, srcSpan),
4 => Convert<uint>(dstSpan, srcSpan),
8 => Convert<ulong>(dstSpan, srcSpan),
12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
_ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
};
memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
}
else
{
if (remap &&
_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
_state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
_state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
_state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
_state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
_state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
_state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
{
// Fast path for clears when remap is enabled.
memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
}
else
{
// TODO: Implement remap functionality.
// Buffer to buffer copy.
memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
}
}
}
}
}

View file

@ -0,0 +1,271 @@
// This file was auto-generated from NVIDIA official Maxwell definitions.
namespace Ryujinx.Graphics.Gpu.Engine.Dma
{
/// <summary>
/// Physical mode target.
/// </summary>
enum SetPhysModeTarget
{
LocalFb = 0,
CoherentSysmem = 1,
NoncoherentSysmem = 2,
}
/// <summary>
/// DMA data transfer type.
/// </summary>
enum LaunchDmaDataTransferType
{
None = 0,
Pipelined = 1,
NonPipelined = 2,
}
/// <summary>
/// DMA semaphore type.
/// </summary>
enum LaunchDmaSemaphoreType
{
None = 0,
ReleaseOneWordSemaphore = 1,
ReleaseFourWordSemaphore = 2,
}
/// <summary>
/// DMA interrupt type.
/// </summary>
enum LaunchDmaInterruptType
{
None = 0,
Blocking = 1,
NonBlocking = 2,
}
/// <summary>
/// DMA destination memory layout.
/// </summary>
enum LaunchDmaMemoryLayout
{
Blocklinear = 0,
Pitch = 1,
}
/// <summary>
/// DMA type.
/// </summary>
enum LaunchDmaType
{
Virtual = 0,
Physical = 1,
}
/// <summary>
/// DMA semaphore reduction operation.
/// </summary>
enum LaunchDmaSemaphoreReduction
{
Imin = 0,
Imax = 1,
Ixor = 2,
Iand = 3,
Ior = 4,
Iadd = 5,
Inc = 6,
Dec = 7,
Fadd = 10,
}
/// <summary>
/// DMA semaphore reduction signedness.
/// </summary>
enum LaunchDmaSemaphoreReductionSign
{
Signed = 0,
Unsigned = 1,
}
/// <summary>
/// DMA L2 cache bypass.
/// </summary>
enum LaunchDmaBypassL2
{
UsePteSetting = 0,
ForceVolatile = 1,
}
/// <summary>
/// DMA component remapping source component.
/// </summary>
enum SetRemapComponentsDst
{
SrcX = 0,
SrcY = 1,
SrcZ = 2,
SrcW = 3,
ConstA = 4,
ConstB = 5,
NoWrite = 6,
}
/// <summary>
/// DMA component remapping component size.
/// </summary>
enum SetRemapComponentsComponentSize
{
One = 0,
Two = 1,
Three = 2,
Four = 3,
}
/// <summary>
/// DMA component remapping number of components.
/// </summary>
enum SetRemapComponentsNumComponents
{
One = 0,
Two = 1,
Three = 2,
Four = 3,
}
/// <summary>
/// Width in GOBs of the destination texture.
/// </summary>
enum SetBlockSizeWidth
{
QuarterGob = 14,
OneGob = 0,
}
/// <summary>
/// Height in GOBs of the destination texture.
/// </summary>
enum SetBlockSizeHeight
{
OneGob = 0,
TwoGobs = 1,
FourGobs = 2,
EightGobs = 3,
SixteenGobs = 4,
ThirtytwoGobs = 5,
}
/// <summary>
/// Depth in GOBs of the destination texture.
/// </summary>
enum SetBlockSizeDepth
{
OneGob = 0,
TwoGobs = 1,
FourGobs = 2,
EightGobs = 3,
SixteenGobs = 4,
ThirtytwoGobs = 5,
}
/// <summary>
/// Height of a single GOB in lines.
/// </summary>
enum SetBlockSizeGobHeight
{
GobHeightTesla4 = 0,
GobHeightFermi8 = 1,
}
/// <summary>
/// DMA copy class state.
/// </summary>
unsafe struct DmaClassState
{
#pragma warning disable CS0649
public fixed uint Reserved00[64];
public uint Nop;
public fixed uint Reserved104[15];
public uint PmTrigger;
public fixed uint Reserved144[63];
public uint SetSemaphoreA;
public int SetSemaphoreAUpper => (int)((SetSemaphoreA >> 0) & 0xFF);
public uint SetSemaphoreB;
public uint SetSemaphorePayload;
public fixed uint Reserved24C[2];
public uint SetRenderEnableA;
public int SetRenderEnableAUpper => (int)((SetRenderEnableA >> 0) & 0xFF);
public uint SetRenderEnableB;
public uint SetRenderEnableC;
public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7);
public uint SetSrcPhysMode;
public SetPhysModeTarget SetSrcPhysModeTarget => (SetPhysModeTarget)((SetSrcPhysMode >> 0) & 0x3);
public uint SetDstPhysMode;
public SetPhysModeTarget SetDstPhysModeTarget => (SetPhysModeTarget)((SetDstPhysMode >> 0) & 0x3);
public fixed uint Reserved268[38];
public uint LaunchDma;
public LaunchDmaDataTransferType LaunchDmaDataTransferType => (LaunchDmaDataTransferType)((LaunchDma >> 0) & 0x3);
public bool LaunchDmaFlushEnable => (LaunchDma & 0x4) != 0;
public LaunchDmaSemaphoreType LaunchDmaSemaphoreType => (LaunchDmaSemaphoreType)((LaunchDma >> 3) & 0x3);
public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 5) & 0x3);
public LaunchDmaMemoryLayout LaunchDmaSrcMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 7) & 0x1);
public LaunchDmaMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaMemoryLayout)((LaunchDma >> 8) & 0x1);
public bool LaunchDmaMultiLineEnable => (LaunchDma & 0x200) != 0;
public bool LaunchDmaRemapEnable => (LaunchDma & 0x400) != 0;
public bool LaunchDmaForceRmwdisable => (LaunchDma & 0x800) != 0;
public LaunchDmaType LaunchDmaSrcType => (LaunchDmaType)((LaunchDma >> 12) & 0x1);
public LaunchDmaType LaunchDmaDstType => (LaunchDmaType)((LaunchDma >> 13) & 0x1);
public LaunchDmaSemaphoreReduction LaunchDmaSemaphoreReduction => (LaunchDmaSemaphoreReduction)((LaunchDma >> 14) & 0xF);
public LaunchDmaSemaphoreReductionSign LaunchDmaSemaphoreReductionSign => (LaunchDmaSemaphoreReductionSign)((LaunchDma >> 18) & 0x1);
public bool LaunchDmaSemaphoreReductionEnable => (LaunchDma & 0x80000) != 0;
public LaunchDmaBypassL2 LaunchDmaBypassL2 => (LaunchDmaBypassL2)((LaunchDma >> 20) & 0x1);
public fixed uint Reserved304[63];
public uint OffsetInUpper;
public int OffsetInUpperUpper => (int)((OffsetInUpper >> 0) & 0xFF);
public uint OffsetInLower;
public uint OffsetOutUpper;
public int OffsetOutUpperUpper => (int)((OffsetOutUpper >> 0) & 0xFF);
public uint OffsetOutLower;
public uint PitchIn;
public uint PitchOut;
public uint LineLengthIn;
public uint LineCount;
public fixed uint Reserved420[184];
public uint SetRemapConstA;
public uint SetRemapConstB;
public uint SetRemapComponents;
public SetRemapComponentsDst SetRemapComponentsDstX => (SetRemapComponentsDst)((SetRemapComponents >> 0) & 0x7);
public SetRemapComponentsDst SetRemapComponentsDstY => (SetRemapComponentsDst)((SetRemapComponents >> 4) & 0x7);
public SetRemapComponentsDst SetRemapComponentsDstZ => (SetRemapComponentsDst)((SetRemapComponents >> 8) & 0x7);
public SetRemapComponentsDst SetRemapComponentsDstW => (SetRemapComponentsDst)((SetRemapComponents >> 12) & 0x7);
public SetRemapComponentsComponentSize SetRemapComponentsComponentSize => (SetRemapComponentsComponentSize)((SetRemapComponents >> 16) & 0x3);
public SetRemapComponentsNumComponents SetRemapComponentsNumSrcComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 20) & 0x3);
public SetRemapComponentsNumComponents SetRemapComponentsNumDstComponents => (SetRemapComponentsNumComponents)((SetRemapComponents >> 24) & 0x3);
public uint SetDstBlockSize;
public SetBlockSizeWidth SetDstBlockSizeWidth => (SetBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
public SetBlockSizeHeight SetDstBlockSizeHeight => (SetBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
public SetBlockSizeDepth SetDstBlockSizeDepth => (SetBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
public SetBlockSizeGobHeight SetDstBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetDstBlockSize >> 12) & 0xF);
public uint SetDstWidth;
public uint SetDstHeight;
public uint SetDstDepth;
public uint SetDstLayer;
public uint SetDstOrigin;
public int SetDstOriginX => (int)((SetDstOrigin >> 0) & 0xFFFF);
public int SetDstOriginY => (int)((SetDstOrigin >> 16) & 0xFFFF);
public uint Reserved724;
public uint SetSrcBlockSize;
public SetBlockSizeWidth SetSrcBlockSizeWidth => (SetBlockSizeWidth)((SetSrcBlockSize >> 0) & 0xF);
public SetBlockSizeHeight SetSrcBlockSizeHeight => (SetBlockSizeHeight)((SetSrcBlockSize >> 4) & 0xF);
public SetBlockSizeDepth SetSrcBlockSizeDepth => (SetBlockSizeDepth)((SetSrcBlockSize >> 8) & 0xF);
public SetBlockSizeGobHeight SetSrcBlockSizeGobHeight => (SetBlockSizeGobHeight)((SetSrcBlockSize >> 12) & 0xF);
public uint SetSrcWidth;
public uint SetSrcHeight;
public uint SetSrcDepth;
public uint SetSrcLayer;
public uint SetSrcOrigin;
public int SetSrcOriginX => (int)((SetSrcOrigin >> 0) & 0xFFFF);
public int SetSrcOriginY => (int)((SetSrcOrigin >> 16) & 0xFFFF);
public fixed uint Reserved740[629];
public uint PmTriggerEnd;
public fixed uint Reserved1118[2490];
#pragma warning restore CS0649
}
}