From ff5df5d8a1fec6947f7feed3ec3ca0889cd892a5 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 4 Aug 2021 17:20:58 -0300 Subject: [PATCH] Support non-contiguous copies on I2M and DMA engines (#2473) * Support non-contiguous copies on I2M and DMA engines * Vector copy should start aligned on I2M * Nits * Zero extend the offset --- Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs | 11 ++--- .../InlineToMemory/InlineToMemoryClass.cs | 42 +++++++++++-------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs index 70909168..3078cc8a 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs @@ -152,14 +152,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma dst.MemoryLayout.UnpackGobBlocksInZ(), dstBpp); - ulong srcBaseAddress = memoryManager.Translate(srcGpuVa); - ulong dstBaseAddress = memoryManager.Translate(dstGpuVa); - (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount); (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount); - ReadOnlySpan srcSpan = memoryManager.Physical.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true); - Span dstSpan = memoryManager.Physical.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray(); + ReadOnlySpan srcSpan = memoryManager.GetSpan(srcGpuVa + (uint)srcBaseOffset, srcSize, true); + Span dstSpan = memoryManager.GetSpan(dstGpuVa + (uint)dstBaseOffset, dstSize).ToArray(); bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount); bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount); @@ -217,7 +214,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma { srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely. 
- memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan); + memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan); return; } @@ -258,7 +255,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.") }; - memoryManager.Physical.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan); + memoryManager.Write(dstGpuVa + (uint)dstBaseOffset, dstSpan); } else { diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs index 186a4648..81c5ad77 100644 --- a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs @@ -4,6 +4,7 @@ using Ryujinx.Graphics.Texture; using System; using System.Collections.Generic; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory { @@ -169,13 +170,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory /// private void FinishTransfer() { - Span data = MemoryMarshal.Cast(_buffer).Slice(0, _size); + var memoryManager = _channel.MemoryManager; + + var data = MemoryMarshal.Cast(_buffer).Slice(0, _size); if (_isLinear && _lineCount == 1) { - ulong address = _channel.MemoryManager.Translate(_dstGpuVa); - - _channel.MemoryManager.Physical.Write(address, data); + memoryManager.Write(_dstGpuVa, data); } else { @@ -189,36 +190,43 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory int srcOffset = 0; - ulong dstBaseAddress = _channel.MemoryManager.Translate(_dstGpuVa); - for (int y = _dstY; y < _dstY + _lineCount; y++) { int x1 = _dstX; int x2 = _dstX + _lineLengthIn; - int x2Trunc = _dstX + BitUtils.AlignDown(_lineLengthIn, 16); + int x1Round = BitUtils.AlignUp(_dstX, 16); + int x2Trunc = BitUtils.AlignDown(x2, 16); - int x; + int x = x1; - for (x = x1; x < x2Trunc; x += 16, srcOffset += 16) + if (x1Round <= x2) 
+ { + for (; x < x1Round; x++, srcOffset++) + { + int dstOffset = dstCalculator.GetOffset(x, y); + + ulong dstAddress = _dstGpuVa + (uint)dstOffset; + + memoryManager.Write(dstAddress, data[srcOffset]); + } + } + + for (; x < x2Trunc; x += 16, srcOffset += 16) { int dstOffset = dstCalculator.GetOffset(x, y); - ulong dstAddress = dstBaseAddress + (ulong)dstOffset; + ulong dstAddress = _dstGpuVa + (uint)dstOffset; - Span<byte> pixel = data.Slice(srcOffset, 16); - - _channel.MemoryManager.Physical.Write(dstAddress, pixel); + memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]); } for (; x < x2; x++, srcOffset++) { int dstOffset = dstCalculator.GetOffset(x, y); - ulong dstAddress = dstBaseAddress + (ulong)dstOffset; + ulong dstAddress = _dstGpuVa + (uint)dstOffset; - Span<byte> pixel = data.Slice(srcOffset, 1); - - _channel.MemoryManager.Physical.Write(dstAddress, pixel); + memoryManager.Write(dstAddress, data[srcOffset]); } } }