From 216026c096d844f8bf09ee0e185dec4111c64095 Mon Sep 17 00:00:00 2001 From: jhorv <38920027+jhorv@users.noreply.github.com> Date: Sun, 21 Apr 2024 06:57:35 -0400 Subject: [PATCH] Use pooled memory and avoid memory copies (#6691) * perf: use ByteMemoryPool * feat: KPageTableBase/KPageTable new methods to read and write `ReadOnlySequence` * new: add IWritableBlock.Write(ulong, ReadOnlySequence) with default impl * perf: use GetReadOnlySequence() instead of GetSpan() * perf: make `Parcel` IDisposable, use `ByteMemoryPool` for internal allocation, and make Parcel consumers dispose of it * remove comment about copySize * remove unnecessary Clear() --- .../SDL2HardwareDeviceSession.cs | 6 +++- .../SoundIoHardwareDeviceSession.cs | 8 +++-- .../HOS/Kernel/Ipc/KServerSession.cs | 4 +-- .../HOS/Kernel/Memory/KPageTable.cs | 13 +++++++ .../HOS/Kernel/Memory/KPageTableBase.cs | 36 ++++++++++++++----- .../HOS/Services/SurfaceFlinger/IBinder.cs | 4 +-- .../HOS/Services/SurfaceFlinger/Parcel.cs | 32 ++++++++++++----- .../RootService/IApplicationDisplayService.cs | 4 +-- src/Ryujinx.Memory/IWritableBlock.cs | 16 +++++++++ 9 files changed, 98 insertions(+), 25 deletions(-) diff --git a/src/Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceSession.cs b/src/Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceSession.cs index 00188ba5..62fe5025 100644 --- a/src/Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceSession.cs +++ b/src/Ryujinx.Audio.Backends.SDL2/SDL2HardwareDeviceSession.cs @@ -1,8 +1,10 @@ using Ryujinx.Audio.Backends.Common; using Ryujinx.Audio.Common; using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; using Ryujinx.Memory; using System; +using System.Buffers; using System.Collections.Concurrent; using System.Threading; @@ -87,7 +89,9 @@ namespace Ryujinx.Audio.Backends.SDL2 return; } - byte[] samples = new byte[frameCount * _bytesPerFrame]; + using IMemoryOwner samplesOwner = ByteMemoryPool.Rent(frameCount * _bytesPerFrame); + + Span samples = samplesOwner.Memory.Span; _ringBuffer.Read(samples, 0, samples.Length); diff --git a/src/Ryujinx.Audio.Backends.SoundIo/SoundIoHardwareDeviceSession.cs b/src/Ryujinx.Audio.Backends.SoundIo/SoundIoHardwareDeviceSession.cs index f60982e3..4011a121 100644 --- a/src/Ryujinx.Audio.Backends.SoundIo/SoundIoHardwareDeviceSession.cs +++ b/src/Ryujinx.Audio.Backends.SoundIo/SoundIoHardwareDeviceSession.cs @@ -1,8 +1,10 @@ using Ryujinx.Audio.Backends.Common; using Ryujinx.Audio.Backends.SoundIo.Native; using Ryujinx.Audio.Common; +using Ryujinx.Common.Memory; using Ryujinx.Memory; using System; +using System.Buffers; using System.Collections.Concurrent; using System.Runtime.CompilerServices; using System.Threading; @@ -37,7 +39,7 @@ namespace Ryujinx.Audio.Backends.SoundIo _outputStream = _driver.OpenStream(RequestedSampleFormat, RequestedSampleRate, RequestedChannelCount); _outputStream.WriteCallback += Update; _outputStream.Volume = requestedVolume; - // TODO: Setup other callbacks (errors, ect). + // TODO: Setup other callbacks (errors, etc.) _outputStream.Open(); } @@ -120,7 +122,9 @@ namespace Ryujinx.Audio.Backends.SoundIo int channelCount = areas.Length; - byte[] samples = new byte[frameCount * bytesPerFrame]; + using IMemoryOwner samplesOwner = ByteMemoryPool.Rent(frameCount * bytesPerFrame); + + Span samples = samplesOwner.Memory.Span; _ringBuffer.Read(samples, 0, samples.Length); diff --git a/src/Ryujinx.HLE/HOS/Kernel/Ipc/KServerSession.cs b/src/Ryujinx.HLE/HOS/Kernel/Ipc/KServerSession.cs index 7e41a3f3..3b428085 100644 --- a/src/Ryujinx.HLE/HOS/Kernel/Ipc/KServerSession.cs +++ b/src/Ryujinx.HLE/HOS/Kernel/Ipc/KServerSession.cs @@ -570,7 +570,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Ipc } else { - serverProcess.CpuMemory.Write(copyDst, clientProcess.CpuMemory.GetSpan(copySrc, (int)copySize)); + serverProcess.CpuMemory.Write(copyDst, clientProcess.CpuMemory.GetReadOnlySequence(copySrc, (int)copySize)); } if (clientResult != Result.Success) @@ -858,7 +858,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Ipc } else { - clientProcess.CpuMemory.Write(copyDst, serverProcess.CpuMemory.GetSpan(copySrc, (int)copySize)); + clientProcess.CpuMemory.Write(copyDst, serverProcess.CpuMemory.GetReadOnlySequence(copySrc, (int)copySize)); } } diff --git a/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTable.cs b/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTable.cs index d262c159..4ffa447d 100644 --- a/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTable.cs +++ b/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTable.cs @@ -2,6 +2,7 @@ using Ryujinx.Horizon.Common; using Ryujinx.Memory; using Ryujinx.Memory.Range; using System; +using System.Buffers; using System.Collections.Generic; using System.Diagnostics; @@ -34,6 +35,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory } } + /// + protected override ReadOnlySequence GetReadOnlySequence(ulong va, int size) + { + return _cpuMemory.GetReadOnlySequence(va, size); + } + /// protected override ReadOnlySpan GetSpan(ulong va, int size) { @@ -247,6 +254,12 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory _cpuMemory.SignalMemoryTracking(va, size, write); } + /// + protected override void Write(ulong va, ReadOnlySequence data) + { + _cpuMemory.Write(va, data); + } + /// protected override void Write(ulong va, ReadOnlySpan data) { diff --git a/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTableBase.cs b/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTableBase.cs index ae99a434..58bbc0db 100644 --- a/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTableBase.cs +++ b/src/Ryujinx.HLE/HOS/Kernel/Memory/KPageTableBase.cs @@ -5,6 +5,7 @@ using Ryujinx.Horizon.Common; using Ryujinx.Memory; using Ryujinx.Memory.Range; using System; +using System.Buffers; using System.Collections.Generic; using System.Diagnostics; @@ -1568,7 +1569,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory while (size > 0) { - ulong copySize = 0x100000; // Copy chunck size. Any value will do, moderate sizes are recommended. + ulong copySize = int.MaxValue; if (copySize > size) { @@ -1577,11 +1578,11 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory if (toServer) { - currentProcess.CpuMemory.Write(serverAddress, GetSpan(clientAddress, (int)copySize)); + currentProcess.CpuMemory.Write(serverAddress, GetReadOnlySequence(clientAddress, (int)copySize)); } else { - Write(clientAddress, currentProcess.CpuMemory.GetSpan(serverAddress, (int)copySize)); + Write(clientAddress, currentProcess.CpuMemory.GetReadOnlySequence(serverAddress, (int)copySize)); } serverAddress += copySize; @@ -1911,9 +1912,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory Context.Memory.Fill(GetDramAddressFromPa(dstFirstPagePa), unusedSizeBefore, (byte)_ipcFillValue); ulong copySize = addressRounded <= endAddr ? addressRounded - address : size; - var data = srcPageTable.GetSpan(addressTruncated + unusedSizeBefore, (int)copySize); + var data = srcPageTable.GetReadOnlySequence(addressTruncated + unusedSizeBefore, (int)copySize); - Context.Memory.Write(GetDramAddressFromPa(dstFirstPagePa + unusedSizeBefore), data); + ((IWritableBlock)Context.Memory).Write(GetDramAddressFromPa(dstFirstPagePa + unusedSizeBefore), data); firstPageFillAddress += unusedSizeBefore + copySize; @@ -1977,9 +1978,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory if (send) { ulong copySize = endAddr - endAddrTruncated; - var data = srcPageTable.GetSpan(endAddrTruncated, (int)copySize); + var data = srcPageTable.GetReadOnlySequence(endAddrTruncated, (int)copySize); - Context.Memory.Write(GetDramAddressFromPa(dstLastPagePa), data); + ((IWritableBlock)Context.Memory).Write(GetDramAddressFromPa(dstLastPagePa), data); lastPageFillAddr += copySize; @@ -2943,6 +2944,18 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory /// Page list where the ranges will be added protected abstract void GetPhysicalRegions(ulong va, ulong size, KPageList pageList); + /// + /// Gets a read-only sequence of data from CPU mapped memory. + /// + /// + /// Allows reading non-contiguous memory without first copying it to a newly allocated single contiguous block. + /// + /// Virtual address of the data + /// Size of the data + /// A read-only sequence of the data + /// Throw for unhandled invalid or unmapped memory accesses + protected abstract ReadOnlySequence GetReadOnlySequence(ulong va, int size); + /// /// Gets a read-only span of data from CPU mapped memory. /// @@ -2952,7 +2965,6 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory /// /// Virtual address of the data /// Size of the data - /// True if read tracking is triggered on the span /// A read-only span of the data /// Throw for unhandled invalid or unmapped memory accesses protected abstract ReadOnlySpan GetSpan(ulong va, int size); @@ -3060,6 +3072,14 @@ namespace Ryujinx.HLE.HOS.Kernel.Memory /// Size of the region protected abstract void SignalMemoryTracking(ulong va, ulong size, bool write); + /// + /// Writes data to CPU mapped memory, with write tracking. + /// + /// Virtual address to write the data into + /// Data to be written + /// Throw for unhandled invalid or unmapped memory accesses + protected abstract void Write(ulong va, ReadOnlySequence data); + /// /// Writes data to CPU mapped memory, with write tracking. /// diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/IBinder.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/IBinder.cs index 0fb2dfd2..54aac48a 100644 --- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/IBinder.cs +++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/IBinder.cs @@ -13,10 +13,10 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger ResultCode OnTransact(uint code, uint flags, ReadOnlySpan inputParcel, Span outputParcel) { - Parcel inputParcelReader = new(inputParcel.ToArray()); + using Parcel inputParcelReader = new(inputParcel); // TODO: support objects? - Parcel outputParcelWriter = new((uint)(outputParcel.Length - Unsafe.SizeOf()), 0); + using Parcel outputParcelWriter = new((uint)(outputParcel.Length - Unsafe.SizeOf()), 0); string inputInterfaceToken = inputParcelReader.ReadInterfaceToken(); diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs index 4ac0525b..c6cd60d0 100644 --- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs +++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/Parcel.cs @@ -1,7 +1,9 @@ using Ryujinx.Common; +using Ryujinx.Common.Memory; using Ryujinx.Common.Utilities; using Ryujinx.HLE.HOS.Services.SurfaceFlinger.Types; using System; +using System.Buffers; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -9,13 +11,13 @@ using System.Text; namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger { - class Parcel + sealed class Parcel : IDisposable { - private readonly byte[] _rawData; + private readonly IMemoryOwner _rawDataOwner; - private Span Raw => new(_rawData); + private Span Raw => _rawDataOwner.Memory.Span; - private ref ParcelHeader Header => ref MemoryMarshal.Cast(_rawData)[0]; + private ref ParcelHeader Header => ref MemoryMarshal.Cast(Raw)[0]; private Span Payload => Raw.Slice((int)Header.PayloadOffset, (int)Header.PayloadSize); @@ -24,9 +26,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger private int _payloadPosition; private int _objectPosition; - public Parcel(byte[] rawData) + private bool _isDisposed; + + public Parcel(ReadOnlySpan data) { - _rawData = rawData; + _rawDataOwner = ByteMemoryPool.RentCopy(data); _payloadPosition = 0; _objectPosition = 0; @@ -36,7 +40,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger { uint headerSize = (uint)Unsafe.SizeOf(); - _rawData = new byte[BitUtils.AlignUp(headerSize + payloadSize + objectsSize, 4)]; + _rawDataOwner = ByteMemoryPool.RentCleared(BitUtils.AlignUp(headerSize + payloadSize + objectsSize, 4)); Header.PayloadSize = payloadSize; Header.ObjectsSize = objectsSize; @@ -132,7 +136,9 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger // TODO: figure out what this value is - WriteInplaceObject(new byte[4] { 0, 0, 0, 0 }); + Span fourBytes = stackalloc byte[4]; + + WriteInplaceObject(fourBytes); } public AndroidStrongPointer ReadStrongPointer() where T : unmanaged, IFlattenable @@ -219,5 +225,15 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger return Raw[..(int)(Header.PayloadSize + Header.ObjectsSize + Unsafe.SizeOf())]; } + + public void Dispose() + { + if (!_isDisposed) + { + _isDisposed = true; + + _rawDataOwner.Dispose(); + } + } } } diff --git a/src/Ryujinx.HLE/HOS/Services/Vi/RootService/IApplicationDisplayService.cs b/src/Ryujinx.HLE/HOS/Services/Vi/RootService/IApplicationDisplayService.cs index b6988f08..a2b1fb52 100644 --- a/src/Ryujinx.HLE/HOS/Services/Vi/RootService/IApplicationDisplayService.cs +++ b/src/Ryujinx.HLE/HOS/Services/Vi/RootService/IApplicationDisplayService.cs @@ -250,7 +250,7 @@ namespace Ryujinx.HLE.HOS.Services.Vi.RootService context.Device.System.SurfaceFlinger.SetRenderLayer(layerId); - Parcel parcel = new(0x28, 0x4); + using Parcel parcel = new(0x28, 0x4); parcel.WriteObject(producer, "dispdrv\0"); @@ -288,7 +288,7 @@ namespace Ryujinx.HLE.HOS.Services.Vi.RootService context.Device.System.SurfaceFlinger.SetRenderLayer(layerId); - Parcel parcel = new(0x28, 0x4); + using Parcel parcel = new(0x28, 0x4); parcel.WriteObject(producer, "dispdrv\0"); diff --git a/src/Ryujinx.Memory/IWritableBlock.cs b/src/Ryujinx.Memory/IWritableBlock.cs index 0858e0c9..78ae2479 100644 --- a/src/Ryujinx.Memory/IWritableBlock.cs +++ b/src/Ryujinx.Memory/IWritableBlock.cs @@ -1,9 +1,25 @@ using System; +using System.Buffers; namespace Ryujinx.Memory { public interface IWritableBlock { + /// + /// Writes data to CPU mapped memory, with write tracking. + /// + /// Virtual address to write the data into + /// Data to be written + /// Throw for unhandled invalid or unmapped memory accesses + void Write(ulong va, ReadOnlySequence data) + { + foreach (ReadOnlyMemory segment in data) + { + Write(va, segment.Span); + va += (ulong)segment.Length; + } + } + void Write(ulong va, ReadOnlySpan data); void WriteUntracked(ulong va, ReadOnlySpan data) => Write(va, data);