From f6c3f1cdfdb9145634be3bfc54400f0d408f36dd Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Mon, 25 Sep 2023 22:07:03 +0100
Subject: [PATCH] GPU: Discard data when getting texture before full clear (#5719)

* GPU: Discard data when getting texture before full clear

* Fix rules and order of clear checks

* Fix formatting
---
 src/Ryujinx.Graphics.GAL/Format.cs           | 39 ++++++++++++
 .../Engine/Threed/DrawManager.cs             | 55 ++++++++++++++--
 .../Engine/Threed/RenderTargetUpdateFlags.cs |  5 ++
 .../Engine/Threed/StateUpdater.cs            |  3 +
 src/Ryujinx.Graphics.Gpu/Image/Texture.cs    | 12 ++++
 .../Image/TextureCache.cs                    | 62 +++++++++++++++++--
 .../Image/TextureGroup.cs                    | 18 ++++++
 .../Image/TextureGroupHandle.cs              | 19 +++++-
 .../Image/TextureSearchFlags.cs              |  1 +
 src/Ryujinx.Graphics.Gpu/Window.cs           |  2 +-
 10 files changed, 205 insertions(+), 11 deletions(-)

diff --git a/src/Ryujinx.Graphics.GAL/Format.cs b/src/Ryujinx.Graphics.GAL/Format.cs
index f6feec1c..99c89dce 100644
--- a/src/Ryujinx.Graphics.GAL/Format.cs
+++ b/src/Ryujinx.Graphics.GAL/Format.cs
@@ -335,6 +335,45 @@ namespace Ryujinx.Graphics.GAL
             return 1;
         }
 
+        /// <summary>
+        /// Checks if the texture format is a depth or depth-stencil format.
+        /// </summary>
+        /// <param name="format">Texture format</param>
+        /// <returns>True if the format is a depth or depth-stencil format, false otherwise</returns>
+        public static bool HasDepth(this Format format)
+        {
+            switch (format)
+            {
+                case Format.D16Unorm:
+                case Format.D24UnormS8Uint:
+                case Format.S8UintD24Unorm:
+                case Format.D32Float:
+                case Format.D32FloatS8Uint:
+                    return true;
+            }
+
+            return false;
+        }
+
+        /// <summary>
+        /// Checks if the texture format is a stencil or depth-stencil format.
+        /// </summary>
+        /// <param name="format">Texture format</param>
+        /// <returns>True if the format is a stencil or depth-stencil format, false otherwise</returns>
+        public static bool HasStencil(this Format format)
+        {
+            switch (format)
+            {
+                case Format.D24UnormS8Uint:
+                case Format.S8UintD24Unorm:
+                case Format.D32FloatS8Uint:
+                case Format.S8Uint:
+                    return true;
+            }
+
+            return false;
+        }
+
         /// <summary>
         /// Checks if the texture format is valid to use as image format.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
index 18e7ac00..1c31312c 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs
@@ -1,6 +1,7 @@
 using Ryujinx.Graphics.GAL;
 using Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw;
 using Ryujinx.Graphics.Gpu.Engine.Types;
+using Ryujinx.Graphics.Gpu.Image;
 using Ryujinx.Graphics.Gpu.Memory;
 using System;
 
@@ -806,25 +807,69 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                 updateFlags |= RenderTargetUpdateFlags.Layered;
             }
 
-            if (clearDepth || clearStencil)
+            bool clearDS = clearDepth || clearStencil;
+
+            if (clearDS)
             {
                 updateFlags |= RenderTargetUpdateFlags.UpdateDepthStencil;
             }
 
-            engine.UpdateRenderTargetState(updateFlags, singleUse: componentMask != 0 ? index : -1);
-
             // If there is a mismatch on the host clip region and the one explicitly defined by the guest
             // on the screen scissor state, then we need to force only one texture to be bound to avoid
             // host clipping.
             var screenScissorState = _state.State.ScreenScissorState;
 
+            bool clearAffectedByStencilMask = (_state.State.ClearFlags & 1) != 0;
+            bool clearAffectedByScissor = (_state.State.ClearFlags & 0x100) != 0;
+
+            if (clearDS || componentMask == 15)
+            {
+                // A full clear if scissor is disabled, or it matches the screen scissor state.
+
+                bool fullClear = screenScissorState.X == 0 && screenScissorState.Y == 0;
+
+                if (fullClear && clearAffectedByScissor && _state.State.ScissorState[0].Enable)
+                {
+                    ref var scissorState = ref _state.State.ScissorState[0];
+
+                    fullClear = scissorState.X1 == screenScissorState.X &&
+                        scissorState.Y1 == screenScissorState.Y &&
+                        scissorState.X2 >= screenScissorState.X + screenScissorState.Width &&
+                        scissorState.Y2 >= screenScissorState.Y + screenScissorState.Height;
+                }
+
+                if (fullClear && clearDS)
+                {
+                    // Must clear all aspects of the depth-stencil format.
+
+                    FormatInfo dsFormat = _state.State.RtDepthStencilState.Format.Convert();
+
+                    bool hasDepth = dsFormat.Format.HasDepth();
+                    bool hasStencil = dsFormat.Format.HasStencil();
+
+                    if (hasStencil && (!clearStencil || (clearAffectedByStencilMask && _state.State.StencilTestState.FrontMask != 0xff)))
+                    {
+                        fullClear = false;
+                    }
+                    else if (hasDepth && !clearDepth)
+                    {
+                        fullClear = false;
+                    }
+                }
+
+                if (fullClear)
+                {
+                    updateFlags |= RenderTargetUpdateFlags.DiscardClip;
+                }
+            }
+
+            engine.UpdateRenderTargetState(updateFlags, singleUse: componentMask != 0 ? index : -1);
+
             // Must happen after UpdateRenderTargetState to have up-to-date clip region values.
             bool clipMismatch = (screenScissorState.X | screenScissorState.Y) != 0 ||
                                 screenScissorState.Width != _channel.TextureManager.ClipRegionWidth ||
                                 screenScissorState.Height != _channel.TextureManager.ClipRegionHeight;
 
-            bool clearAffectedByStencilMask = (_state.State.ClearFlags & 1) != 0;
-            bool clearAffectedByScissor = (_state.State.ClearFlags & 0x100) != 0;
             bool needsCustomScissor = !clearAffectedByScissor || clipMismatch;
 
             // Scissor and rasterizer discard also affect clears.
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
index e575923d..58c7bdb4 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/RenderTargetUpdateFlags.cs
@@ -33,6 +33,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
         /// </summary>
         UpdateDepthStencil = 1 << 3,
 
+        /// <summary>
+        /// Indicates that the data in the clip region can be discarded for the next use.
+        /// </summary>
+        DiscardClip = 1 << 4,
+
         /// <summary>
         /// Default update flags for draw.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
index 48a497b5..37e41c51 100644
--- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
+++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs
@@ -447,6 +447,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
             bool useControl = updateFlags.HasFlag(RenderTargetUpdateFlags.UseControl);
             bool layered = updateFlags.HasFlag(RenderTargetUpdateFlags.Layered);
             bool singleColor = updateFlags.HasFlag(RenderTargetUpdateFlags.SingleColor);
+            bool discard = updateFlags.HasFlag(RenderTargetUpdateFlags.DiscardClip);
 
             int count = useControl ? rtControl.UnpackCount() : Constants.TotalRenderTargets;
 
@@ -486,6 +487,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                     memoryManager,
                     colorState,
                     _vtgWritesRtLayer || layered,
+                    discard,
                     samplesInX,
                     samplesInY,
                     sizeHint);
@@ -525,6 +527,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
                     dsState,
                     dsSize,
                     _vtgWritesRtLayer || layered,
+                    discard,
                     samplesInX,
                     samplesInY,
                     sizeHint);
diff --git a/src/Ryujinx.Graphics.Gpu/Image/Texture.cs b/src/Ryujinx.Graphics.Gpu/Image/Texture.cs
index 0fce4deb..022a3839 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/Texture.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/Texture.cs
@@ -570,6 +570,18 @@ namespace Ryujinx.Graphics.Gpu.Image
             return Group.CheckDirty(this, consume);
         }
 
+        /// <summary>
+        /// Discards all data for this texture.
+        /// This clears all dirty flags, modified flags, and pending copies from other textures.
+        /// It should be used if the texture data will be fully overwritten by the next use.
+        /// </summary>
+        public void DiscardData()
+        {
+            Group.DiscardData(this);
+
+            _dirty = false;
+        }
+
         /// <summary>
         /// Synchronizes guest and host memory.
         /// This will overwrite the texture data with the texture data on the guest memory, if a CPU
diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
index 3f215a4a..55000ac6 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/TextureCache.cs
@@ -311,7 +311,7 @@ namespace Ryujinx.Graphics.Gpu.Image
                 flags |= TextureSearchFlags.NoCreate;
             }
 
-            Texture texture = FindOrCreateTexture(memoryManager, flags, info, 0);
+            Texture texture = FindOrCreateTexture(memoryManager, flags, info, 0, sizeHint: sizeHint);
 
             texture?.SynchronizeMemory();
 
@@ -324,6 +324,7 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <param name="memoryManager">GPU memory manager where the texture is mapped</param>
         /// <param name="colorState">Color buffer texture to find or create</param>
         /// <param name="layered">Indicates if the texture might be accessed with a non-zero layer index</param>
+        /// <param name="discard">Indicates that the sizeHint region's data will be overwritten</param>
         /// <param name="samplesInX">Number of samples in the X direction, for MSAA</param>
         /// <param name="samplesInY">Number of samples in the Y direction, for MSAA</param>
         /// <param name="sizeHint">A hint indicating the minimum used size for the texture</param>
@@ -332,6 +333,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             MemoryManager memoryManager,
             RtColorState colorState,
             bool layered,
+            bool discard,
             int samplesInX,
             int samplesInY,
             Size sizeHint)
@@ -398,7 +400,14 @@ namespace Ryujinx.Graphics.Gpu.Image
 
             int layerSize = !isLinear ? colorState.LayerSize * 4 : 0;
 
-            Texture texture = FindOrCreateTexture(memoryManager, TextureSearchFlags.WithUpscale, info, layerSize);
+            var flags = TextureSearchFlags.WithUpscale;
+
+            if (discard)
+            {
+                flags |= TextureSearchFlags.DiscardData;
+            }
+
+            Texture texture = FindOrCreateTexture(memoryManager, flags, info, layerSize, sizeHint: sizeHint);
 
             texture?.SynchronizeMemory();
 
@@ -412,6 +421,7 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <param name="dsState">Depth-stencil buffer texture to find or create</param>
         /// <param name="size">Size of the depth-stencil texture</param>
         /// <param name="layered">Indicates if the texture might be accessed with a non-zero layer index</param>
+        /// <param name="discard">Indicates that the sizeHint region's data will be overwritten</param>
         /// <param name="samplesInX">Number of samples in the X direction, for MSAA</param>
         /// <param name="samplesInY">Number of samples in the Y direction, for MSAA</param>
         /// <param name="sizeHint">A hint indicating the minimum used size for the texture</param>
@@ -421,6 +431,7 @@ namespace Ryujinx.Graphics.Gpu.Image
             RtDepthStencilState dsState,
             Size3D size,
             bool layered,
+            bool discard,
             int samplesInX,
             int samplesInY,
             Size sizeHint)
@@ -465,7 +476,14 @@ namespace Ryujinx.Graphics.Gpu.Image
                 target,
                 formatInfo);
 
-            Texture texture = FindOrCreateTexture(memoryManager, TextureSearchFlags.WithUpscale, info, dsState.LayerSize * 4);
+            var flags = TextureSearchFlags.WithUpscale;
+
+            if (discard)
+            {
+                flags |= TextureSearchFlags.DiscardData;
+            }
+
+            Texture texture = FindOrCreateTexture(memoryManager, flags, info, dsState.LayerSize * 4, sizeHint: sizeHint);
 
             texture?.SynchronizeMemory();
 
@@ -500,6 +518,37 @@ namespace Ryujinx.Graphics.Gpu.Image
             return Math.Clamp(widthAligned - alignment + 1, minimumWidth, widthAligned);
         }
 
+        /// <summary>
+        /// Determines if texture data should be fully discarded
+        /// based on the size hint region and whether it is set to be discarded.
+        /// </summary>
+        /// <param name="discard">Whether the size hint region should be discarded</param>
+        /// <param name="texture">The texture being discarded</param>
+        /// <param name="sizeHint">A hint indicating the minimum used size for the texture</param>
+        /// <returns>True if the data should be discarded, false otherwise</returns>
+        private static bool ShouldDiscard(bool discard, Texture texture, Size? sizeHint)
+        {
+            return discard &&
+                texture.Info.DepthOrLayers == 1 &&
+                sizeHint != null &&
+                texture.Width <= sizeHint.Value.Width &&
+                texture.Height <= sizeHint.Value.Height;
+        }
+
+        /// <summary>
+        /// Discards texture data if requested and possible.
+        /// </summary>
+        /// <param name="discard">Whether the size hint region should be discarded</param>
+        /// <param name="texture">The texture being discarded</param>
+        /// <param name="sizeHint">A hint indicating the minimum used size for the texture</param>
+        private static void DiscardIfNeeded(bool discard, Texture texture, Size? sizeHint)
+        {
+            if (ShouldDiscard(discard, texture, sizeHint))
+            {
+                texture.DiscardData();
+            }
+        }
+
         /// <summary>
         /// Tries to find an existing texture, or create a new one if not found.
         /// </summary>
@@ -507,6 +556,7 @@ namespace Ryujinx.Graphics.Gpu.Image
         /// <param name="flags">The texture search flags, defines texture comparison rules</param>
         /// <param name="info">Texture information of the texture to be found or created</param>
         /// <param name="layerSize">Size in bytes of a single texture layer</param>
+        /// <param name="sizeHint">A hint indicating the minimum used size for the texture</param>
         /// <param name="range">Optional ranges of physical memory where the texture data is located</param>
         /// <returns>The texture</returns>
         public Texture FindOrCreateTexture(
@@ -514,9 +564,11 @@ namespace Ryujinx.Graphics.Gpu.Image
             TextureSearchFlags flags,
             TextureInfo info,
             int layerSize = 0,
+            Size? sizeHint = null,
             MultiRange? range = null)
         {
             bool isSamplerTexture = (flags & TextureSearchFlags.ForSampler) != 0;
+            bool discard = (flags & TextureSearchFlags.DiscardData) != 0;
 
             TextureScaleMode scaleMode = IsUpscaleCompatible(info, (flags & TextureSearchFlags.WithUpscale) != 0);
 
@@ -612,6 +664,8 @@ namespace Ryujinx.Graphics.Gpu.Image
 
             if (texture != null)
             {
+                DiscardIfNeeded(discard, texture, sizeHint);
+
                 texture.SynchronizeMemory();
 
                 return texture;
@@ -907,7 +961,7 @@ namespace Ryujinx.Graphics.Gpu.Image
 
                 // We need to synchronize before copying the old view data to the texture,
                 // otherwise the copied data would be overwritten by a future synchronization.
-                texture.InitializeData(false, setData);
+                texture.InitializeData(false, setData && !ShouldDiscard(discard, texture, sizeHint));
 
                 texture.Group.InitializeOverlaps();
 
diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs
index 1b947cd3..be33247c 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs
@@ -278,6 +278,24 @@ namespace Ryujinx.Graphics.Gpu.Image
             return dirty;
         }
 
+        /// <summary>
+        /// Discards all data for a given texture.
+        /// This clears all dirty flags, modified flags, and pending copies from other textures.
+        /// </summary>
+        /// <param name="texture">The texture being discarded</param>
+        public void DiscardData(Texture texture)
+        {
+            EvaluateRelevantHandles(texture, (baseHandle, regionCount, split) =>
+            {
+                for (int i = 0; i < regionCount; i++)
+                {
+                    TextureGroupHandle group = _handles[baseHandle + i];
+
+                    group.DiscardData();
+                }
+            });
+        }
+
         /// <summary>
         /// Synchronize memory for a given texture.
         /// If overlapping tracking handles are dirty, fully or partially synchronize the texture data.
diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroupHandle.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroupHandle.cs
index ef7198e8..84171c7a 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroupHandle.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroupHandle.cs
@@ -2,7 +2,6 @@
 using Ryujinx.Memory.Tracking;
 using System;
 using System.Collections.Generic;
-using System.Linq;
 using System.Threading;
 
 namespace Ryujinx.Graphics.Gpu.Image
@@ -155,6 +154,24 @@ namespace Ryujinx.Graphics.Gpu.Image
             }
         }
 
+        /// <summary>
+        /// Discards all data for this handle.
+        /// This clears all dirty flags, modified flags, and pending copies from other handles.
+        /// </summary>
+        public void DiscardData()
+        {
+            Modified = false;
+            DeferredCopy = null;
+
+            foreach (RegionHandle handle in Handles)
+            {
+                if (handle.Dirty)
+                {
+                    handle.Reprotect();
+                }
+            }
+        }
+
         /// <summary>
         /// Calculate a list of which views overlap this handle.
         /// </summary>
diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
index fb2a97b0..f651420a 100644
--- a/src/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
+++ b/src/Ryujinx.Graphics.Gpu/Image/TextureSearchFlags.cs
@@ -14,5 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Image
         DepthAlias = 1 << 3,
         WithUpscale = 1 << 4,
         NoCreate = 1 << 5,
+        DiscardData = 1 << 6,
     }
 }
diff --git a/src/Ryujinx.Graphics.Gpu/Window.cs b/src/Ryujinx.Graphics.Gpu/Window.cs
index 1f94122d..3b236853 100644
--- a/src/Ryujinx.Graphics.Gpu/Window.cs
+++ b/src/Ryujinx.Graphics.Gpu/Window.cs
@@ -200,7 +200,7 @@ namespace Ryujinx.Graphics.Gpu
             {
                 pt.AcquireCallback(_context, pt.UserObj);
 
-                Image.Texture texture = pt.Cache.FindOrCreateTexture(null, TextureSearchFlags.WithUpscale, pt.Info, 0, pt.Range);
+                Image.Texture texture = pt.Cache.FindOrCreateTexture(null, TextureSearchFlags.WithUpscale, pt.Info, 0, range: pt.Range);
 
                 pt.Cache.Tick();
 