GPU: Eliminate CB0 accesses when storage buffer accesses are resolved (#3847)

* Eliminate CB0 accesses

Still some work to do, decouple from hle?

* Forgot the important part somehow

* Fix and improve alignment test

* Address Feedback

* Remove some complexity when checking storage buffer alignment

* Update Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
This commit is contained in:
riperiperi 2022-11-17 17:47:41 +00:00 committed by GitHub
parent 391e08dd27
commit 33a4d7d1ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 317 additions and 68 deletions

View file

@ -138,7 +138,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
qmd.CtaThreadDimension1,
qmd.CtaThreadDimension2,
localMemorySize,
sharedMemorySize);
sharedMemorySize,
_channel.BufferManager.HasUnalignedStorageBuffers);
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
@ -150,6 +151,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
ShaderProgramInfo info = cs.Shaders[0].Info;
bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
{
// Refetch the shader, as assumptions about storage buffer alignment have changed.
cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
info = cs.Shaders[0].Info;
}
for (int index = 0; index < info.CBuffers.Count; index++)
{
BufferDescriptor cb = info.CBuffers[index];
@ -174,21 +202,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
_channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
}
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
_channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
_channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);