a731ab3a2a
* Start of the ARMeilleure project * Refactoring around the old IRAdapter, now renamed to PreAllocator * Optimize the LowestBitSet method * Add CLZ support and fix CLS implementation * Add missing Equals and GetHashCode overrides on some structs, misc small tweaks * Implement the ByteSwap IR instruction, and some refactoring on the assembler * Implement the DivideUI IR instruction and fix 64-bits IDIV * Correct constant operand type on CSINC * Move division instructions implementation to InstEmitDiv * Fix destination type for the ConditionalSelect IR instruction * Implement UMULH and SMULH, with new IR instructions * Fix some issues with shift instructions * Fix constant types for BFM instructions * Fix up new tests using the new V128 struct * Update tests * Move DIV tests to a separate file * Add support for calls, and some instructions that depends on them * Start adding support for SIMD & FP types, along with some of the related ARM instructions * Fix some typos and the divide instruction with FP operands * Fix wrong method call on Clz_V * Implement ARM FP & SIMD move instructions, Saddlv_V, and misc. fixes * Implement SIMD logical instructions and more misc. fixes * Fix PSRAD x86 instruction encoding, TRN, UABD and UABDL implementations * Implement float conversion instruction, merge in LDj3SNuD fixes, and some other misc. 
fixes * Implement SIMD shift instruction and fix Dup_V * Add SCVTF and UCVTF (vector, fixed-point) variants to the opcode table * Fix check with tolerance on tester * Implement FP & SIMD comparison instructions, and some fixes * Update FCVT (Scalar) encoding on the table to support the Half-float variants * Support passing V128 structs, some cleanup on the register allocator, merge LDj3SNuD fixes * Use old memory access methods, made a start on SIMD memory insts support, some fixes * Fix float constant passed to functions, save and restore non-volatile XMM registers, other fixes * Fix arguments count with struct return values, other fixes * More instructions * Misc. fixes and integrate LDj3SNuD fixes * Update tests * Add a faster linear scan allocator, unwinding support on windows, and other changes * Update Ryujinx.HLE * Update Ryujinx.Graphics * Fix V128 return pointer passing, RCX is clobbered * Update Ryujinx.Tests * Update ITimeZoneService * Stop using GetFunctionPointer as that can't be called from native code, misc. 
fixes and tweaks * Use generic GetFunctionPointerForDelegate method and other tweaks * Some refactoring on the code generator, assert on invalid operations and use a separate enum for intrinsics * Remove some unused code on the assembler * Fix REX.W prefix regression on float conversion instructions, add some sort of profiler * Add hardware capability detection * Fix regression on Sha1h and revert Fcm** changes * Add SSE2-only paths on vector extract and insert, some refactoring on the pre-allocator * Fix silly mistake introduced on last commit on CpuId * Generate inline stack probes when the stack allocation is too large * Initial support for the System-V ABI * Support multiple destination operands * Fix SSE2 VectorInsert8 path, and other fixes * Change placement of XMM callee save and restore code to match other compilers * Rename Dest to Destination and Inst to Instruction * Fix a regression related to calls and the V128 type * Add an extra space on comments to match code style * Some refactoring * Fix vector insert FP32 SSE2 path * Port over the ARM32 instructions * Avoid memory protection races on JIT Cache * Another fix on VectorInsert FP32 (thanks to LDj3SNuD * Float operands don't need to use the same register when VEX is supported * Add a new register allocator, higher quality code for hot code (tier up), and other tweaks * Some nits, small improvements on the pre allocator * CpuThreadState is gone * Allow changing CPU emulators with a config entry * Add runtime identifiers on the ARMeilleure project * Allow switching between CPUs through a config entry (pt. 
2) * Change win10-x64 to win-x64 on projects * Update the Ryujinx project to use ARMeilleure * Ensure that the selected register is valid on the hybrid allocator * Allow exiting on returns to 0 (should fix test regression) * Remove register assignments for most used variables on the hybrid allocator * Do not use fixed registers as spill temp * Add missing namespace and remove unneeded using * Address PR feedback * Fix types, etc * Enable AssumeStrictAbiCompliance by default * Ensure that Spill and Fill don't load or store any more than necessary
404 lines
14 KiB
C#
404 lines
14 KiB
C#
using ARMeilleure.Memory;
|
|
using Ryujinx.Audio;
|
|
using Ryujinx.Audio.Adpcm;
|
|
using Ryujinx.Common.Logging;
|
|
using Ryujinx.HLE.HOS.Ipc;
|
|
using Ryujinx.HLE.HOS.Kernel.Common;
|
|
using Ryujinx.HLE.HOS.Kernel.Threading;
|
|
using Ryujinx.HLE.Utilities;
|
|
using System;
|
|
using System.Runtime.InteropServices;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
|
|
namespace Ryujinx.HLE.HOS.Services.Aud.AudioRenderer
|
|
{
|
|
class IAudioRenderer : IpcService, IDisposable
|
|
{
|
|
// Number of samples (per channel) mixed into every buffer that is appended
// to the audio output. Ideally, this value should be neither too small (to
// avoid the player starving due to running out of samples) nor too large
// (to avoid high latency).
private const int MixBufferSamplesCount = 960;

// Signaled from the audio output callback; its readable end is handed to
// the guest by QuerySystemEvent.
private readonly KEvent _updateEvent;

// Memory used to read ADPCM coefficients and voice sample data.
private readonly IMemoryManager _memory;

// Host audio output the mixed buffers are appended to.
private readonly IAalOutput _audioOut;

// Parameters this renderer was created with.
private readonly AudioRendererParameter _params;

// One entry per memory pool (EffectCount + VoiceCount * 4 entries).
private readonly MemoryPoolContext[] _memoryPools;

// One entry per voice (VoiceCount entries).
private readonly VoiceContext[] _voices;

// Track identifier returned by the audio output when the track was opened.
private readonly int _track;

// State reported by GetState; changed by StartAudioRenderer/StopAudioRenderer.
private PlayState _playState;
|
|
|
|
// Creates the renderer: opens a track on the host audio output, allocates the
// memory pool and voice state arrays, queues the initial mixed buffers and
// starts the track. The renderer begins in the Stopped play state.
public IAudioRenderer(
    Horizon                system,
    IMemoryManager         memory,
    IAalOutput             audioOut,
    AudioRendererParameter Params)
{
    _updateEvent = new KEvent(system);

    _memory   = memory;
    _audioOut = audioOut;
    _params   = Params;

    // AudioCallback is invoked by the audio output and signals the update event.
    _track = audioOut.OpenTrack(AudioConsts.HostSampleRate, AudioConsts.HostChannelsCount, AudioCallback);

    int memoryPoolCount = Params.EffectCount + Params.VoiceCount * 4;

    _memoryPools = CreateArray<MemoryPoolContext>(memoryPoolCount);
    _voices      = CreateArray<VoiceContext>(Params.VoiceCount);

    // The voices must exist before this call, since mixing iterates over them.
    InitializeAudioOut();

    _playState = PlayState.Stopped;
}
|
|
|
|
[Command(0)]
// GetSampleRate() -> u32
// Writes the sample rate from the renderer parameters to the response.
public ResultCode GetSampleRate(ServiceCtx context)
{
    var sampleRate = _params.SampleRate;

    context.ResponseData.Write(sampleRate);

    return ResultCode.Success;
}
|
|
|
|
[Command(1)]
// GetSampleCount() -> u32
// Writes the sample count from the renderer parameters to the response.
public ResultCode GetSampleCount(ServiceCtx context)
{
    var sampleCount = _params.SampleCount;

    context.ResponseData.Write(sampleCount);

    return ResultCode.Success;
}
|
|
|
|
[Command(2)]
// GetMixBufferCount() -> u32
// Writes the mix count from the renderer parameters to the response.
public ResultCode GetMixBufferCount(ServiceCtx context)
{
    var mixCount = _params.MixCount;

    context.ResponseData.Write(mixCount);

    return ResultCode.Success;
}
|
|
|
|
[Command(3)]
// GetState() -> u32
// Writes the current play state (as an integer) to the response and logs
// the call as a stub together with the state's name.
public ResultCode GetState(ServiceCtx context)
{
    PlayState state = _playState;

    context.ResponseData.Write((int)state);

    string stateName = Enum.GetName(typeof(PlayState), state);

    Logger.PrintStub(LogClass.ServiceAudio, new { State = stateName });

    return ResultCode.Success;
}
|
|
|
|
// Callback registered with the audio output when the track is opened;
// signals the readable side of the update event.
private void AudioCallback() => _updateEvent.ReadableEvent.Signal();
|
|
|
|
// Allocates an array with "size" elements and fills every slot, in ascending
// index order, with a new default-constructed instance of T.
private static T[] CreateArray<T>(int size) where T : new()
{
    T[] items = new T[size];

    int slot = 0;

    while (slot < size)
    {
        items[slot] = new T();

        slot++;
    }

    return items;
}
|
|
|
|
// Queues three mixed buffers (tags 0, 1 and 2) on the track and then
// starts the track on the audio output.
private void InitializeAudioOut()
{
    const long InitialBufferCount = 3;

    for (long tag = 0; tag < InitialBufferCount; tag++)
    {
        AppendMixedBuffer(tag);
    }

    _audioOut.Start(_track);
}
|
|
|
|
[Command(4)]
// RequestUpdateAudioRenderer(buffer<nn::audio::detail::AudioRendererUpdateDataHeader, 5>)
// -> (buffer<nn::audio::detail::AudioRendererUpdateDataHeader, 6>, buffer<nn::audio::detail::AudioRendererUpdateDataHeader, 6>)
//
// Reads the update data from the first send buffer, applies the memory pool
// and voice updates to the renderer state, mixes audio for any released
// buffers, and writes the output header followed by the memory pool and
// voice statuses to the first receive buffer.
public ResultCode RequestUpdateAudioRenderer(ServiceCtx context)
{
    long outputPosition = context.Request.ReceiveBuff[0].Position;
    long outputSize     = context.Request.ReceiveBuff[0].Size;

    // Clear the whole output buffer; only part of it is written below.
    MemoryHelper.FillWithZeros(context.Memory, outputPosition, (int)outputSize);

    long inputPosition = context.Request.SendBuff[0].Position;

    StructReader reader = new StructReader(context.Memory, inputPosition);
    StructWriter writer = new StructWriter(context.Memory, outputPosition);

    UpdateDataHeader inputHeader = reader.Read<UpdateDataHeader>();

    // The behavior section is read but its contents are not used here.
    reader.Read<BehaviorIn>(inputHeader.BehaviorSize);

    MemoryPoolIn[] memoryPoolsIn = reader.Read<MemoryPoolIn>(inputHeader.MemoryPoolSize);

    // The section size comes from the guest, so never index past the pools
    // that were allocated for this renderer.
    int memoryPoolCount = Math.Min(memoryPoolsIn.Length, _memoryPools.Length);

    for (int index = 0; index < memoryPoolCount; index++)
    {
        MemoryPoolIn memoryPool = memoryPoolsIn[index];

        if (memoryPool.State == MemoryPoolState.RequestAttach)
        {
            _memoryPools[index].OutStatus.State = MemoryPoolState.Attached;
        }
        else if (memoryPool.State == MemoryPoolState.RequestDetach)
        {
            _memoryPools[index].OutStatus.State = MemoryPoolState.Detached;
        }
    }

    // The voice channel resource section is read but its contents are not used here.
    reader.Read<VoiceChannelResourceIn>(inputHeader.VoiceResourceSize);

    VoiceIn[] voicesIn = reader.Read<VoiceIn>(inputHeader.VoiceSize);

    // Same as above: the voice section size is guest-controlled.
    int voiceCount = Math.Min(voicesIn.Length, _voices.Length);

    for (int index = 0; index < voiceCount; index++)
    {
        VoiceIn voice = voicesIn[index];

        VoiceContext voiceCtx = _voices[index];

        voiceCtx.SetAcquireState(voice.Acquired != 0);

        if (voice.Acquired == 0)
        {
            continue;
        }

        // Format related state is only taken on the voice's first update.
        if (voice.FirstUpdate != 0)
        {
            voiceCtx.AdpcmCtx = GetAdpcmDecoderContext(
                voice.AdpcmCoeffsPosition,
                voice.AdpcmCoeffsSize);

            voiceCtx.SampleFormat  = voice.SampleFormat;
            voiceCtx.SampleRate    = voice.SampleRate;
            voiceCtx.ChannelsCount = voice.ChannelsCount;

            voiceCtx.SetBufferIndex(voice.BaseWaveBufferIndex);
        }

        voiceCtx.WaveBuffers[0] = voice.WaveBuffer0;
        voiceCtx.WaveBuffers[1] = voice.WaveBuffer1;
        voiceCtx.WaveBuffers[2] = voice.WaveBuffer2;
        voiceCtx.WaveBuffers[3] = voice.WaveBuffer3;
        voiceCtx.Volume         = voice.Volume;
        voiceCtx.PlayState      = voice.PlayState;
    }

    UpdateAudio();

    UpdateDataHeader outputHeader = new UpdateDataHeader();

    int updateHeaderSize = Marshal.SizeOf<UpdateDataHeader>();

    outputHeader.Revision               = IAudioRendererManager.RevMagic;
    outputHeader.BehaviorSize           = 0xb0;
    outputHeader.MemoryPoolSize         = (_params.EffectCount + _params.VoiceCount * 4) * 0x10;
    outputHeader.VoiceSize              = _params.VoiceCount  * 0x10;
    outputHeader.EffectSize             = _params.EffectCount * 0x10;
    outputHeader.SinkSize               = _params.SinkCount   * 0x20;
    outputHeader.PerformanceManagerSize = 0x10;
    outputHeader.TotalSize              = updateHeaderSize             +
                                          outputHeader.BehaviorSize    +
                                          outputHeader.MemoryPoolSize  +
                                          outputHeader.VoiceSize       +
                                          outputHeader.EffectSize      +
                                          outputHeader.SinkSize        +
                                          outputHeader.PerformanceManagerSize;

    writer.Write(outputHeader);

    foreach (MemoryPoolContext memoryPool in _memoryPools)
    {
        writer.Write(memoryPool.OutStatus);
    }

    foreach (VoiceContext voice in _voices)
    {
        writer.Write(voice.OutStatus);
    }

    return ResultCode.Success;
}
|
|
|
|
[Command(5)]
// Start()
// Logs the call as a stub and switches the play state to Playing.
public ResultCode StartAudioRenderer(ServiceCtx context)
{
    Logger.PrintStub(LogClass.ServiceAudio);

    const PlayState NewState = PlayState.Playing;

    _playState = NewState;

    return ResultCode.Success;
}
|
|
|
|
[Command(6)]
// Stop()
// Logs the call as a stub and switches the play state to Stopped.
public ResultCode StopAudioRenderer(ServiceCtx context)
{
    Logger.PrintStub(LogClass.ServiceAudio);

    const PlayState NewState = PlayState.Stopped;

    _playState = NewState;

    return ResultCode.Success;
}
|
|
|
|
[Command(7)]
// QuerySystemEvent() -> handle<copy, event>
// Creates a handle for the readable side of the update event in the calling
// process' handle table and returns it as a copy handle.
// Throws InvalidOperationException when the handle can not be generated.
public ResultCode QuerySystemEvent(ServiceCtx context)
{
    var result = context.Process.HandleTable.GenerateHandle(_updateEvent.ReadableEvent, out int handle);

    bool generated = result == KernelResult.Success;

    if (!generated)
    {
        throw new InvalidOperationException("Out of handles!");
    }

    context.Response.HandleDesc = IpcHandleDesc.MakeCopy(handle);

    return ResultCode.Success;
}
|
|
|
|
// Builds an ADPCM decoder context from the 16-bit coefficients stored at
// "position" in memory. "size" is the size of the coefficient data in bytes.
// Returns null when size is zero.
private AdpcmDecoderContext GetAdpcmDecoderContext(long position, long size)
{
    if (size == 0)
    {
        return null;
    }

    // Each coefficient takes two bytes. Iterating by coefficient count (rather
    // than by byte offset up to "size") keeps an odd size from writing one
    // element past the end of the array; a trailing odd byte is ignored.
    long coefficientCount = size >> 1;

    AdpcmDecoderContext context = new AdpcmDecoderContext();

    context.Coefficients = new short[coefficientCount];

    for (long index = 0; index < coefficientCount; index++)
    {
        context.Coefficients[index] = _memory.ReadInt16(position + index * 2);
    }

    return context;
}
|
|
|
|
// Asks the audio output for up to two released buffers on the track and
// appends a freshly mixed buffer for each released tag.
private void UpdateAudio()
{
    long[] releasedTags = _audioOut.GetReleasedBuffers(_track, 2);

    foreach (long tag in releasedTags)
    {
        AppendMixedBuffer(tag);
    }
}
|
|
|
|
// Mixes up to MixBufferSamplesCount samples from every playing voice into a
// single buffer, saturates it to 16 bits and appends it to the track under
// the given tag.
private void AppendMixedBuffer(long tag)
{
    int[] mixBuffer = new int[MixBufferSamplesCount * AudioConsts.HostChannelsCount];

    foreach (VoiceContext voice in _voices)
    {
        // Skip voices that are not playing or have nothing queued.
        if (!voice.Playing || voice.CurrentWaveBuffer.Size == 0)
        {
            continue;
        }

        int outOffset      = 0;
        int pendingSamples = MixBufferSamplesCount;

        // Read the volume once per voice instead of once per sample.
        float volume = voice.Volume;

        while (pendingSamples > 0)
        {
            int[] samples = voice.GetBufferData(_memory, pendingSamples, out int returnedSamples);

            // The voice ran out of data; the rest of the buffer keeps
            // whatever has been mixed so far.
            if (returnedSamples == 0)
            {
                break;
            }

            pendingSamples -= returnedSamples;

            for (int offset = 0; offset < samples.Length; offset++)
            {
                mixBuffer[outOffset++] += (int)(samples[offset] * volume);
            }
        }
    }

    _audioOut.AppendBuffer(_track, tag, GetFinalBuffer(mixBuffer));
}
|
|
|
|
// Converts 32-bit mixed samples to 16-bit samples with saturation.
// When SSE2 is available, blocks of 32 samples are packed with
// PackSignedSaturate; any remaining samples go through DspUtils.Saturate.
private static unsafe short[] GetFinalBuffer(int[] buffer)
{
    const int SamplesPerIteration = 32;

    int total = buffer.Length;

    short[] saturated = new short[total];

    int position = 0;

    if (Sse2.IsSupported)
    {
        fixed (int* source = buffer)
        fixed (short* destination = saturated)
        {
            while (position + SamplesPerIteration <= total)
            {
                int*   src = source      + position;
                short* dst = destination + position;

                // Manually unrolled: four independent packs of 8 samples each
                // per iteration. All loads are issued before the stores.
                Vector128<int> lo0 = Sse2.LoadVector128(src + 0);
                Vector128<int> hi0 = Sse2.LoadVector128(src + 4);
                Vector128<int> lo1 = Sse2.LoadVector128(src + 8);
                Vector128<int> hi1 = Sse2.LoadVector128(src + 12);
                Vector128<int> lo2 = Sse2.LoadVector128(src + 16);
                Vector128<int> hi2 = Sse2.LoadVector128(src + 20);
                Vector128<int> lo3 = Sse2.LoadVector128(src + 24);
                Vector128<int> hi3 = Sse2.LoadVector128(src + 28);

                Vector128<short> packed0 = Sse2.PackSignedSaturate(lo0, hi0);
                Vector128<short> packed1 = Sse2.PackSignedSaturate(lo1, hi1);
                Vector128<short> packed2 = Sse2.PackSignedSaturate(lo2, hi2);
                Vector128<short> packed3 = Sse2.PackSignedSaturate(lo3, hi3);

                Sse2.Store(dst + 0,  packed0);
                Sse2.Store(dst + 8,  packed1);
                Sse2.Store(dst + 16, packed2);
                Sse2.Store(dst + 24, packed3);

                position += SamplesPerIteration;
            }
        }
    }

    // Scalar path: everything when SSE2 is unavailable, otherwise the tail
    // that did not fill a whole block.
    while (position < total)
    {
        saturated[position] = DspUtils.Saturate(buffer[position]);

        position++;
    }

    return saturated;
}
|
|
|
|
// Releases the resources held by the renderer.
public void Dispose()
{
    Dispose(true);

    // Standard dispose pattern for an unsealed type: a derived class that
    // adds a finalizer does not need it to run once Dispose was called.
    GC.SuppressFinalize(this);
}
|
|
|
|
// Closes the renderer's track on the audio output when called with
// disposing set to true; does nothing otherwise.
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    _audioOut.CloseTrack(_track);
}
|
|
}
|
|
} |