New NVDEC and VIC implementation (#1384)

* Initial NVDEC and VIC implementation

* Update FFmpeg.AutoGen to 4.3.0

* Add nvdec dependencies for Windows

* Unify some VP9 structures

* Rename VP9 structure fields

* Improvements to Video API

* XML docs for Common.Memory

* Remove now unused or redundant overloads from MemoryAccessor

* NVDEC UV surface read/write scalar paths

* Add FIXME comments about hacky things/stuff that will need to be fixed in the future

* Cleaned up VP9 memory allocation

* Remove some debug logs

* Rename some VP9 structs

* Remove unused struct

* No need to compile Ryujinx.Graphics.Host1x with unsafe anymore

* Name AsyncWorkQueue threads to make debugging easier

* Make Vp9PictureInfo a ref struct

* LayoutConverter no longer needs the depth argument (broken by rebase)

* Pooling of VP9 buffers, plus fix a memory leak on VP9

* Really wish VS could rename projects properly...

* Address feedback

* Remove using

* Catch OperationCanceledException

* Add licensing information

* Add THIRDPARTY.md to release too

Co-authored-by: Thog <me@thog.eu>
This commit is contained in:
gdkchan 2020-07-12 00:07:01 -03:00 committed by GitHub
parent 38b26cf424
commit 4d02a2d2c0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
202 changed files with 20563 additions and 2567 deletions

View file

@ -0,0 +1,40 @@
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// FFmpeg-backed H.264 decoder. Every call rebuilds the SPS/PPS headers from the picture
/// info, puts them in front of the incoming bitstream and decodes the result.
/// </summary>
public class Decoder : IH264Decoder
{
    /// <summary>
    /// Always false for this implementation.
    /// </summary>
    public bool IsHardwareAccelerated => false;

    private const int WorkBufferSize = 0x200;

    // Number of zeroed bytes kept after the end of the data handed to FFmpeg.
    // avcodec_send_packet requires the packet buffer to be AV_INPUT_BUFFER_PADDING_SIZE
    // bytes larger than the data, because optimized bitstream readers may read past
    // the end. The value mirrors AV_INPUT_BUFFER_PADDING_SIZE (64 in FFmpeg 4.x).
    private const int InputBufferPadding = 64;

    private readonly byte[] _workBuffer = new byte[WorkBufferSize];
    private readonly FFmpegContext _context = new FFmpegContext();

    /// <summary>
    /// Creates a new output surface. The requested size is not used here.
    /// </summary>
    /// <param name="width">Requested width (ignored)</param>
    /// <param name="height">Requested height (ignored)</param>
    /// <returns>A new <see cref="Surface"/></returns>
    public ISurface CreateSurface(int width, int height)
    {
        return new Surface();
    }

    /// <summary>
    /// Decodes one frame into <paramref name="output"/>.
    /// </summary>
    /// <param name="pictureInfo">Picture parameters used to rebuild the SPS and PPS</param>
    /// <param name="output">Destination surface, must be a <see cref="Surface"/> created by this decoder</param>
    /// <param name="bitstream">Bitstream data without SPS/PPS</param>
    /// <returns>True when FFmpeg reported success (return code 0), false otherwise</returns>
    public bool Decode(ref H264PictureInfo pictureInfo, ISurface output, ReadOnlySpan<byte> bitstream)
    {
        Span<byte> bs = Prepend(bitstream, SpsAndPpsReconstruction.Reconstruct(ref pictureInfo, _workBuffer));

        return _context.DecodeFrame((Surface)output, bs) == 0;
    }

    /// <summary>
    /// Builds a new buffer holding <paramref name="prep"/> followed by <paramref name="data"/>.
    /// The backing array is over-allocated by <see cref="InputBufferPadding"/> zeroed bytes,
    /// but the returned span only covers the real data.
    /// </summary>
    /// <param name="data">Data placed last</param>
    /// <param name="prep">Data placed first</param>
    /// <returns>Span over the concatenated data (padding excluded)</returns>
    private static Span<byte> Prepend(ReadOnlySpan<byte> data, ReadOnlySpan<byte> prep)
    {
        int length = prep.Length + data.Length;

        // New arrays are zero initialized, so the tail padding needs no explicit clear.
        byte[] output = new byte[length + InputBufferPadding];

        prep.CopyTo(output);
        data.CopyTo(new Span<byte>(output, prep.Length, data.Length));

        return new Span<byte>(output, 0, length);
    }

    /// <summary>
    /// Releases the underlying FFmpeg context.
    /// </summary>
    public void Dispose() => _context.Dispose();
}
}

View file

@ -0,0 +1,51 @@
using FFmpeg.AutoGen;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Wraps an FFmpeg H.264 decoder context: creates it, feeds it packets and frees it.
/// </summary>
unsafe class FFmpegContext : IDisposable
{
    private readonly AVCodec* _codec;
    private AVCodecContext* _context;

    /// <summary>
    /// Looks up the H.264 decoder, allocates a context for it and opens it.
    /// </summary>
    public FFmpegContext()
    {
        _codec = ffmpeg.avcodec_find_decoder(AVCodecID.AV_CODEC_ID_H264);
        _context = ffmpeg.avcodec_alloc_context3(_codec);

        // NOTE(review): the result of avcodec_open2 is not checked, so a failure to open
        // the codec only surfaces later as a non-zero return code from DecodeFrame.
        ffmpeg.avcodec_open2(_context, _codec, null);
    }

    /// <summary>
    /// Sends one packet to the decoder and tries to receive a decoded frame.
    /// </summary>
    /// <param name="output">Surface whose frame receives the decoded picture</param>
    /// <param name="bitstream">Packet data to decode</param>
    /// <returns>
    /// 0 on success; otherwise the non-zero code returned by avcodec_send_packet,
    /// or the result of avcodec_receive_frame
    /// </returns>
    public int DecodeFrame(Surface output, ReadOnlySpan<byte> bitstream)
    {
        AVPacket packet;

        ffmpeg.av_init_packet(&packet);

        // The span must stay pinned while FFmpeg reads from the raw pointer.
        fixed (byte* ptr = bitstream)
        {
            packet.data = ptr;
            packet.size = bitstream.Length;

            int rc = ffmpeg.avcodec_send_packet(_context, &packet);

            if (rc != 0)
            {
                return rc;
            }
        }

        return ffmpeg.avcodec_receive_frame(_context, output.Frame);
    }

    /// <summary>
    /// Frees the codec context.
    /// </summary>
    public void Dispose()
    {
        // avcodec_free_context closes the codec itself and writes NULL back to _context,
        // so a separate avcodec_close call (discouraged by the FFmpeg documentation)
        // is not needed, and a second Dispose becomes a no-op.
        fixed (AVCodecContext** ppContext = &_context)
        {
            ffmpeg.avcodec_free_context(ppContext);
        }
    }
}
}

View file

@ -0,0 +1,121 @@
using System;
using System.Numerics;
namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Writes bit fields, most significant bit first, into a caller supplied byte array.
/// Bits are staged in a single byte and stored to the array once that byte is full
/// or when <see cref="Flush"/> is called.
/// </summary>
struct H264BitStreamWriter
{
    private const int BufferSize = 8;

    private readonly byte[] _workBuffer;
    private int _offset;
    private int _buffer;
    private int _bufferPos;

    /// <summary>
    /// Creates a writer that stores its output at the start of <paramref name="workBuffer"/>.
    /// </summary>
    /// <param name="workBuffer">Destination array</param>
    public H264BitStreamWriter(byte[] workBuffer)
    {
        _workBuffer = workBuffer;
        _offset = 0;
        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single bit.
    /// </summary>
    /// <param name="value">True for 1, false for 0</param>
    public void WriteBit(bool value)
    {
        if (value)
        {
            WriteBits(1, 1);
        }
        else
        {
            WriteBits(0, 1);
        }
    }

    /// <summary>
    /// Writes the low <paramref name="valueSize"/> bits of <paramref name="value"/>,
    /// highest bit first.
    /// </summary>
    /// <param name="value">Value to write</param>
    /// <param name="valueSize">Number of bits to write</param>
    public void WriteBits(int value, int valueSize)
    {
        int pending = valueSize;

        while (pending > 0)
        {
            // GetFreeBufferBits may flush, so it has to run before _bufferPos is read below.
            int chunk = Math.Min(pending, GetFreeBufferBits());

            int chunkMask = (1 << chunk) - 1;
            int chunkBits = (value >> (pending - chunk)) & chunkMask;

            _buffer |= chunkBits << (BufferSize - _bufferPos - chunk);

            _bufferPos += chunk;
            pending -= chunk;
        }
    }

    /// <summary>
    /// Returns how many bits still fit in the staging byte, storing it first when it is full.
    /// </summary>
    private int GetFreeBufferBits()
    {
        if (_bufferPos == BufferSize)
        {
            Flush();
        }

        return BufferSize - _bufferPos;
    }

    /// <summary>
    /// Stores the staging byte (unused low bits left as zero) when it holds at least one bit.
    /// </summary>
    public void Flush()
    {
        if (_bufferPos == 0)
        {
            return;
        }

        _workBuffer[_offset] = (byte)_buffer;
        _offset++;

        _buffer = 0;
        _bufferPos = 0;
    }

    /// <summary>
    /// Writes a single 1 bit and flushes the staging byte.
    /// </summary>
    public void End()
    {
        WriteBits(1, 1);
        Flush();
    }

    /// <summary>
    /// Gets the bytes stored so far. Bits still held in the staging byte are not included.
    /// </summary>
    public Span<byte> AsSpan()
    {
        return new Span<byte>(_workBuffer, 0, _offset);
    }

    /// <summary>
    /// Writes an unsigned value using a fixed number of bits.
    /// </summary>
    public void WriteU(uint value, int valueSize)
    {
        WriteBits((int)value, valueSize);
    }

    /// <summary>
    /// Writes a signed Exp-Golomb coded value.
    /// </summary>
    public void WriteSe(int value)
    {
        // Positive k maps to code number 2k - 1, zero and negative k map to 2|k|.
        int magnitude = value < 0 ? -value : value;
        int codeNum = magnitude << 1;

        if (value > 0)
        {
            codeNum--;
        }

        WriteUe((uint)codeNum);
    }

    /// <summary>
    /// Writes an unsigned Exp-Golomb coded value.
    /// </summary>
    public void WriteUe(uint value)
    {
        uint codeNum = value + 1;
        int length = 32 - BitOperations.LeadingZeroCount(codeNum);

        // Prefix: (length - 1) zero bits followed by a single one bit.
        WriteBits(1, length);

        // Suffix: the bits of codeNum below its leading one.
        uint suffix = codeNum - (1u << (length - 1));
        WriteBits((int)suffix, length - 1);
    }
}
}

View file

@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFramework>netcoreapp3.1</TargetFramework>
    <!-- The FFmpeg interop code in this project uses pointers, so unsafe blocks are
         enabled for every configuration and platform rather than only Debug/Release AnyCPU. -->
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="FFmpeg.AutoGen" Version="4.3.0" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
  </ItemGroup>

</Project>

View file

@ -0,0 +1,159 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Rebuilds the H.264 sequence and picture parameter set NAL units from the fields of a
/// <see cref="H264PictureInfo"/>, so that they can be placed in front of the slice data.
/// </summary>
static class SpsAndPpsReconstruction
{
    /// <summary>
    /// Writes a sequence parameter set followed by a picture parameter set into
    /// <paramref name="workBuffer"/>.
    /// </summary>
    /// <param name="pictureInfo">Picture parameters to encode</param>
    /// <param name="workBuffer">Scratch array that receives the encoded bytes</param>
    /// <returns>Span over the part of <paramref name="workBuffer"/> that was written</returns>
    public static Span<byte> Reconstruct(ref H264PictureInfo pictureInfo, byte[] workBuffer)
    {
        H264BitStreamWriter writer = new H264BitStreamWriter(workBuffer);

        // Sequence Parameter Set.
        writer.WriteU(1, 24); // Start code prefix (0x000001)
        writer.WriteU(0, 1); // Forbidden zero bit
        writer.WriteU(3, 2); // NAL ref idc
        writer.WriteU(7, 5); // NAL unit type (SPS)
        writer.WriteU(100, 8); // Profile idc
        writer.WriteU(0, 8); // Reserved
        writer.WriteU(31, 8); // Level idc
        writer.WriteUe(0); // Seq parameter set id
        writer.WriteUe(pictureInfo.ChromaFormatIdc);

        if (pictureInfo.ChromaFormatIdc == 3)
        {
            writer.WriteBit(false); // Separate colour plane flag
        }

        writer.WriteUe(0); // Bit depth luma minus 8
        writer.WriteUe(0); // Bit depth chroma minus 8
        writer.WriteBit(pictureInfo.QpprimeYZeroTransformBypassFlag);
        writer.WriteBit(false); // Scaling matrix present flag

        writer.WriteUe(pictureInfo.Log2MaxFrameNumMinus4);
        writer.WriteUe(pictureInfo.PicOrderCntType);

        if (pictureInfo.PicOrderCntType == 0)
        {
            writer.WriteUe(pictureInfo.Log2MaxPicOrderCntLsbMinus4);
        }
        else if (pictureInfo.PicOrderCntType == 1)
        {
            writer.WriteBit(pictureInfo.DeltaPicOrderAlwaysZeroFlag);

            writer.WriteSe(0); // Offset for non-ref pic
            writer.WriteSe(0); // Offset for top to bottom field
            writer.WriteUe(0); // Num ref frames in pic order cnt cycle
        }

        writer.WriteUe(16); // Max num ref frames
        writer.WriteBit(false); // Gaps in frame num value allowed flag
        writer.WriteUe(pictureInfo.PicWidthInMbsMinus1);
        writer.WriteUe(pictureInfo.PicHeightInMapUnitsMinus1);
        writer.WriteBit(pictureInfo.FrameMbsOnlyFlag);

        if (!pictureInfo.FrameMbsOnlyFlag)
        {
            writer.WriteBit(pictureInfo.MbAdaptiveFrameFieldFlag);
        }

        writer.WriteBit(pictureInfo.Direct8x8InferenceFlag);
        writer.WriteBit(false); // Frame cropping flag
        writer.WriteBit(false); // VUI parameter present flag

        writer.End();

        // Picture Parameter Set.
        writer.WriteU(1, 24); // Start code prefix (0x000001)
        writer.WriteU(0, 1); // Forbidden zero bit
        writer.WriteU(3, 2); // NAL ref idc
        writer.WriteU(8, 5); // NAL unit type (PPS)

        writer.WriteUe(0); // Pic parameter set id
        writer.WriteUe(0); // Seq parameter set id

        writer.WriteBit(pictureInfo.EntropyCodingModeFlag);
        writer.WriteBit(false); // Bottom field pic order in frame present flag
        writer.WriteUe(0); // Num slice groups minus 1
        writer.WriteUe(pictureInfo.NumRefIdxL0ActiveMinus1);
        writer.WriteUe(pictureInfo.NumRefIdxL1ActiveMinus1);
        writer.WriteBit(pictureInfo.WeightedPredFlag);
        writer.WriteU(pictureInfo.WeightedBipredIdc, 2);
        writer.WriteSe(pictureInfo.PicInitQpMinus26);
        writer.WriteSe(0); // Pic init qs minus 26
        writer.WriteSe(pictureInfo.ChromaQpIndexOffset);
        writer.WriteBit(pictureInfo.DeblockingFilterControlPresentFlag);
        writer.WriteBit(pictureInfo.ConstrainedIntraPredFlag);
        writer.WriteBit(pictureInfo.RedundantPicCntPresentFlag);
        writer.WriteBit(pictureInfo.Transform8x8ModeFlag);

        writer.WriteBit(pictureInfo.ScalingMatrixPresent);

        if (pictureInfo.ScalingMatrixPresent)
        {
            for (int index = 0; index < 6; index++)
            {
                writer.WriteBit(true); // Scaling list present flag

                WriteScalingList(ref writer, pictureInfo.ScalingLists4x4[index]);
            }

            if (pictureInfo.Transform8x8ModeFlag)
            {
                for (int index = 0; index < 2; index++)
                {
                    writer.WriteBit(true); // Scaling list present flag

                    WriteScalingList(ref writer, pictureInfo.ScalingLists8x8[index]);
                }
            }
        }

        writer.WriteSe(pictureInfo.SecondChromaQpIndexOffset);

        writer.End();

        return writer.AsSpan();
    }

    // ZigZag LUTs from libavcodec.
    private static readonly byte[] ZigZagDirect = new byte[]
    {
        0,  1,  8,  16, 9,  2,  3,  10,
        17, 24, 32, 25, 18, 11, 4,  5,
        12, 19, 26, 33, 40, 48, 41, 34,
        27, 20, 13, 6,  7,  14, 21, 28,
        35, 42, 49, 56, 57, 50, 43, 36,
        29, 22, 15, 23, 30, 37, 44, 51,
        58, 59, 52, 45, 38, 31, 39, 46,
        53, 60, 61, 54, 47, 55, 62, 63
    };

    private static readonly byte[] ZigZagScan = new byte[]
    {
        0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
        1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
        1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
        3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4
    };

    /// <summary>
    /// Writes one scaling list as a sequence of signed Exp-Golomb coded deltas, visiting
    /// the entries in zig-zag order (4x4 table for 16 entry lists, 8x8 table otherwise).
    /// </summary>
    /// <param name="writer">Bit stream writer to append to</param>
    /// <param name="list">Scaling list to encode</param>
    private static void WriteScalingList(ref H264BitStreamWriter writer, IArray<byte> list)
    {
        byte[] scan = list.Length == 16 ? ZigZagScan : ZigZagDirect;

        int lastScale = 8;

        for (int index = 0; index < list.Length; index++)
        {
            byte value = list[scan[index]];

            // H.264 restricts delta_scale to [-128, 127], and the decoder rebuilds the
            // entry as (lastScale + delta_scale) modulo 256. Wrapping the raw difference
            // to a signed byte therefore encodes the same value while staying in range;
            // the unwrapped difference can reach +/-255 and be rejected by the decoder.
            int deltaScale = unchecked((sbyte)(value - lastScale));

            writer.WriteSe(deltaScale);

            lastScale = value;
        }
    }
}
}

View file

@ -0,0 +1,33 @@
using FFmpeg.AutoGen;
using Ryujinx.Graphics.Video;
using System;
namespace Ryujinx.Graphics.Nvdec.H264
{
/// <summary>
/// Decoder output surface backed by an FFmpeg <see cref="AVFrame"/>.
/// All size and plane properties read directly from the frame.
/// </summary>
unsafe class Surface : ISurface
{
    /// <summary>
    /// Underlying FFmpeg frame. Null once the surface has been disposed.
    /// </summary>
    public AVFrame* Frame { get; private set; }

    // Planes 0, 1 and 2 of the frame, sized as line size times plane height.
    public Plane YPlane => new Plane((IntPtr)Frame->data[0], Stride * Height);
    public Plane UPlane => new Plane((IntPtr)Frame->data[1], UvStride * UvHeight);
    public Plane VPlane => new Plane((IntPtr)Frame->data[2], UvStride * UvHeight);

    public int Width => Frame->width;
    public int Height => Frame->height;
    public int Stride => Frame->linesize[0];

    // Chroma dimensions are half of the frame dimensions, rounded up.
    public int UvWidth => (Frame->width + 1) >> 1;
    public int UvHeight => (Frame->height + 1) >> 1;
    public int UvStride => Frame->linesize[1];

    /// <summary>
    /// Allocates an empty frame; FFmpeg fills it in when a picture is decoded into it.
    /// </summary>
    public Surface()
    {
        Frame = ffmpeg.av_frame_alloc();
    }

    /// <summary>
    /// Releases the frame and any buffers it references. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        AVFrame* frame = Frame;

        if (frame == null)
        {
            return;
        }

        // Clear the property first so that a repeated Dispose cannot free the frame twice.
        Frame = null;

        // av_frame_free is the documented counterpart of av_frame_alloc: it unreferences
        // the buffers held by the frame and then frees the frame itself.
        ffmpeg.av_frame_free(&frame);
    }
}
}