New NVDEC and VIC implementation (#1384)

* Initial NVDEC and VIC implementation

* Update FFmpeg.AutoGen to 4.3.0

* Add nvdec dependencies for Windows

* Unify some VP9 structures

* Rename VP9 structure fields

* Improvements to Video API

* XML docs for Common.Memory

* Remove now unused or redundant overloads from MemoryAccessor

* NVDEC UV surface read/write scalar paths

* Add FIXME comments about hacky things/stuff that will need to be fixed in the future

* Cleaned up VP9 memory allocation

* Remove some debug logs

* Rename some VP9 structs

* Remove unused struct

* No need to compile Ryujinx.Graphics.Host1x with unsafe anymore

* Name AsyncWorkQueue threads to make debugging easier

* Make Vp9PictureInfo a ref struct

* LayoutConverter no longer needs the depth argument (broken by rebase)

* Pooling of VP9 buffers, plus fix a memory leak on VP9

* Really wish VS could rename projects properly...

* Address feedback

* Remove using

* Catch OperationCanceledException

* Add licensing information

* Add THIRDPARTY.md to release too

Co-authored-by: Thog <me@thog.eu>
This commit is contained in:
gdkchan 2020-07-12 00:07:01 -03:00 committed by GitHub
parent 38b26cf424
commit 4d02a2d2c0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
202 changed files with 20563 additions and 2567 deletions

View file

@ -0,0 +1,120 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Types.H264
{
    /// <summary>
    /// H.264 picture parameters in their raw in-memory layout, together with accessors for the
    /// values packed into <see cref="Flags"/> and <see cref="Flags2"/> and a conversion to
    /// <see cref="H264PictureInfo"/>.
    /// </summary>
    /// <remarks>
    /// Field order and field types define the binary layout and must not be changed.
    /// Fields named <c>Unknown*</c> or <c>Padding*</c> carry their hexadecimal byte offset in the name.
    /// </remarks>
    struct PictureInfo
    {
        // Public like every other field of this structure: left private it is never read,
        // which only yields an "unused field" compiler warning.
        public Array18<uint> Unknown0;
        public uint BitstreamSize;
        public uint NumSlices;
        public uint Unknown50;
        public uint Unknown54;
        public uint Log2MaxPicOrderCntLsbMinus4;
        public uint DeltaPicOrderAlwaysZeroFlag;
        public uint FrameMbsOnlyFlag;
        public uint PicWidthInMbs;
        public uint PicHeightInMbs;
        public uint BlockLayout; // Not supported on T210
        public uint EntropyCodingModeFlag;
        public uint PicOrderPresentFlag;
        public uint NumRefIdxL0ActiveMinus1;
        public uint NumRefIdxL1ActiveMinus1;
        public uint DeblockingFilterControlPresentFlag;
        public uint RedundantPicCntPresentFlag;
        public uint Transform8x8ModeFlag;
        public uint LumaPitch;
        public uint ChromaPitch;
        public uint Unknown94;
        public uint LumaSecondFieldOffset;
        public uint Unknown9C;
        public uint UnknownA0;
        public uint ChromaSecondFieldOffset;
        public uint UnknownA8;
        public uint UnknownAC;
        public ulong Flags;
        public Array2<int> FieldOrderCnt;
        public Array16<ReferenceFrame> RefFrames;
        public Array6<Array16<byte>> ScalingLists4x4;
        public Array2<Array64<byte>> ScalingLists8x8;
        public byte MvcextNumInterViewRefsL0;
        public byte MvcextNumInterViewRefsL1;
        public ushort Padding2A2;
        public uint Unknown2A4;
        public uint Unknown2A8;
        public uint Unknown2AC;
        public Array16<byte> MvcextViewRefMasksL0;
        public Array16<byte> MvcextViewRefMasksL1;
        public uint Flags2;
        public Array10<uint> Unknown2D4;

        // Single-bit values held in bits 0..6 of Flags.
        public bool MbAdaptiveFrameFieldFlag => (Flags & (1 << 0)) != 0;
        public bool Direct8x8InferenceFlag => (Flags & (1 << 1)) != 0;
        public bool WeightedPredFlag => (Flags & (1 << 2)) != 0;
        public bool ConstrainedIntraPredFlag => (Flags & (1 << 3)) != 0;
        public bool IsReference => (Flags & (1 << 4)) != 0;
        public bool FieldPicFlag => (Flags & (1 << 5)) != 0;
        public bool BottomFieldFlag => (Flags & (1 << 6)) != 0;

        // Unsigned bit fields of Flags: shift down to the field start, then mask to its width.
        public uint Log2MaxFrameNumMinus4 => (uint)(Flags >> 8) & 0xf;
        public ushort ChromaFormatIdc => (ushort)((Flags >> 12) & 3);
        public uint PicOrderCntType => (uint)(Flags >> 14) & 3;

        // Signed bit fields of Flags (bits 16..21, 22..26 and 27..31), sign-extended to int.
        public int PicInitQpMinus26 => ExtractSx(Flags, 16, 6);
        public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5);
        public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5);

        // Fields stored in the upper 32 bits of Flags.
        public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3;
        public uint LumaOutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f;
        public uint ChromaOutputSurfaceIndex => (uint)(Flags >> 41) & 0x1f;

        // The ushort cast keeps the 16 bits starting at bit 46.
        public ushort FrameNum => (ushort)(Flags >> 46);

        // Bit 1 of Flags2.
        public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0;

        /// <summary>
        /// Extracts a signed bit field from a packed 64-bit value.
        /// </summary>
        /// <param name="packed">Value containing the field.</param>
        /// <param name="lsb">Bit index of the field's least significant bit.</param>
        /// <param name="length">Width of the field in bits.</param>
        /// <returns>The field value, sign-extended from its top bit.</returns>
        private static int ExtractSx(ulong packed, int lsb, int length)
        {
            // Move the field's top bit up to bit 63, then shift back down arithmetically
            // (the operand is a signed long) so that the sign bit is replicated.
            return (int)((long)packed << (64 - (lsb + length)) >> (64 - length));
        }

        /// <summary>
        /// Builds a <see cref="H264PictureInfo"/> from the raw fields and the unpacked flag values.
        /// </summary>
        /// <returns>The converted picture information.</returns>
        public H264PictureInfo Convert()
        {
            return new H264PictureInfo()
            {
                FieldOrderCnt = FieldOrderCnt,
                IsReference = IsReference,
                ChromaFormatIdc = ChromaFormatIdc,
                FrameNum = FrameNum,
                FieldPicFlag = FieldPicFlag,
                BottomFieldFlag = BottomFieldFlag,
                NumRefFrames = 0, // Constant: not read from this structure.
                MbAdaptiveFrameFieldFlag = MbAdaptiveFrameFieldFlag,
                ConstrainedIntraPredFlag = ConstrainedIntraPredFlag,
                WeightedPredFlag = WeightedPredFlag,
                WeightedBipredIdc = WeightedBipredIdc,
                FrameMbsOnlyFlag = FrameMbsOnlyFlag != 0,
                Transform8x8ModeFlag = Transform8x8ModeFlag != 0,
                ChromaQpIndexOffset = ChromaQpIndexOffset,
                SecondChromaQpIndexOffset = SecondChromaQpIndexOffset,
                PicInitQpMinus26 = PicInitQpMinus26,
                NumRefIdxL0ActiveMinus1 = NumRefIdxL0ActiveMinus1,
                NumRefIdxL1ActiveMinus1 = NumRefIdxL1ActiveMinus1,
                Log2MaxFrameNumMinus4 = Log2MaxFrameNumMinus4,
                PicOrderCntType = PicOrderCntType,
                Log2MaxPicOrderCntLsbMinus4 = Log2MaxPicOrderCntLsbMinus4,
                DeltaPicOrderAlwaysZeroFlag = DeltaPicOrderAlwaysZeroFlag != 0,
                Direct8x8InferenceFlag = Direct8x8InferenceFlag,
                EntropyCodingModeFlag = EntropyCodingModeFlag != 0,
                PicOrderPresentFlag = PicOrderPresentFlag != 0,
                DeblockingFilterControlPresentFlag = DeblockingFilterControlPresentFlag != 0,
                RedundantPicCntPresentFlag = RedundantPicCntPresentFlag != 0,
                // The slice group values below are constants: not read from this structure.
                NumSliceGroupsMinus1 = 0,
                SliceGroupMapType = 0,
                SliceGroupChangeRateMinus1 = 0,
                FmoAsoEnable = false,
                // The scaling lists are always forwarded, so they are always reported as present.
                ScalingMatrixPresent = true,
                ScalingLists4x4 = ScalingLists4x4,
                ScalingLists8x8 = ScalingLists8x8,
                FrameType = 0,
                PicWidthInMbsMinus1 = PicWidthInMbs - 1,
                // The height is halved when FrameMbsOnlyFlag is zero.
                PicHeightInMapUnitsMinus1 = (PicHeightInMbs >> (FrameMbsOnlyFlag != 0 ? 0 : 1)) - 1,
                QpprimeYZeroTransformBypassFlag = QpprimeYZeroTransformBypassFlag
            };
        }
    }
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Types.H264
{
/// <summary>
/// One entry of the H.264 reference frame table (<c>PictureInfo.RefFrames</c> holds sixteen of these).
/// </summary>
/// <remarks>
/// The entry consists of four 32-bit words whose meaning is not identified here; each field is
/// named after its hexadecimal byte offset inside the entry. Field order defines the layout.
/// </remarks>
struct ReferenceFrame
{
public uint Unknown0;
public uint Unknown4;
public uint Unknown8;
public uint UnknownC;
}
}

View file

@ -0,0 +1,72 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
    /// <summary>
    /// VP9 symbol counters in the layout used on the NVDEC side, filled in from a
    /// <see cref="Vp9BackwardUpdates"/> produced by the decoder.
    /// </summary>
    /// <remarks>
    /// Field order and field types define the binary layout and must not be changed.
    /// </remarks>
    struct BackwardUpdates
    {
        public Array7<Array3<Array2<uint>>> InterModeCounts;
        public Array4<Array10<uint>> YModeCounts;
        public Array10<Array10<uint>> UvModeCounts;
        public Array16<Array4<uint>> PartitionCounts;
        public Array4<Array3<uint>> SwitchableInterpsCount;
        public Array4<Array2<uint>> IntraInterCount;
        public Array5<Array2<uint>> CompInterCount;
        public Array5<Array2<Array2<uint>>> SingleRefCount;
        public Array5<Array2<uint>> CompRefCount;
        public Array2<Array4<uint>> Tx32x32;
        public Array2<Array3<uint>> Tx16x16;
        public Array2<Array2<uint>> Tx8x8;
        public Array3<Array2<uint>> MbSkipCount;
        public Array4<uint> Joints;
        public Array2<Array2<uint>> Sign;
        public Array2<Array11<uint>> Classes;
        public Array2<Array2<uint>> Class0;
        public Array2<Array10<Array2<uint>>> Bits;
        public Array2<Array2<Array4<uint>>> Class0Fp;
        public Array2<Array4<uint>> Fp;
        public Array2<Array2<uint>> Class0Hp;
        public Array2<Array2<uint>> Hp;
        public Array4<Array2<Array2<Array6<Array6<Array4<uint>>>>>> CoefCounts;
        public Array4<Array2<Array2<Array6<Array6<uint>>>>> EobCounts;

        /// <summary>
        /// Creates the structure from the decoder's counters.
        /// </summary>
        /// <param name="counts">Counters to copy from; only read, never written.</param>
        public BackwardUpdates(ref Vp9BackwardUpdates counts)
        {
            InterModeCounts = default;

            // The source keeps four inter mode counters per context, while this layout stores
            // three (first, second) pairs per context: each pair sets one counter against the
            // sum of the counters that have not been split off yet.
            for (int ctx = 0; ctx < 7; ctx++)
            {
                uint mode0 = counts.InterMode[ctx][0];
                uint mode1 = counts.InterMode[ctx][1];
                uint mode2 = counts.InterMode[ctx][2];
                uint mode3 = counts.InterMode[ctx][3];

                InterModeCounts[ctx][0][0] = mode2;
                InterModeCounts[ctx][0][1] = mode0 + mode1 + mode3;

                InterModeCounts[ctx][1][0] = mode0;
                InterModeCounts[ctx][1][1] = mode1 + mode3;

                InterModeCounts[ctx][2][0] = mode1;
                InterModeCounts[ctx][2][1] = mode3;
            }

            // Everything else has the same shape on both sides and is copied as a whole.

            // Mode, partition and reference counters.
            YModeCounts = counts.YMode;
            UvModeCounts = counts.UvMode;
            PartitionCounts = counts.Partition;
            SwitchableInterpsCount = counts.SwitchableInterp;
            IntraInterCount = counts.IntraInter;
            CompInterCount = counts.CompInter;
            SingleRefCount = counts.SingleRef;
            CompRefCount = counts.CompRef;

            // Transform size and skip counters.
            Tx32x32 = counts.Tx32x32;
            Tx16x16 = counts.Tx16x16;
            Tx8x8 = counts.Tx8x8;
            MbSkipCount = counts.Skip;

            // Joints/Sign/Classes/Bits/Fp/Hp counters.
            Joints = counts.Joints;
            Sign = counts.Sign;
            Classes = counts.Classes;
            Class0 = counts.Class0;
            Bits = counts.Bits;
            Class0Fp = counts.Class0Fp;
            Fp = counts.Fp;
            Class0Hp = counts.Class0Hp;
            Hp = counts.Hp;

            // Coefficient counters.
            CoefCounts = counts.Coef;
            EobCounts = counts.EobBranch;
        }
    }
}

View file

@ -0,0 +1,139 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
    /// <summary>
    /// VP9 entropy probabilities in their raw in-memory layout, with a conversion that copies
    /// them into a <see cref="Vp9EntropyProbs"/>.
    /// </summary>
    /// <remarks>
    /// Field order and field types define the binary layout and must not be changed.
    /// Several nine-entry probability rows are stored split in two: entries 0..7 live in an
    /// <c>*E0ToE7</c> field and entry 8 in the matching <c>*E8</c> field.
    /// <c>Padding*</c> fields are named after their hexadecimal byte offset.
    /// </remarks>
    struct EntropyProbs
    {
        public Array10<Array10<Array8<byte>>> KfYModeProbE0ToE7;
        public Array10<Array10<byte>> KfYModeProbE8;
        public Array3<byte> Padding384;
        public Array7<byte> SegTreeProbs;
        public Array3<byte> SegPredProbs;
        public Array15<byte> Padding391;
        public Array10<Array8<byte>> KfUvModeProbE0ToE7;
        public Array10<byte> KfUvModeProbE8;
        public Array6<byte> Padding3FA;
        public Array7<Array4<byte>> InterModeProb;
        public Array4<byte> IntraInterProb;
        public Array10<Array8<byte>> UvModeProbE0ToE7;
        public Array2<Array1<byte>> Tx8x8Prob;
        public Array2<Array2<byte>> Tx16x16Prob;
        public Array2<Array3<byte>> Tx32x32Prob;
        public Array4<byte> YModeProbE8;
        public Array4<Array8<byte>> YModeProbE0ToE7;
        public Array16<Array4<byte>> KfPartitionProb;
        public Array16<Array4<byte>> PartitionProb;
        public Array10<byte> UvModeProbE8;
        public Array4<Array2<byte>> SwitchableInterpProb;
        public Array5<byte> CompInterProb;
        public Array4<byte> SkipProbs;
        public Array3<byte> Joints;
        public Array2<byte> Sign;
        public Array2<Array1<byte>> Class0;
        public Array2<Array3<byte>> Fp;
        public Array2<byte> Class0Hp;
        public Array2<byte> Hp;
        public Array2<Array10<byte>> Classes;
        public Array2<Array2<Array3<byte>>> Class0Fp;
        public Array2<Array10<byte>> Bits;
        public Array5<Array2<byte>> SingleRefProb;
        public Array5<byte> CompRefProb;
        public Array17<byte> Padding58F;
        public Array4<Array2<Array2<Array6<Array6<Array4<byte>>>>>> CoefProbs;

        /// <summary>
        /// Copies the probabilities stored here into <paramref name="fc"/>.
        /// </summary>
        /// <param name="fc">Destination probabilities; members not listed below are left untouched.</param>
        public void Convert(ref Vp9EntropyProbs fc)
        {
            // Key frame Y mode: rejoin the 8 + 1 split into rows of nine entries.
            for (int above = 0; above < 10; above++)
            {
                for (int left = 0; left < 10; left++)
                {
                    for (int e = 0; e < 8; e++)
                    {
                        fc.KfYModeProb[above][left][e] = KfYModeProbE0ToE7[above][left][e];
                    }

                    fc.KfYModeProb[above][left][8] = KfYModeProbE8[above][left];
                }
            }

            fc.SegTreeProb = SegTreeProbs;
            fc.SegPredProb = SegPredProbs;

            // Only the first three of the four stored bytes per context are copied.
            for (int ctx = 0; ctx < 7; ctx++)
            {
                for (int e = 0; e < 3; e++)
                {
                    fc.InterModeProb[ctx][e] = InterModeProb[ctx][e];
                }
            }

            fc.IntraInterProb = IntraInterProb;

            // UV mode (key frame and regular): rejoin the 8 + 1 split.
            for (int mode = 0; mode < 10; mode++)
            {
                for (int e = 0; e < 8; e++)
                {
                    fc.KfUvModeProb[mode][e] = KfUvModeProbE0ToE7[mode][e];
                    fc.UvModeProb[mode][e] = UvModeProbE0ToE7[mode][e];
                }

                fc.KfUvModeProb[mode][8] = KfUvModeProbE8[mode];
                fc.UvModeProb[mode][8] = UvModeProbE8[mode];
            }

            fc.Tx8x8Prob = Tx8x8Prob;
            fc.Tx16x16Prob = Tx16x16Prob;
            fc.Tx32x32Prob = Tx32x32Prob;

            // Y mode: rejoin the 8 + 1 split.
            for (int group = 0; group < 4; group++)
            {
                for (int e = 0; e < 8; e++)
                {
                    fc.YModeProb[group][e] = YModeProbE0ToE7[group][e];
                }

                fc.YModeProb[group][8] = YModeProbE8[group];
            }

            // Only the first three of the four stored bytes per context are copied.
            for (int ctx = 0; ctx < 16; ctx++)
            {
                for (int e = 0; e < 3; e++)
                {
                    fc.KfPartitionProb[ctx][e] = KfPartitionProb[ctx][e];
                    fc.PartitionProb[ctx][e] = PartitionProb[ctx][e];
                }
            }

            fc.SwitchableInterpProb = SwitchableInterpProb;
            fc.CompInterProb = CompInterProb;

            // Only the first three of the four stored skip probabilities are copied.
            for (int ctx = 0; ctx < 3; ctx++)
            {
                fc.SkipProb[ctx] = SkipProbs[ctx];
            }

            fc.Joints = Joints;
            fc.Sign = Sign;
            fc.Class0 = Class0;
            fc.Fp = Fp;
            fc.Class0Hp = Class0Hp;
            fc.Hp = Hp;
            fc.Classes = Classes;
            fc.Class0Fp = Class0Fp;
            fc.Bits = Bits;
            fc.SingleRefProb = SingleRefProb;
            fc.CompRefProb = CompRefProb;

            // Coefficient probabilities: the innermost row stores four bytes, three are copied.
            for (int a = 0; a < 4; a++)
            {
                for (int b = 0; b < 2; b++)
                {
                    for (int c = 0; c < 2; c++)
                    {
                        for (int d = 0; d < 6; d++)
                        {
                            for (int e = 0; e < 6; e++)
                            {
                                for (int p = 0; p < 3; p++)
                                {
                                    fc.CoefProbs[a][b][c][d][e][p] = CoefProbs[a][b][c][d][e][p];
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

View file

@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
    /// <summary>
    /// Bit flags describing a VP9 frame; each member occupies a single bit so that several
    /// of them can be combined in one 32-bit value.
    /// </summary>
    /// <remarks>
    /// Marked with <see cref="System.FlagsAttribute"/> because the values are combined and
    /// queried as a bit set; this also makes combined values format as a list of member names.
    /// </remarks>
    [System.Flags]
    enum FrameFlags : uint
    {
        IsKeyFrame = 1 << 0,
        LastFrameIsKeyFrame = 1 << 1,
        FrameSizeChanged = 1 << 2,
        ErrorResilientMode = 1 << 3,
        LastShowFrame = 1 << 4,
        IntraOnly = 1 << 5
    }
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
/// <summary>
/// Size description of one frame: four 16-bit values, eight bytes in total.
/// </summary>
/// <remarks>
/// The VP9 <c>PictureInfo</c> stores one of these each for the last, golden, alt and current frame.
/// Field order defines the layout.
/// NOTE(review): the units are not visible here; presumably pixels for the dimensions and
/// bytes for the pitches. Confirm against the code that consumes them.
/// </remarks>
struct FrameSize
{
public ushort Width;
public ushort Height;
public ushort LumaPitch;
public ushort ChromaPitch;
}
}

View file

@ -0,0 +1,20 @@
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
/// <summary>
/// Per-frame status and statistics block: fourteen 32-bit words, 0x38 bytes in total.
/// </summary>
/// <remarks>
/// Field order defines the layout. Fields named <c>Unknown*</c> are named after their
/// hexadecimal byte offset; their meaning is not identified here.
/// NOTE(review): nothing in this file shows who fills in or reads these values. The
/// descriptions of the named fields follow from their names only. Confirm before relying on them.
/// </remarks>
struct FrameStats
{
public uint Unknown0;
public uint Unknown4;
public uint Pass2CycleCount;
public uint ErrorStatus;
// Counters, presumably of intra, inter and skipped blocks in the frame (TODO confirm).
public uint FrameStatusIntraCnt;
public uint FrameStatusInterCnt;
public uint FrameStatusSkipCtuCount;
// Forward and backward motion vector counters, one per axis.
public uint FrameStatusFwdMvxCnt;
public uint FrameStatusFwdMvyCnt;
public uint FrameStatusBwdMvxCnt;
public uint FrameStatusBwdMvyCnt;
// Presumably the position at which an error was detected (TODO confirm).
public uint ErrorCtbPos;
public uint ErrorSlicePos;
public uint Unknown34;
}
}

View file

@ -0,0 +1,11 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
/// <summary>
/// Loop filter delta parameters embedded in the VP9 <c>PictureInfo</c> (its <c>Lf</c> field).
/// </summary>
/// <remarks>
/// Seven bytes in total; field order defines the layout.
/// </remarks>
struct LoopFilter
{
// Stored as a byte; PictureInfo.Convert treats any non-zero value as true.
public byte ModeRefDeltaEnabled;
// Signed deltas, passed through unchanged by PictureInfo.Convert.
public Array4<sbyte> RefDeltas;
public Array2<sbyte> ModeDeltas;
}
}

View file

@ -0,0 +1,85 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Video;
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
    /// <summary>
    /// VP9 picture parameters in their raw in-memory layout, with a conversion to
    /// <see cref="Vp9PictureInfo"/>.
    /// </summary>
    /// <remarks>
    /// Field order and field types define the binary layout and must not be changed.
    /// Fields named <c>Unknown*</c>, <c>Reserved*</c> or <c>Padding*</c> carry their
    /// hexadecimal byte offset in the name.
    /// </remarks>
    struct PictureInfo
    {
        public Array12<uint> Unknown0;
        public uint BitstreamSize;
        public uint IsEncrypted;
        public uint Unknown38;
        public uint Reserved3C;
        public uint BlockLayout; // Not supported on T210
        public uint WorkBufferSizeShr8;
        public FrameSize LastFrameSize;
        public FrameSize GoldenFrameSize;
        public FrameSize AltFrameSize;
        public FrameSize CurrentFrameSize;
        public FrameFlags Flags;
        public Array4<sbyte> RefFrameSignBias;
        public byte FirstLevel;
        public byte SharpnessLevel;
        public byte BaseQIndex;
        public byte YDcDeltaQ;
        public byte UvAcDeltaQ;
        public byte UvDcDeltaQ;
        public byte Lossless;
        public byte TxMode;
        public byte AllowHighPrecisionMv;
        public byte InterpFilter;
        public byte ReferenceMode;
        public sbyte CompFixedRef;
        public Array2<sbyte> CompVarRef;
        public byte Log2TileCols;
        public byte Log2TileRows;
        public Segmentation Seg;
        public LoopFilter Lf;
        public byte PaddingEB;
        public uint WorkBufferSizeShr8New; // Not supported on T210
        public uint SurfaceParams; // Not supported on T210
        public uint UnknownF4;
        public uint UnknownF8;
        public uint UnknownFC;

        // Bits 1..4 of SurfaceParams.
        public uint BitDepth => (SurfaceParams & 0x1e) >> 1;

        /// <summary>
        /// Builds a <see cref="Vp9PictureInfo"/> from the raw fields.
        /// </summary>
        /// <returns>The converted picture information.</returns>
        public Vp9PictureInfo Convert()
        {
            // Any of these flags being set rules out using the previous frame in FindMvRefs.
            const FrameFlags PrevMvBlockers =
                FrameFlags.ErrorResilientMode |
                FrameFlags.FrameSizeChanged |
                FrameFlags.IntraOnly |
                FrameFlags.LastFrameIsKeyFrame;

            bool keyFrame = (Flags & FrameFlags.IsKeyFrame) != 0;
            bool intraOnly = (Flags & FrameFlags.IntraOnly) != 0;
            bool lastShown = (Flags & FrameFlags.LastShowFrame) != 0;

            // The last frame must have been shown, and none of the blocking flags may be set.
            bool usePrevMvs = lastShown && (Flags & PrevMvBlockers) == 0;

            return new Vp9PictureInfo()
            {
                IsKeyFrame = keyFrame,
                IntraOnly = intraOnly,
                UsePrevInFindMvRefs = usePrevMvs,
                RefFrameSignBias = RefFrameSignBias,

                // Quantizer settings.
                BaseQIndex = BaseQIndex,
                YDcDeltaQ = YDcDeltaQ,
                UvDcDeltaQ = UvDcDeltaQ,
                UvAcDeltaQ = UvAcDeltaQ,
                Lossless = Lossless != 0,

                // Transform, prediction and tiling settings.
                TransformMode = TxMode,
                AllowHighPrecisionMv = AllowHighPrecisionMv != 0,
                InterpFilter = InterpFilter,
                ReferenceMode = ReferenceMode,
                CompFixedRef = CompFixedRef,
                CompVarRef = CompVarRef,
                Log2TileCols = Log2TileCols,
                Log2TileRows = Log2TileRows,

                // Segmentation; byte-sized switches become booleans.
                SegmentEnabled = Seg.Enabled != 0,
                SegmentMapUpdate = Seg.UpdateMap != 0,
                SegmentMapTemporalUpdate = Seg.TemporalUpdate != 0,
                SegmentAbsDelta = Seg.AbsDelta,
                SegmentFeatureEnable = Seg.FeatureMask,
                SegmentFeatureData = Seg.FeatureData,

                // Loop filter deltas.
                ModeRefDeltaEnabled = Lf.ModeRefDeltaEnabled != 0,
                RefDeltas = Lf.RefDeltas,
                ModeDeltas = Lf.ModeDeltas
            };
        }
    }
}

View file

@ -0,0 +1,14 @@
using Ryujinx.Common.Memory;
namespace Ryujinx.Graphics.Nvdec.Types.Vp9
{
/// <summary>
/// Segmentation parameters embedded in the VP9 <c>PictureInfo</c> (its <c>Seg</c> field).
/// </summary>
/// <remarks>
/// Field order defines the layout: four bytes, followed by eight 32-bit masks and
/// eight rows of four 16-bit values.
/// </remarks>
struct Segmentation
{
// Byte-sized switches; PictureInfo.Convert treats any non-zero value as true.
public byte Enabled;
public byte UpdateMap;
public byte TemporalUpdate;
// Passed through unchanged as SegmentAbsDelta.
public byte AbsDelta;
// One entry per segment (eight entries); forwarded as SegmentFeatureEnable.
public Array8<uint> FeatureMask;
// Four signed values per segment; forwarded as SegmentFeatureData.
public Array8<Array4<short>> FeatureData;
}
}