Move solution and projects to src

This commit is contained in:
TSR Berry 2023-04-08 01:22:00 +02:00 committed by Mary
parent cd124bda58
commit cee7121058
3466 changed files with 55 additions and 55 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,9 @@
using System;
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Exception type declared alongside the ASTC decoder; carries a caller-supplied message
/// and, optionally, the exception that caused it.
/// </summary>
public class AstcDecoderException : Exception
{
    /// <summary>Creates the exception with the default framework message.</summary>
    public AstcDecoderException() { }

    /// <summary>Creates the exception with the given message.</summary>
    /// <param name="exMsg">Text describing the failure.</param>
    public AstcDecoderException(string exMsg) : base(exMsg) { }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    /// <param name="exMsg">Text describing the failure.</param>
    /// <param name="innerException">Underlying cause, exposed through <see cref="Exception.InnerException"/>.</param>
    public AstcDecoderException(string exMsg, Exception innerException) : base(exMsg, innerException) { }
}
}

View file

@ -0,0 +1,68 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Four signed 16-bit color components laid out sequentially as A, R, G, B, followed by a
/// 32-bit word that is initialised to 0x08080808.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
struct AstcPixel
{
    // Four 2-byte components plus the 4-byte trailing word.
    internal const int StructSize = 12;

    public short A;
    public short R;
    public short G;
    public short B;

    private uint _bitDepthInt;

    // Byte view over the trailing word (presumably one bit depth per component - confirm with callers).
    private Span<byte> BitDepth => MemoryMarshal.CreateSpan(ref Unsafe.As<uint, byte>(ref _bitDepthInt), 4);

    // Indexable view over A, R, G, B in declaration order.
    private Span<short> Components => MemoryMarshal.CreateSpan(ref A, 4);

    /// <summary>Creates a pixel from its four components; the trailing word is set to 0x08080808.</summary>
    public AstcPixel(short a, short r, short g, short b)
    {
        A = a;
        R = r;
        G = g;
        B = b;
        _bitDepthInt = 0x08080808;
    }

    /// <summary>Clamps every component to the range 0..255.</summary>
    public void ClampByte()
    {
        const short Lowest = 0;
        const short Highest = 255;

        A = Math.Clamp(A, Lowest, Highest);
        R = Math.Clamp(R, Lowest, Highest);
        G = Math.Clamp(G, Lowest, Highest);
        B = Math.Clamp(B, Lowest, Highest);
    }

    /// <summary>Returns the component at <paramref name="index"/> (0 = A, 1 = R, 2 = G, 3 = B).</summary>
    public short GetComponent(int index) => Components[index];

    /// <summary>Stores <paramref name="value"/>, truncated to 16 bits, into the component at <paramref name="index"/>.</summary>
    public void SetComponent(int index, int value) => Components[index] = (short)value;

    /// <summary>Packs the components into one integer: A in bits 24+, B in 16+, G in 8+, R in 0+.</summary>
    public int Pack()
    {
        int packed = R;
        packed |= G << 8;
        packed |= B << 16;
        packed |= A << 24;

        return packed;
    }

    // Adds more precision to the blue channel as described
    // in C.2.14
    public static AstcPixel BlueContract(int a, int r, int g, int b)
    {
        // Red and green are averaged with blue; alpha and blue pass through.
        short red = (short)((r + b) >> 1);
        short green = (short)((g + b) >> 1);

        return new AstcPixel((short)a, red, green, (short)b);
    }
}
}

View file

@ -0,0 +1,72 @@
using Ryujinx.Common.Utilities;
using System;
using System.Diagnostics;
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Bit-level reader/writer over a 16-byte buffer, viewed as two 64-bit words.
/// </summary>
public struct BitStream128
{
    private Buffer16 _data;

    /// <summary>
    /// Bit counter: decremented by <see cref="ReadBits"/> and used by <see cref="WriteBits"/>
    /// as the bit position of the next write (then incremented).
    /// </summary>
    public int BitsLeft { get; set; }

    /// <summary>Wraps <paramref name="data"/> and sets <see cref="BitsLeft"/> to 128.</summary>
    public BitStream128(Buffer16 data)
    {
        _data = data;
        BitsLeft = 128;
    }

    /// <summary>
    /// Returns the lowest <paramref name="bitCount"/> bits of the buffer and shifts the whole
    /// 128-bit value right by that amount.
    /// </summary>
    /// <param name="bitCount">Number of bits to consume; must be below 32.</param>
    public int ReadBits(int bitCount)
    {
        Debug.Assert(bitCount < 32);

        if (bitCount == 0)
        {
            return 0;
        }

        // Capture the result before the buffer is shifted.
        int result = _data.As<int>() & ((1 << bitCount) - 1);

        Span<ulong> words = _data.AsSpan<ulong>();

        // Bits leaving the high word enter the top of the low word.
        words[0] = (words[0] >> bitCount) | (words[1] << (64 - bitCount));
        words[1] >>= bitCount;

        BitsLeft -= bitCount;

        return result;
    }

    /// <summary>
    /// ORs the lowest <paramref name="bitCount"/> bits of <paramref name="value"/> into the buffer
    /// at bit position <see cref="BitsLeft"/>, then advances <see cref="BitsLeft"/>.
    /// </summary>
    /// <param name="value">Source of the bits to write.</param>
    /// <param name="bitCount">Number of bits to write; must be below 32.</param>
    public void WriteBits(int value, int bitCount)
    {
        Debug.Assert(bitCount < 32);

        if (bitCount == 0)
        {
            return;
        }

        ulong bits = (uint)(value & ((1 << bitCount) - 1));
        Span<ulong> words = _data.AsSpan<ulong>();
        int position = BitsLeft;

        if (position < 64)
        {
            words[0] |= bits << position;
        }

        if (position + bitCount > 64)
        {
            // Either the write lies entirely in the high word, or only the part that did not
            // fit in the low word spills into it.
            words[1] |= position > 64
                ? bits << (position - 64)
                : bits >> (64 - position);
        }

        BitsLeft += bitCount;
    }
}
}

View file

@ -0,0 +1,66 @@
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Bit replication and bit transfer helpers, with precomputed replication tables.
/// </summary>
internal static class Bits
{
    // Both tables have 0x200 entries, one per input value 0..0x1FF.
    public static readonly ushort[] Replicate8_16Table;
    public static readonly byte[] Replicate1_7Table;

    static Bits()
    {
        const int EntryCount = 0x200;

        Replicate8_16Table = new ushort[EntryCount];
        Replicate1_7Table = new byte[EntryCount];

        for (int input = 0; input < EntryCount; input++)
        {
            Replicate8_16Table[input] = (ushort)Replicate(input, 8, 16);
            Replicate1_7Table[input] = (byte)Replicate(input, 1, 7);
        }
    }

    /// <summary>Table lookup equivalent to <c>Replicate(value, 8, 16)</c>; value must be below 0x200.</summary>
    public static int Replicate8_16(int value) => Replicate8_16Table[value];

    /// <summary>Table lookup equivalent to <c>Replicate(value, 1, 7)</c>; value must be below 0x200.</summary>
    public static int Replicate1_7(int value) => Replicate1_7Table[value];

    /// <summary>
    /// Repeats the low <paramref name="numberBits"/> bits of <paramref name="value"/> until
    /// <paramref name="toBit"/> bits are filled; the last copy is cut down to its top bits when
    /// it does not fit entirely.
    /// </summary>
    /// <returns>
    /// The replicated pattern, or 0 when either bit count is 0. When <paramref name="numberBits"/>
    /// is not smaller than <paramref name="toBit"/> the masked value is returned unchanged.
    /// </returns>
    public static int Replicate(int value, int numberBits, int toBit)
    {
        if (numberBits == 0 || toBit == 0)
        {
            return 0;
        }

        int pattern = value & ((1 << numberBits) - 1);
        int result = pattern;

        for (int filled = numberBits; filled < toBit;)
        {
            int remaining = toBit - filled;

            // Append a whole copy when it fits, otherwise only the top bits of the pattern.
            int step = numberBits > remaining ? remaining : numberBits;

            result = (result << step) | (pattern >> (numberBits - step));
            filled += step;
        }

        return result;
    }

    // Transfers a bit as described in C.2.14
    public static void BitTransferSigned(ref int a, ref int b)
    {
        // Bit 7 of a moves into bit 7 of b; both values drop their lowest bit.
        b = (b >> 1) | (a & 0x80);
        a = (a >> 1) & 0x3F;

        // The remaining 6 bits of a are a signed value.
        if ((a & 0x20) != 0)
        {
            a -= 0x40;
        }
    }
}
}

View file

@ -0,0 +1,23 @@
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Inline storage sized for eight <see cref="AstcPixel"/> values, exposed as four pairs.
/// </summary>
[StructLayout(LayoutKind.Sequential, Size = AstcPixel.StructSize * 8)]
internal struct EndPointSet
{
    // First pixel of the storage; the other seven follow it in memory via the declared struct size.
    private AstcPixel _start;

    /// <summary>Returns the two-pixel span of pair <paramref name="index"/> (expected to be below 4).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<AstcPixel> Get(int index)
    {
        Debug.Assert(index < 4);

        int firstPixel = index * 2;

        return MemoryMarshal.CreateSpan(ref Unsafe.Add(ref _start, firstPixel), 2);
    }
}
}

View file

@ -0,0 +1,345 @@
using System;
using System.Numerics;
namespace Ryujinx.Graphics.Texture.Astc
{
internal struct IntegerEncoded
{
// Size constant for one IntegerEncoded value; IntegerSequence multiplies it by its capacity
// to size its inline storage.
internal const int StructSize = 8;
// Encoding parameters indexed by maximum value (0..255); filled in by the static constructor.
private static readonly IntegerEncoded[] Encodings;
// How each value of a sequence is stored: plain bits only, or plain bits plus a quint or a trit.
public enum EIntegerEncoding : byte
{
JustBits,
Quint,
Trit
}
EIntegerEncoding _encoding;
// Number of plain bits stored per value.
public byte NumberBits { get; private set; }
// Trit decoded for this value (set by DecodeTritBlock, 0 otherwise).
public byte TritValue { get; private set; }
// Quint decoded for this value (set by DecodeQuintBlock, 0 otherwise).
public byte QuintValue { get; private set; }
// Plain bits read from the stream for this value.
public int BitValue { get; private set; }
// Precomputes the encoding parameters for every maximum value from 0 to 255.
static IntegerEncoded()
{
    IntegerEncoded[] table = new IntegerEncoded[0x100];

    for (int maxVal = 0; maxVal < table.Length; maxVal++)
    {
        table[maxVal] = CreateEncodingCalc(maxVal);
    }

    Encodings = table;
}
/// <summary>
/// Creates an encoding descriptor with the given kind and plain-bit count; the decoded
/// bit, trit and quint values start at zero.
/// </summary>
/// <param name="encoding">Kind of encoding.</param>
/// <param name="numBits">Plain bits per value; stored truncated to a byte.</param>
public IntegerEncoded(EIntegerEncoding encoding, int numBits)
{
    _encoding = encoding;
    NumberBits = (byte)numBits;

    // No value has been decoded yet.
    QuintValue = 0;
    TritValue = 0;
    BitValue = 0;
}
/// <summary>Returns true when <paramref name="other"/> has the same encoding kind and plain-bit count.</summary>
public bool MatchesEncoding(IntegerEncoded other) =>
    NumberBits == other.NumberBits && _encoding == other._encoding;
/// <summary>Returns the encoding kind of this descriptor.</summary>
public EIntegerEncoding GetEncoding() => _encoding;
/// <summary>
/// Returns the number of bits taken by <paramref name="numberVals"/> values with this encoding:
/// the plain bits plus, for trits and quints, the rounded-up share of the packed blocks.
/// </summary>
public int GetBitLength(int numberVals)
{
    int packedBits;

    switch (_encoding)
    {
        case EIntegerEncoding.Trit:
            // 8 packed bits per 5 values, rounded up.
            packedBits = (numberVals * 8 + 4) / 5;
            break;
        case EIntegerEncoding.Quint:
            // 7 packed bits per 3 values, rounded up.
            packedBits = (numberVals * 7 + 2) / 3;
            break;
        default:
            packedBits = 0;
            break;
    }

    return NumberBits * numberVals + packedBits;
}
/// <summary>Looks up the precomputed encoding for <paramref name="maxVal"/> (valid range 0..255).</summary>
public static IntegerEncoded CreateEncoding(int maxVal) => Encodings[maxVal];
/// <summary>
/// Finds the encoding for the largest value not above <paramref name="maxVal"/> that has the
/// form 2^n - 1 (plain bits), 3*2^n - 1 (trit) or 5*2^n - 1 (quint).
/// </summary>
/// <returns>The matching descriptor, or a zero-bit plain encoding when maxVal is not positive.</returns>
private static IntegerEncoded CreateEncodingCalc(int maxVal)
{
    for (int candidate = maxVal; candidate > 0; candidate--)
    {
        int count = candidate + 1;

        // count = 2^n: the candidate is n one bits.
        if ((count & (count - 1)) == 0)
        {
            return new IntegerEncoded(EIntegerEncoding.JustBits, BitOperations.PopCount((uint)candidate));
        }

        // count = 3 * 2^n: one trit plus n plain bits.
        int third = count / 3;

        if (count % 3 == 0 && (third & (third - 1)) == 0)
        {
            return new IntegerEncoded(EIntegerEncoding.Trit, BitOperations.PopCount((uint)(third - 1)));
        }

        // count = 5 * 2^n: one quint plus n plain bits.
        int fifth = count / 5;

        if (count % 5 == 0 && (fifth & (fifth - 1)) == 0)
        {
            return new IntegerEncoded(EIntegerEncoding.Quint, BitOperations.PopCount((uint)(fifth - 1)));
        }

        // No exact form for this candidate, try the next smaller value.
    }

    return new IntegerEncoded(EIntegerEncoding.JustBits, 0);
}
/// <summary>
/// Reads one trit block (five values) from <paramref name="bitStream"/> and appends the decoded
/// values to <paramref name="listIntegerEncoded"/>.
/// </summary>
/// <param name="bitStream">Stream positioned at the start of the block.</param>
/// <param name="listIntegerEncoded">Sequence receiving the five decoded values.</param>
/// <param name="numberBitsPerValue">Plain bits stored for each value.</param>
public static void DecodeTritBlock(
    ref BitStream128 bitStream,
    ref IntegerSequence listIntegerEncoded,
    int numberBitsPerValue)
{
    // Implement the algorithm in section C.2.12
    // After each value's plain bits comes a slice of the 8-bit packed trit word.
    ReadOnlySpan<byte> packedSliceWidths = new byte[] { 2, 2, 1, 2, 1 };

    Span<int> m = stackalloc int[5];
    int packedTrits = 0;
    int packedShift = 0;

    for (int i = 0; i < m.Length; i++)
    {
        m[i] = bitStream.ReadBits(numberBitsPerValue);
        packedTrits |= bitStream.ReadBits(packedSliceWidths[i]) << packedShift;
        packedShift += packedSliceWidths[i];
    }

    ReadOnlySpan<byte> trits = GetTritEncoding(packedTrits);

    for (int i = 0; i < 5; i++)
    {
        IntegerEncoded decoded = new IntegerEncoded(EIntegerEncoding.Trit, numberBitsPerValue)
        {
            BitValue = m[i],
            TritValue = trits[i]
        };

        listIntegerEncoded.Add(ref decoded);
    }
}
/// <summary>
/// Reads one quint block (three values) from <paramref name="bitStream"/> and appends the decoded
/// values to <paramref name="listIntegerEncoded"/>.
/// </summary>
/// <param name="bitStream">Stream positioned at the start of the block.</param>
/// <param name="listIntegerEncoded">Sequence receiving the three decoded values.</param>
/// <param name="numberBitsPerValue">Plain bits stored for each value.</param>
public static void DecodeQuintBlock(
    ref BitStream128 bitStream,
    ref IntegerSequence listIntegerEncoded,
    int numberBitsPerValue)
{
    // Implement the algorithm in section C.2.12
    // The 7-bit packed quint word is split 3 + 2 + 2 and interleaved with the plain bits.
    Span<int> m = stackalloc int[3];

    m[0] = bitStream.ReadBits(numberBitsPerValue);
    int packedQuints = bitStream.ReadBits(3);
    m[1] = bitStream.ReadBits(numberBitsPerValue);
    packedQuints |= bitStream.ReadBits(2) << 3;
    m[2] = bitStream.ReadBits(numberBitsPerValue);
    packedQuints |= bitStream.ReadBits(2) << 5;

    ReadOnlySpan<byte> quints = GetQuintEncoding(packedQuints);

    IntegerEncoded decoded = new IntegerEncoded(EIntegerEncoding.Quint, numberBitsPerValue);

    for (int i = 0; i < m.Length; i++)
    {
        decoded.BitValue = m[i];
        decoded.QuintValue = quints[i];

        // Add copies the value, so the same local can be reused for the next entry.
        listIntegerEncoded.Add(ref decoded);
    }
}
/// <summary>
/// Decodes at least <paramref name="numberValues"/> values with maximum value
/// <paramref name="maxRange"/> from <paramref name="bitStream"/> into
/// <paramref name="decodeIntegerSequence"/>.
/// </summary>
/// <remarks>
/// Trit and quint encodings are decoded in whole blocks of 5 and 3 values, so more values
/// than requested may be appended.
/// </remarks>
public static void DecodeIntegerSequence(
    ref IntegerSequence decodeIntegerSequence,
    ref BitStream128 bitStream,
    int maxRange,
    int numberValues)
{
    // The encoding kind is fixed for the whole sequence, so dispatch once.
    IntegerEncoded encoding = CreateEncoding(maxRange);
    int bitsPerValue = encoding.NumberBits;

    switch (encoding.GetEncoding())
    {
        case EIntegerEncoding.Quint:
            for (int decoded = 0; decoded < numberValues; decoded += 3)
            {
                DecodeQuintBlock(ref bitStream, ref decodeIntegerSequence, bitsPerValue);
            }
            break;

        case EIntegerEncoding.Trit:
            for (int decoded = 0; decoded < numberValues; decoded += 5)
            {
                DecodeTritBlock(ref bitStream, ref decodeIntegerSequence, bitsPerValue);
            }
            break;

        case EIntegerEncoding.JustBits:
            for (int decoded = 0; decoded < numberValues; decoded++)
            {
                encoding.BitValue = bitStream.ReadBits(bitsPerValue);
                decodeIntegerSequence.Add(ref encoding);
            }
            break;
    }
}
// Returns the five trits stored for packed value 'index' in the trit table.
private static ReadOnlySpan<byte> GetTritEncoding(int index)
{
    const int TritsPerEntry = 5;

    return TritEncodings.Slice(index * TritsPerEntry, TritsPerEntry);
}
// Returns the three quints stored for packed value 'index' in the quint table.
private static ReadOnlySpan<byte> GetQuintEncoding(int index)
{
    const int QuintsPerEntry = 3;

    return QuintEncodings.Slice(index * QuintsPerEntry, QuintsPerEntry);
}
// Lookup table for packed trit blocks: 256 entries of 5 bytes each (1280 bytes in total).
// Entry i starts at offset i * 5 and holds the five trits (each 0..2) for packed value i;
// GetTritEncoding slices one entry out of it.
private static ReadOnlySpan<byte> TritEncodings => new byte[]
{
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0,
0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
2, 1, 0, 0, 0, 1, 0, 2, 0, 0, 0, 2, 0, 0, 0,
1, 2, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 0,
0, 2, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0, 0,
2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
2, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1, 1, 2, 0, 0,
0, 2, 1, 0, 0, 1, 2, 1, 0, 0, 2, 2, 1, 0, 0,
2, 1, 2, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 2, 2,
2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 0, 0, 1, 0,
1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 0, 2, 1, 0,
0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 2, 1, 0, 1, 0,
1, 0, 2, 1, 0, 0, 2, 0, 1, 0, 1, 2, 0, 1, 0,
2, 2, 0, 1, 0, 2, 0, 2, 1, 0, 0, 2, 2, 1, 0,
1, 2, 2, 1, 0, 2, 2, 2, 1, 0, 2, 0, 2, 1, 0,
0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 2, 0, 1, 1, 0,
0, 1, 2, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
2, 1, 1, 1, 0, 1, 1, 2, 1, 0, 0, 2, 1, 1, 0,
1, 2, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 2, 1, 0,
0, 1, 0, 2, 2, 1, 1, 0, 2, 2, 2, 1, 0, 2, 2,
1, 0, 2, 2, 2, 0, 0, 0, 2, 0, 1, 0, 0, 2, 0,
2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 1, 0, 2, 0,
1, 1, 0, 2, 0, 2, 1, 0, 2, 0, 1, 0, 2, 2, 0,
0, 2, 0, 2, 0, 1, 2, 0, 2, 0, 2, 2, 0, 2, 0,
2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 1, 2, 2, 2, 0,
2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 1, 2, 0,
1, 0, 1, 2, 0, 2, 0, 1, 2, 0, 0, 1, 2, 2, 0,
0, 1, 1, 2, 0, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0,
1, 1, 2, 2, 0, 0, 2, 1, 2, 0, 1, 2, 1, 2, 0,
2, 2, 1, 2, 0, 2, 1, 2, 2, 0, 0, 2, 0, 2, 2,
1, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2,
0, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 0, 0, 0, 2,
0, 0, 2, 0, 2, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2,
2, 1, 0, 0, 2, 1, 0, 2, 0, 2, 0, 2, 0, 0, 2,
1, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2,
0, 2, 2, 0, 2, 1, 2, 2, 0, 2, 2, 2, 2, 0, 2,
2, 0, 2, 0, 2, 0, 0, 1, 0, 2, 1, 0, 1, 0, 2,
2, 0, 1, 0, 2, 0, 1, 2, 0, 2, 0, 1, 1, 0, 2,
1, 1, 1, 0, 2, 2, 1, 1, 0, 2, 1, 1, 2, 0, 2,
0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 2, 2, 1, 0, 2,
2, 1, 2, 0, 2, 0, 2, 2, 2, 2, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 2, 0, 1,
0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 2, 1, 0, 0, 1,
1, 0, 2, 0, 1, 0, 2, 0, 0, 1, 1, 2, 0, 0, 1,
2, 2, 0, 0, 1, 2, 0, 2, 0, 1, 0, 2, 2, 0, 1,
1, 2, 2, 0, 1, 2, 2, 2, 0, 1, 2, 0, 2, 0, 1,
0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 2, 0, 1, 0, 1,
0, 1, 2, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1,
2, 1, 1, 0, 1, 1, 1, 2, 0, 1, 0, 2, 1, 0, 1,
1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 2, 0, 1,
0, 0, 1, 2, 2, 1, 0, 1, 2, 2, 2, 0, 1, 2, 2,
0, 1, 2, 2, 2, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
2, 0, 0, 1, 1, 0, 0, 2, 1, 1, 0, 1, 0, 1, 1,
1, 1, 0, 1, 1, 2, 1, 0, 1, 1, 1, 0, 2, 1, 1,
0, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1,
2, 0, 2, 1, 1, 0, 2, 2, 1, 1, 1, 2, 2, 1, 1,
2, 2, 2, 1, 1, 2, 0, 2, 1, 1, 0, 0, 1, 1, 1,
1, 0, 1, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
1, 1, 2, 1, 1, 0, 2, 1, 1, 1, 1, 2, 1, 1, 1,
2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 0, 1, 1, 2, 2,
1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2,
0, 0, 0, 2, 1, 1, 0, 0, 2, 1, 2, 0, 0, 2, 1,
0, 0, 2, 2, 1, 0, 1, 0, 2, 1, 1, 1, 0, 2, 1,
2, 1, 0, 2, 1, 1, 0, 2, 2, 1, 0, 2, 0, 2, 1,
1, 2, 0, 2, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1,
0, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1,
2, 0, 2, 2, 1, 0, 0, 1, 2, 1, 1, 0, 1, 2, 1,
2, 0, 1, 2, 1, 0, 1, 2, 2, 1, 0, 1, 1, 2, 1,
1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1,
0, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1,
2, 1, 2, 2, 1, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2,
2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 0, 0, 0, 1, 2,
1, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 1, 2,
0, 1, 0, 1, 2, 1, 1, 0, 1, 2, 2, 1, 0, 1, 2,
1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 2,
2, 2, 0, 1, 2, 2, 0, 2, 1, 2, 0, 2, 2, 1, 2,
1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 0, 2, 1, 2,
0, 0, 1, 1, 2, 1, 0, 1, 1, 2, 2, 0, 1, 1, 2,
0, 1, 2, 1, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 2,
2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 0, 2, 1, 1, 2,
1, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2,
0, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 1, 2, 2, 2
};
// Lookup table for packed quint blocks: 128 entries of 3 bytes each (384 bytes in total).
// Entry i starts at offset i * 3 and holds the three quints (each 0..4) for packed value i;
// GetQuintEncoding slices one entry out of it.
private static ReadOnlySpan<byte> QuintEncodings => new byte[]
{
0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0,
0, 4, 0, 4, 4, 0, 4, 4, 4, 0, 1, 0, 1, 1, 0,
2, 1, 0, 3, 1, 0, 4, 1, 0, 1, 4, 0, 4, 4, 1,
4, 4, 4, 0, 2, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0,
4, 2, 0, 2, 4, 0, 4, 4, 2, 4, 4, 4, 0, 3, 0,
1, 3, 0, 2, 3, 0, 3, 3, 0, 4, 3, 0, 3, 4, 0,
4, 4, 3, 4, 4, 4, 0, 0, 1, 1, 0, 1, 2, 0, 1,
3, 0, 1, 4, 0, 1, 0, 4, 1, 4, 0, 4, 0, 4, 4,
0, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 1, 4, 1, 1,
1, 4, 1, 4, 1, 4, 1, 4, 4, 0, 2, 1, 1, 2, 1,
2, 2, 1, 3, 2, 1, 4, 2, 1, 2, 4, 1, 4, 2, 4,
2, 4, 4, 0, 3, 1, 1, 3, 1, 2, 3, 1, 3, 3, 1,
4, 3, 1, 3, 4, 1, 4, 3, 4, 3, 4, 4, 0, 0, 2,
1, 0, 2, 2, 0, 2, 3, 0, 2, 4, 0, 2, 0, 4, 2,
2, 0, 4, 3, 0, 4, 0, 1, 2, 1, 1, 2, 2, 1, 2,
3, 1, 2, 4, 1, 2, 1, 4, 2, 2, 1, 4, 3, 1, 4,
0, 2, 2, 1, 2, 2, 2, 2, 2, 3, 2, 2, 4, 2, 2,
2, 4, 2, 2, 2, 4, 3, 2, 4, 0, 3, 2, 1, 3, 2,
2, 3, 2, 3, 3, 2, 4, 3, 2, 3, 4, 2, 2, 3, 4,
3, 3, 4, 0, 0, 3, 1, 0, 3, 2, 0, 3, 3, 0, 3,
4, 0, 3, 0, 4, 3, 0, 0, 4, 1, 0, 4, 0, 1, 3,
1, 1, 3, 2, 1, 3, 3, 1, 3, 4, 1, 3, 1, 4, 3,
0, 1, 4, 1, 1, 4, 0, 2, 3, 1, 2, 3, 2, 2, 3,
3, 2, 3, 4, 2, 3, 2, 4, 3, 0, 2, 4, 1, 2, 4,
0, 3, 3, 1, 3, 3, 2, 3, 3, 3, 3, 3, 4, 3, 3,
3, 4, 3, 0, 3, 4, 1, 3, 4
};
}
}

View file

@ -0,0 +1,31 @@
using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture.Astc
{
/// <summary>
/// Fixed-capacity inline list of <see cref="IntegerEncoded"/> values: a length counter followed
/// by storage sized for <c>Capacity</c> entries.
/// </summary>
[StructLayout(LayoutKind.Sequential, Size = IntegerEncoded.StructSize * Capacity + sizeof(int))]
internal struct IntegerSequence
{
    private const int Capacity = 100;

    private int _length;

    // First entry; the remaining entries follow it in memory via the declared struct size.
    private IntegerEncoded _start;

    /// <summary>Span over the entries added so far.</summary>
    public Span<IntegerEncoded> List => MemoryMarshal.CreateSpan(ref _start, _length);

    /// <summary>Empties the sequence without clearing the storage.</summary>
    public void Reset()
    {
        _length = 0;
    }

    /// <summary>Appends a copy of <paramref name="item"/>; capacity is only checked by a debug assertion.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Add(ref IntegerEncoded item)
    {
        Debug.Assert(_length < Capacity);

        // Grow first so that the span returned by List already covers the new slot.
        int slot = _length++;

        List[slot] = item;
    }
}
}

View file

@ -0,0 +1,819 @@
using Ryujinx.Graphics.Texture.Utils;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture
{
static class BC6Decoder
{
// 0x3C00 is the bit pattern of 1.0 in IEEE 754 half precision; it is written to the fourth
// (top 16 bits) channel of every decoded texel.
private const int HalfOne = 0x3C00;
/// <summary>
/// Decodes BC6 blocks from <paramref name="data"/> into <paramref name="output"/>, written as
/// one 64-bit texel (four 16-bit channels) per pixel.
/// </summary>
/// <param name="output">Destination buffer, reinterpreted as 64-bit texels with a row pitch of <paramref name="width"/>.</param>
/// <param name="data">Compressed data, reinterpreted as blocks stored row by row.</param>
/// <param name="width">Image width in pixels.</param>
/// <param name="height">Image height in pixels.</param>
/// <param name="signed">Selects the signed variant of the format.</param>
public static void Decode(Span<byte> output, ReadOnlySpan<byte> data, int width, int height, bool signed)
{
    ReadOnlySpan<Block> blocks = MemoryMarshal.Cast<byte, Block>(data);
    Span<ulong> texels = MemoryMarshal.Cast<byte, ulong>(output);

    int blocksPerRow = (width + 3) / 4;
    int blockRows = (height + 3) / 4;
    int blockIndex = 0;

    for (int blockY = 0; blockY < blockRows; blockY++)
    {
        int top = blockY * 4;

        // Blocks on the bottom/right edges may cover fewer than 4 rows/columns.
        int rows = Math.Min(4, height - top);

        for (int blockX = 0; blockX < blocksPerRow; blockX++, blockIndex++)
        {
            int left = blockX * 4;
            int columns = Math.Min(4, width - left);

            DecodeBlock(blocks[blockIndex], texels.Slice(top * width + left), columns, rows, width, signed);
        }
    }
}
/// <summary>
/// Decodes one BC6 block into <paramref name="output"/>.
/// </summary>
/// <param name="block">The 128-bit compressed block.</param>
/// <param name="output">Destination starting at the block's top-left texel.</param>
/// <param name="w">Number of columns to write (1..4); fewer than 4 on the right image edge.</param>
/// <param name="h">Number of rows to write (1..4); fewer than 4 on the bottom image edge.</param>
/// <param name="width">Row pitch of <paramref name="output"/> in texels.</param>
/// <param name="signed">Selects the signed variant of the format.</param>
private static void DecodeBlock(Block block, Span<ulong> output, int w, int h, int width, bool signed)
{
    // The mode is 2 bits wide unless bit 1 is set, in which case it is 5 bits wide.
    int mode = (int)(block.Low & 3);
    if ((mode & 2) != 0)
    {
        mode = (int)(block.Low & 0x1f);
    }

    Span<RgbaColor32> endPoints = stackalloc RgbaColor32[4];
    int subsetCount = DecodeEndPoints(ref block, endPoints, mode, signed);
    if (subsetCount == 0)
    {
        // Mode is invalid, the spec mandates that hardware fills the block with
        // a opaque black color.
        for (int ty = 0; ty < h; ty++)
        {
            int baseOffs = ty * width;
            for (int tx = 0; tx < w; tx++)
            {
                output[baseOffs + tx] = (ulong)HalfOne << 48;
            }
        }

        return;
    }

    int partition;
    int indexBitCount;
    ulong indices;

    // The anchor index of each subset is stored with one bit less than the others. The index
    // bits are spread out here so that every texel owns a full indexBitCount-wide slot.
    if (subsetCount > 1)
    {
        partition = (int)((block.High >> 13) & 0x1F);
        indexBitCount = 3;

        int fixUpIndex = BC67Tables.FixUpIndices[subsetCount - 1][partition][1] * 3;
        ulong lowMask = (ulong.MaxValue >> (65 - fixUpIndex)) << 3;
        ulong highMask = ulong.MaxValue << (fixUpIndex + 3);

        indices = ((block.High >> 16) & highMask) | ((block.High >> 17) & lowMask) | ((block.High >> 18) & 3);
    }
    else
    {
        partition = 0;
        indexBitCount = 4;
        indices = (block.High & ~0xFUL) | ((block.High >> 1) & 7);
    }

    ulong indexMask = (1UL << indexBitCount) - 1;

    for (int ty = 0; ty < h; ty++)
    {
        int baseOffs = ty * width;

        for (int tx = 0; tx < w; tx++)
        {
            // Indices are stored for all 16 texels of the block, so the slot must be picked by
            // the texel position. Consuming them sequentially would read the wrong indices on
            // every row after the first when the block is clipped horizontally (w < 4).
            int texel = ty * 4 + tx;
            int index = (int)((indices >> (texel * indexBitCount)) & indexMask);
            int endPointBase = BC67Tables.PartitionTable[subsetCount - 1][partition][texel] << 1;

            RgbaColor32 color1 = endPoints[endPointBase];
            RgbaColor32 color2 = endPoints[endPointBase + 1];
            RgbaColor32 color = BC67Utils.Interpolate(color1, color2, index, indexBitCount);

            output[baseOffs + tx] =
                (ulong)FinishUnquantize(color.R, signed) |
                ((ulong)FinishUnquantize(color.G, signed) << 16) |
                ((ulong)FinishUnquantize(color.B, signed) << 32) |
                ((ulong)HalfOne << 48);
        }
    }
}
/// <summary>
/// Extracts the endpoint colors of a BC6 block for the given mode and unquantizes them.
/// </summary>
/// <param name="block">The 128-bit compressed block.</param>
/// <param name="endPoints">Receives two endpoints per subset (entries 0-1, and 2-3 for two subsets).</param>
/// <param name="mode">Block mode as extracted by the caller.</param>
/// <param name="signed">Selects the signed variant of the format.</param>
/// <returns>The number of subsets (1 or 2), or 0 when the mode is invalid.</returns>
/// <remarks>
/// Each case only extracts the raw fields and, for delta modes, adds the sign-extended deltas
/// to the base endpoint. Wrapping to the endpoint precision and unquantization are done once
/// after the switch.
/// </remarks>
private static int DecodeEndPoints(ref Block block, Span<RgbaColor32> endPoints, int mode, bool signed)
{
    ulong low = block.Low;
    ulong high = block.High;

    int r0 = 0, g0 = 0, b0 = 0, r1 = 0, g1 = 0, b1 = 0, r2 = 0, g2 = 0, b2 = 0, r3 = 0, g3 = 0, b3 = 0;
    int subsetCount;

    // Precision of the endpoints of the decoded mode.
    int endPointBits = 0;

    switch (mode)
    {
        case 0:
            r0 = (int)(low >> 5) & 0x3FF;
            g0 = (int)(low >> 15) & 0x3FF;
            b0 = (int)(low >> 25) & 0x3FF;

            if (signed)
            {
                r0 = SignExtend(r0, 10);
                g0 = SignExtend(g0, 10);
                b0 = SignExtend(b0, 10);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 5);
            g1 = g0 + SignExtend((int)(low >> 45), 5);
            b1 = b0 + SignExtend((int)(low >> 55), 5);

            r2 = r0 + SignExtend((int)(high >> 1), 5);
            g2 = g0 + SignExtend((int)(((low << 2) & 0x10) | ((low >> 41) & 0xF)), 5);
            b2 = b0 + SignExtend((int)(((low << 1) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5);

            r3 = r0 + SignExtend((int)(high >> 7), 5);
            g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5);
            b3 = b0 + SignExtend((int)(
                ((low) & 0x10) |
                ((high >> 9) & 0x08) |
                ((high >> 4) & 0x04) |
                ((low >> 59) & 0x02) |
                ((low >> 50) & 0x01)), 5);

            endPointBits = 10;
            subsetCount = 2;
            break;
        case 1:
            r0 = (int)(low >> 5) & 0x7F;
            g0 = (int)(low >> 15) & 0x7F;
            b0 = (int)(low >> 25) & 0x7F;

            if (signed)
            {
                r0 = SignExtend(r0, 7);
                g0 = SignExtend(g0, 7);
                b0 = SignExtend(b0, 7);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 6);
            g1 = g0 + SignExtend((int)(low >> 45), 6);
            b1 = b0 + SignExtend((int)(low >> 55), 6);

            r2 = r0 + SignExtend((int)(high >> 1), 6);
            g2 = g0 + SignExtend((int)(((low << 3) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0x0F)), 6);
            b2 = b0 + SignExtend((int)(
                ((low >> 17) & 0x20) |
                ((low >> 10) & 0x10) |
                ((high << 3) & 0x08) |
                (low >> 61)), 6);

            r3 = r0 + SignExtend((int)(high >> 7), 6);
            g3 = g0 + SignExtend((int)(((low << 1) & 0x30) | ((low >> 51) & 0xF)), 6);
            b3 = b0 + SignExtend((int)(
                ((low >> 28) & 0x20) |
                ((low >> 30) & 0x10) |
                ((low >> 29) & 0x08) |
                ((low >> 21) & 0x04) |
                ((low >> 12) & 0x03)), 6);

            endPointBits = 7;
            subsetCount = 2;
            break;
        case 2:
            r0 = (int)(((low >> 30) & 0x400) | ((low >> 5) & 0x3FF));
            g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF));
            b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF));

            if (signed)
            {
                r0 = SignExtend(r0, 11);
                g0 = SignExtend(g0, 11);
                b0 = SignExtend(b0, 11);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 5);
            g1 = g0 + SignExtend((int)(low >> 45), 4);
            b1 = b0 + SignExtend((int)(low >> 55), 4);

            r2 = r0 + SignExtend((int)(high >> 1), 5);
            g2 = g0 + SignExtend((int)(low >> 41), 4);
            b2 = b0 + SignExtend((int)(((high << 3) & 8) | (low >> 61)), 4);

            r3 = r0 + SignExtend((int)(high >> 7), 5);
            g3 = g0 + SignExtend((int)(low >> 51), 4);
            b3 = b0 + SignExtend((int)(
                ((high >> 9) & 8) |
                ((high >> 4) & 4) |
                ((low >> 59) & 2) |
                ((low >> 50) & 1)), 4);

            endPointBits = 11;
            subsetCount = 2;
            break;
        case 3:
            // Both endpoints are stored directly, there are no deltas.
            r0 = (int)(low >> 5) & 0x3FF;
            g0 = (int)(low >> 15) & 0x3FF;
            b0 = (int)(low >> 25) & 0x3FF;

            r1 = (int)(low >> 35) & 0x3FF;
            g1 = (int)(low >> 45) & 0x3FF;
            b1 = (int)(((high << 9) & 0x200) | (low >> 55));

            if (signed)
            {
                r0 = SignExtend(r0, 10);
                g0 = SignExtend(g0, 10);
                b0 = SignExtend(b0, 10);

                r1 = SignExtend(r1, 10);
                g1 = SignExtend(g1, 10);
                b1 = SignExtend(b1, 10);
            }

            endPointBits = 10;
            subsetCount = 1;
            break;
        case 6:
            r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF));
            g0 = (int)(((low >> 40) & 0x400) | ((low >> 15) & 0x3FF));
            b0 = (int)(((low >> 49) & 0x400) | ((low >> 25) & 0x3FF));

            if (signed)
            {
                r0 = SignExtend(r0, 11);
                g0 = SignExtend(g0, 11);
                b0 = SignExtend(b0, 11);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 4);
            g1 = g0 + SignExtend((int)(low >> 45), 5);
            b1 = b0 + SignExtend((int)(low >> 55), 4);

            r2 = r0 + SignExtend((int)(high >> 1), 4);
            g2 = g0 + SignExtend((int)(((high >> 7) & 0x10) | ((low >> 41) & 0x0F)), 5);
            b2 = b0 + SignExtend((int)(((high << 3) & 0x08) | ((low >> 61))), 4);

            r3 = r0 + SignExtend((int)(high >> 7), 4);
            g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 5);
            b3 = b0 + SignExtend((int)(
                ((high >> 9) & 8) |
                ((high >> 4) & 4) |
                ((low >> 59) & 2) |
                ((high >> 5) & 1)), 4);

            endPointBits = 11;
            subsetCount = 2;
            break;
        case 7:
            r0 = (int)(((low >> 34) & 0x400) | ((low >> 5) & 0x3FF));
            g0 = (int)(((low >> 44) & 0x400) | ((low >> 15) & 0x3FF));
            b0 = (int)(((high << 10) & 0x400) | ((low >> 25) & 0x3FF));

            if (signed)
            {
                r0 = SignExtend(r0, 11);
                g0 = SignExtend(g0, 11);
                b0 = SignExtend(b0, 11);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 9);
            g1 = g0 + SignExtend((int)(low >> 45), 9);
            b1 = b0 + SignExtend((int)(low >> 55), 9);

            endPointBits = 11;
            subsetCount = 1;
            break;
        case 10:
            r0 = (int)(((low >> 29) & 0x400) | ((low >> 5) & 0x3FF));
            g0 = (int)(((low >> 39) & 0x400) | ((low >> 15) & 0x3FF));
            b0 = (int)(((low >> 50) & 0x400) | ((low >> 25) & 0x3FF));

            if (signed)
            {
                r0 = SignExtend(r0, 11);
                g0 = SignExtend(g0, 11);
                b0 = SignExtend(b0, 11);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 4);
            g1 = g0 + SignExtend((int)(low >> 45), 4);
            b1 = b0 + SignExtend((int)(low >> 55), 5);

            r2 = r0 + SignExtend((int)(high >> 1), 4);
            g2 = g0 + SignExtend((int)(low >> 41), 4);
            b2 = b0 + SignExtend((int)(((low >> 36) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5);

            r3 = r0 + SignExtend((int)(high >> 7), 4);
            g3 = g0 + SignExtend((int)(low >> 51), 4);
            b3 = b0 + SignExtend((int)(
                ((high >> 7) & 0x10) |
                ((high >> 9) & 0x08) |
                ((high >> 4) & 0x06) |
                ((low >> 50) & 0x01)), 5);

            endPointBits = 11;
            subsetCount = 2;
            break;
        case 11:
            r0 = (int)(((low >> 32) & 0x800) | ((low >> 34) & 0x400) | ((low >> 5) & 0x3FF));
            g0 = (int)(((low >> 42) & 0x800) | ((low >> 44) & 0x400) | ((low >> 15) & 0x3FF));
            b0 = (int)(((low >> 52) & 0x800) | ((high << 10) & 0x400) | ((low >> 25) & 0x3FF));

            if (signed)
            {
                r0 = SignExtend(r0, 12);
                g0 = SignExtend(g0, 12);
                b0 = SignExtend(b0, 12);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 8);
            g1 = g0 + SignExtend((int)(low >> 45), 8);
            b1 = b0 + SignExtend((int)(low >> 55), 8);

            endPointBits = 12;
            subsetCount = 1;
            break;
        case 14:
            r0 = (int)(low >> 5) & 0x1FF;
            g0 = (int)(low >> 15) & 0x1FF;
            b0 = (int)(low >> 25) & 0x1FF;

            if (signed)
            {
                r0 = SignExtend(r0, 9);
                g0 = SignExtend(g0, 9);
                b0 = SignExtend(b0, 9);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 5);
            g1 = g0 + SignExtend((int)(low >> 45), 5);
            b1 = b0 + SignExtend((int)(low >> 55), 5);

            r2 = r0 + SignExtend((int)(high >> 1), 5);
            g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5);
            b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5);

            r3 = r0 + SignExtend((int)(high >> 7), 5);
            g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5);
            b3 = b0 + SignExtend((int)(
                ((low >> 30) & 0x10) |
                ((high >> 9) & 0x08) |
                ((high >> 4) & 0x04) |
                ((low >> 59) & 0x02) |
                ((low >> 50) & 0x01)), 5);

            endPointBits = 9;
            subsetCount = 2;
            break;
        case 15:
            // The upper 6 bits of each base endpoint are stored in reverse order.
            r0 = (BitReverse6((int)(low >> 39) & 0x3F) << 10) | ((int)(low >> 5) & 0x3FF);
            g0 = (BitReverse6((int)(low >> 49) & 0x3F) << 10) | ((int)(low >> 15) & 0x3FF);
            b0 = ((BitReverse6((int)(low >> 59)) | (int)(high & 1)) << 10) | ((int)(low >> 25) & 0x3FF);

            if (signed)
            {
                r0 = SignExtend(r0, 16);
                g0 = SignExtend(g0, 16);
                b0 = SignExtend(b0, 16);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 4);
            g1 = g0 + SignExtend((int)(low >> 45), 4);
            b1 = b0 + SignExtend((int)(low >> 55), 4);

            // Unquantize leaves 16-bit values untouched, so only the wrap applies to this mode.
            endPointBits = 16;
            subsetCount = 1;
            break;
        case 18:
            r0 = (int)(low >> 5) & 0xFF;
            g0 = (int)(low >> 15) & 0xFF;
            b0 = (int)(low >> 25) & 0xFF;

            if (signed)
            {
                r0 = SignExtend(r0, 8);
                g0 = SignExtend(g0, 8);
                b0 = SignExtend(b0, 8);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 6);
            g1 = g0 + SignExtend((int)(low >> 45), 5);
            b1 = b0 + SignExtend((int)(low >> 55), 5);

            r2 = r0 + SignExtend((int)(high >> 1), 6);
            g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5);
            b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 8) | (low >> 61)), 5);

            r3 = r0 + SignExtend((int)(high >> 7), 6);
            g3 = g0 + SignExtend((int)(((low >> 9) & 0x10) | ((low >> 51) & 0xF)), 5);
            b3 = b0 + SignExtend((int)(
                ((low >> 30) & 0x18) |
                ((low >> 21) & 0x04) |
                ((low >> 59) & 0x02) |
                ((low >> 50) & 0x01)), 5);

            endPointBits = 8;
            subsetCount = 2;
            break;
        case 22:
            r0 = (int)(low >> 5) & 0xFF;
            g0 = (int)(low >> 15) & 0xFF;
            b0 = (int)(low >> 25) & 0xFF;

            if (signed)
            {
                r0 = SignExtend(r0, 8);
                g0 = SignExtend(g0, 8);
                b0 = SignExtend(b0, 8);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 5);
            g1 = g0 + SignExtend((int)(low >> 45), 6);
            b1 = b0 + SignExtend((int)(low >> 55), 5);

            r2 = r0 + SignExtend((int)(high >> 1), 5);
            g2 = g0 + SignExtend((int)(((low >> 18) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 6);
            b2 = b0 + SignExtend((int)(((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61)), 5);

            r3 = r0 + SignExtend((int)(high >> 7), 5);
            g3 = g0 + SignExtend((int)(((low >> 28) & 0x20) | ((low >> 36) & 0x10) | ((low >> 51) & 0x0F)), 6);
            b3 = b0 + SignExtend((int)(
                ((low >> 30) & 0x10) |
                ((high >> 9) & 0x08) |
                ((high >> 4) & 0x04) |
                ((low >> 59) & 0x02) |
                ((low >> 13) & 0x01)), 5);

            endPointBits = 8;
            subsetCount = 2;
            break;
        case 26:
            r0 = (int)(low >> 5) & 0xFF;
            g0 = (int)(low >> 15) & 0xFF;
            b0 = (int)(low >> 25) & 0xFF;

            if (signed)
            {
                r0 = SignExtend(r0, 8);
                g0 = SignExtend(g0, 8);
                b0 = SignExtend(b0, 8);
            }

            r1 = r0 + SignExtend((int)(low >> 35), 5);
            g1 = g0 + SignExtend((int)(low >> 45), 5);
            b1 = b0 + SignExtend((int)(low >> 55), 6);

            r2 = r0 + SignExtend((int)(high >> 1), 5);
            g2 = g0 + SignExtend((int)(((low >> 20) & 0x10) | ((low >> 41) & 0xF)), 5);
            b2 = b0 + SignExtend((int)(
                ((low >> 18) & 0x20) |
                ((low >> 10) & 0x10) |
                ((high << 3) & 0x08) |
                (low >> 61)), 6);

            r3 = r0 + SignExtend((int)(high >> 7), 5);
            g3 = g0 + SignExtend((int)(((low >> 36) & 0x10) | ((low >> 51) & 0xF)), 5);
            b3 = b0 + SignExtend((int)(
                ((low >> 28) & 0x20) |
                ((low >> 30) & 0x10) |
                ((high >> 9) & 0x08) |
                ((high >> 4) & 0x04) |
                ((low >> 12) & 0x02) |
                ((low >> 50) & 0x01)), 6);

            endPointBits = 8;
            subsetCount = 2;
            break;
        case 30:
            // All four endpoints are stored directly, there are no deltas.
            r0 = (int)(low >> 5) & 0x3F;
            g0 = (int)(low >> 15) & 0x3F;
            b0 = (int)(low >> 25) & 0x3F;

            r1 = (int)(low >> 35) & 0x3F;
            g1 = (int)(low >> 45) & 0x3F;
            b1 = (int)(low >> 55) & 0x3F;

            r2 = (int)(high >> 1) & 0x3F;
            g2 = (int)(((low >> 16) & 0x20) | ((low >> 20) & 0x10) | ((low >> 41) & 0xF));
            b2 = (int)(((low >> 17) & 0x20) | ((low >> 10) & 0x10) | ((high << 3) & 0x08) | (low >> 61));

            r3 = (int)(high >> 7) & 0x3F;
            g3 = (int)(((low >> 26) & 0x20) | ((low >> 7) & 0x10) | ((low >> 51) & 0xF));
            b3 = (int)(
                ((low >> 28) & 0x20) |
                ((low >> 30) & 0x10) |
                ((low >> 29) & 0x08) |
                ((low >> 21) & 0x04) |
                ((low >> 12) & 0x03));

            if (signed)
            {
                r0 = SignExtend(r0, 6);
                g0 = SignExtend(g0, 6);
                b0 = SignExtend(b0, 6);

                r1 = SignExtend(r1, 6);
                g1 = SignExtend(g1, 6);
                b1 = SignExtend(b1, 6);

                r2 = SignExtend(r2, 6);
                g2 = SignExtend(g2, 6);
                b2 = SignExtend(b2, 6);

                r3 = SignExtend(r3, 6);
                g3 = SignExtend(g3, 6);
                b3 = SignExtend(b3, 6);
            }

            endPointBits = 6;
            subsetCount = 2;
            break;
        default:
            subsetCount = 0;
            break;
    }

    if (subsetCount > 0)
    {
        // The base endpoint is already in range (masked, or sign-extended when signed).
        endPoints[0] = new RgbaColor32(
            Unquantize(r0, endPointBits, signed),
            Unquantize(g0, endPointBits, signed),
            Unquantize(b0, endPointBits, signed),
            HalfOne);

        endPoints[1] = new RgbaColor32(
            FinishEndPoint(r1, endPointBits, signed),
            FinishEndPoint(g1, endPointBits, signed),
            FinishEndPoint(b1, endPointBits, signed),
            HalfOne);

        if (subsetCount > 1)
        {
            endPoints[2] = new RgbaColor32(
                FinishEndPoint(r2, endPointBits, signed),
                FinishEndPoint(g2, endPointBits, signed),
                FinishEndPoint(b2, endPointBits, signed),
                HalfOne);

            endPoints[3] = new RgbaColor32(
                FinishEndPoint(r3, endPointBits, signed),
                FinishEndPoint(g3, endPointBits, signed),
                FinishEndPoint(b3, endPointBits, signed),
                HalfOne);
        }
    }

    return subsetCount;
}

// Wraps an endpoint to 'bits' bits and unquantizes it. For the signed format the wrapped value
// is sign-extended, so that a base + delta sum that went negative stays negative instead of
// being treated as a large positive value. Values that are already in range are unaffected.
private static int FinishEndPoint(int value, int bits, bool signed)
{
    int wrapped = signed ? SignExtend(value, bits) : value & ((1 << bits) - 1);

    return Unquantize(wrapped, bits, signed);
}
/// <summary>
/// Sign-extends the low <paramref name="bits"/> bits of <paramref name="value"/> to a 32-bit integer.
/// Any bits above the field are discarded.
/// </summary>
/// <param name="value">Value holding the field in its low bits</param>
/// <param name="bits">Width of the field in bits</param>
/// <returns>The field interpreted as a two's complement number</returns>
private static int SignExtend(int value, int bits)
{
    // Move the field's top bit into bit 31, then let the arithmetic right shift replicate it.
    int unusedBits = 32 - bits;
    int alignedToTop = value << unusedBits;

    return alignedToTop >> unusedBits;
}
/// <summary>
/// Expands an endpoint component stored with <paramref name="bits"/> bits of precision
/// to the intermediate 16-bit range used before interpolation.
/// </summary>
/// <param name="value">Quantized component (already sign-extended when <paramref name="signed"/> is true)</param>
/// <param name="bits">Precision of the quantized component, in bits</param>
/// <param name="signed">True to use the signed expansion, false for the unsigned one</param>
/// <returns>The unquantized component</returns>
private static int Unquantize(int value, int bits, bool signed)
{
    if (!signed)
    {
        // Wide enough components and zero pass through unchanged.
        if (bits >= 15 || value == 0)
        {
            return value;
        }

        // The largest representable value maps to the top of the range.
        if (value == (1 << bits) - 1)
        {
            return 0xFFFF;
        }

        return ((value << 16) + 0x8000) >> bits;
    }

    if (bits >= 16 || value == 0)
    {
        return value;
    }

    // The signed case works on the magnitude and restores the sign at the end.
    bool negative = value < 0;
    int magnitude = negative ? -value : value;
    int largestMagnitude = (1 << (bits - 1)) - 1;

    int expanded = magnitude >= largestMagnitude
        ? 0x7FFF
        : ((magnitude << 15) + 0x4000) >> (bits - 1);

    return negative ? -expanded : expanded;
}
/// <summary>
/// Performs the final scaling of an interpolated component and packs it into 16 bits.
/// </summary>
/// <param name="value">Interpolated component</param>
/// <param name="signed">True to produce a sign-magnitude result, false for an unsigned one</param>
/// <returns>
/// For unsigned input, <c>(value * 31) >> 6</c>. For signed input, the magnitude scaled by 31/32
/// with bit 15 set when the value is negative.
/// </returns>
private static ushort FinishUnquantize(int value, bool signed)
{
    if (!signed)
    {
        return (ushort)((value * 31) >> 6);
    }

    // Scale the magnitude so the rounding is symmetric around zero.
    int scaled = value < 0 ? -((-value * 31) >> 5) : (value * 31) >> 5;

    if (scaled >= 0)
    {
        return (ushort)scaled;
    }

    // Negative results are stored as sign bit plus magnitude.
    return (ushort)(0x8000 | -scaled);
}
/// <summary>
/// Reverses the order of the low 6 bits of <paramref name="value"/>. Higher bits are ignored.
/// </summary>
/// <param name="value">Value whose low 6 bits are reversed</param>
/// <returns>A 6-bit value with bit 0 and bit 5, bit 1 and bit 4, and bit 2 and bit 3 swapped</returns>
private static int BitReverse6(int value)
{
    int reversed = 0;

    // Shift source bits in from the least significant end, so the first one ends up on top.
    for (int bit = 0; bit < 6; bit++)
    {
        reversed = (reversed << 1) | ((value >> bit) & 1);
    }

    return reversed;
}
}
}

View file

@ -0,0 +1,220 @@
using Ryujinx.Graphics.Texture.Utils;
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture
{
static class BC7Decoder
{
/// <summary>
/// Decodes a single BC7 image into 32-bit pixels.
/// </summary>
/// <param name="output">Destination buffer, written as one 32-bit value per pixel, <paramref name="width"/> pixels per row</param>
/// <param name="data">Compressed blocks, stored row by row</param>
/// <param name="width">Image width in pixels</param>
/// <param name="height">Image height in pixels</param>
public static void Decode(Span<byte> output, ReadOnlySpan<byte> data, int width, int height)
{
    ReadOnlySpan<Block> blocks = MemoryMarshal.Cast<byte, Block>(data);
    Span<uint> pixels = MemoryMarshal.Cast<byte, uint>(output);

    int blocksPerRow = (width + 3) / 4;
    int blockRows = (height + 3) / 4;

    for (int blockY = 0; blockY < blockRows; blockY++)
    {
        int pixelY = blockY * 4;

        // Blocks on the bottom/right edge may be only partially inside the image.
        int visibleRows = Math.Min(4, height - pixelY);
        int rowStart = blockY * blocksPerRow;

        for (int blockX = 0; blockX < blocksPerRow; blockX++)
        {
            int pixelX = blockX * 4;
            int visibleColumns = Math.Min(4, width - pixelX);

            DecodeBlock(
                blocks[rowStart + blockX],
                pixels.Slice(pixelY * width + pixelX),
                visibleColumns,
                visibleRows,
                width);
        }
    }
}
/// <summary>
/// Decodes one BC7 block and writes the part of it that is inside the image.
/// </summary>
/// <param name="block">Compressed block</param>
/// <param name="output">Destination pixels, starting at the top-left pixel of the block</param>
/// <param name="w">Number of block columns to write (at most 4)</param>
/// <param name="h">Number of block rows to write (at most 4)</param>
/// <param name="width">Distance between output rows, in pixels</param>
private static void DecodeBlock(Block block, Span<uint> output, int w, int h, int width)
{
// The mode is the position of the lowest set bit of the first byte.
// ORing with 0x100 makes the count come out as 8 when that byte is zero.
int mode = BitOperations.TrailingZeroCount((byte)block.Low | 0x100);
if (mode == 8)
{
// Mode is invalid, the spec mandates that hardware fills the block with
// a transparent black color.
for (int ty = 0; ty < h; ty++)
{
int baseOffs = ty * width;
for (int tx = 0; tx < w; tx++)
{
int offs = baseOffs + tx;
output[offs] = 0;
}
}
return;
}
BC7ModeInfo modeInfo = BC67Tables.BC7ModeInfos[mode];
// Fields are read sequentially, starting right after the mode bits.
int offset = mode + 1;
int partition = (int)block.Decode(ref offset, modeInfo.PartitionBitCount);
int rotation = (int)block.Decode(ref offset, modeInfo.RotationBitCount);
int indexMode = (int)block.Decode(ref offset, modeInfo.IndexModeBitCount);
Debug.Assert(partition < 64);
Debug.Assert(rotation < 4);
Debug.Assert(indexMode < 2);
int endPointCount = modeInfo.SubsetCount * 2;
Span<RgbaColor32> endPoints = stackalloc RgbaColor32[endPointCount];
Span<byte> pValues = stackalloc byte[modeInfo.PBits];
endPoints.Fill(new RgbaColor32(0, 0, 0, 255));
// Endpoints are stored one channel at a time: all reds, then all greens, all blues and (if present) all alphas.
for (int i = 0; i < endPointCount; i++)
{
endPoints[i].R = (int)block.Decode(ref offset, modeInfo.ColorDepth);
}
for (int i = 0; i < endPointCount; i++)
{
endPoints[i].G = (int)block.Decode(ref offset, modeInfo.ColorDepth);
}
for (int i = 0; i < endPointCount; i++)
{
endPoints[i].B = (int)block.Decode(ref offset, modeInfo.ColorDepth);
}
if (modeInfo.AlphaDepth != 0)
{
for (int i = 0; i < endPointCount; i++)
{
endPoints[i].A = (int)block.Decode(ref offset, modeInfo.AlphaDepth);
}
}
for (int i = 0; i < modeInfo.PBits; i++)
{
pValues[i] = (byte)block.Decode(ref offset, 1);
}
for (int i = 0; i < endPointCount; i++)
{
// -1 tells Unquantize that this mode has no P-bit.
int pBit = -1;
if (modeInfo.PBits != 0)
{
// When there are fewer P-bits than endpoints, consecutive endpoints share one.
int pIndex = (i * modeInfo.PBits) / endPointCount;
pBit = pValues[pIndex];
}
Unquantize(ref endPoints[i], modeInfo.ColorDepth, modeInfo.AlphaDepth, pBit);
}
byte[] partitionTable = BC67Tables.PartitionTable[modeInfo.SubsetCount - 1][partition];
byte[] fixUpTable = BC67Tables.FixUpIndices[modeInfo.SubsetCount - 1][partition];
Span<byte> colorIndices = stackalloc byte[16];
for (int i = 0; i < 16; i++)
{
byte subset = partitionTable[i];
// The fix-up (anchor) texel of each subset is stored with one bit less.
int bitCount = i == fixUpTable[subset] ? modeInfo.ColorIndexBitCount - 1 : modeInfo.ColorIndexBitCount;
colorIndices[i] = (byte)block.Decode(ref offset, bitCount);
Debug.Assert(colorIndices[i] < 16);
}
Span<byte> alphaIndices = stackalloc byte[16];
if (modeInfo.AlphaIndexBitCount != 0)
{
for (int i = 0; i < 16; i++)
{
// For the separate alpha index set only texel 0 is stored with one bit less.
int bitCount = i != 0 ? modeInfo.AlphaIndexBitCount : modeInfo.AlphaIndexBitCount - 1;
alphaIndices[i] = (byte)block.Decode(ref offset, bitCount);
Debug.Assert(alphaIndices[i] < 16);
}
}
// Only the w x h texels that are inside the image are interpolated and written.
for (int ty = 0; ty < h; ty++)
{
int baseOffs = ty * width;
for (int tx = 0; tx < w; tx++)
{
int i = ty * 4 + tx;
RgbaColor32 color;
byte subset = partitionTable[i];
RgbaColor32 color1 = endPoints[subset * 2];
RgbaColor32 color2 = endPoints[subset * 2 + 1];
if (modeInfo.AlphaIndexBitCount != 0)
{
// With two index sets, indexMode selects which set drives color and which drives alpha.
if (indexMode == 0)
{
color = BC67Utils.Interpolate(color1, color2, colorIndices[i], alphaIndices[i], modeInfo.ColorIndexBitCount, modeInfo.AlphaIndexBitCount);
}
else
{
color = BC67Utils.Interpolate(color1, color2, alphaIndices[i], colorIndices[i], modeInfo.AlphaIndexBitCount, modeInfo.ColorIndexBitCount);
}
}
else
{
color = BC67Utils.Interpolate(color1, color2, colorIndices[i], colorIndices[i], modeInfo.ColorIndexBitCount, modeInfo.ColorIndexBitCount);
}
if (rotation != 0)
{
// Rotation swaps the alpha channel with red (1), green (2) or blue (3).
int a = color.A;
switch (rotation)
{
case 1: color.A = color.R; color.R = a; break;
case 2: color.A = color.G; color.G = a; break;
case 3: color.A = color.B; color.B = a; break;
}
}
RgbaColor8 color8 = color.GetColor8();
output[baseOffs + tx] = color8.ToUInt32();
}
}
}
/// <summary>
/// Expands all channels of an endpoint to 8 bits in place.
/// </summary>
/// <param name="color">Endpoint holding the raw channel values; receives the expanded values</param>
/// <param name="colorDepth">Stored precision of the red, green and blue channels, in bits</param>
/// <param name="alphaDepth">Stored precision of the alpha channel in bits, or 0 when the mode has no alpha</param>
/// <param name="pBit">P-bit (0 or 1) shared by all channels, or a negative value when the mode has none</param>
private static void Unquantize(ref RgbaColor32 color, int colorDepth, int alphaDepth, int pBit)
{
    color.R = UnquantizeComponent(color.R, colorDepth, pBit);
    color.G = UnquantizeComponent(color.G, colorDepth, pBit);
    color.B = UnquantizeComponent(color.B, colorDepth, pBit);

    if (alphaDepth != 0)
    {
        color.A = UnquantizeComponent(color.A, alphaDepth, pBit);
    }
    else
    {
        // Modes without alpha are always fully opaque.
        color.A = 255;
    }
}
/// <summary>
/// Expands a single endpoint channel to 8 bits.
/// </summary>
/// <param name="component">Raw channel value</param>
/// <param name="bits">Stored precision of the channel in bits, not counting the P-bit</param>
/// <param name="pBit">P-bit (0 or 1), or a negative value when there is none</param>
/// <returns>The 8-bit channel value</returns>
private static int UnquantizeComponent(int component, int bits, int pBit)
{
    int padding = 8 - bits;

    // Left-align the stored bits inside the byte.
    int expanded = component << padding;

    if (pBit < 0)
    {
        // Fill the low bits by replicating the high ones.
        return expanded | (expanded >> bits);
    }

    Debug.Assert(pBit <= 1);

    // The P-bit acts as an extra least significant bit, so the effective precision is bits + 1.
    return expanded | (expanded >> (bits + 1)) | (pBit << (padding - 1));
}
}
}

View file

@ -0,0 +1,894 @@
using Ryujinx.Common;
using System;
using System.Buffers.Binary;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Texture
{
public static class BCnDecoder
{
private const int BlockWidth = 4;
private const int BlockHeight = 4;
/// <summary>
/// Decodes BC1 compressed texture data into 4 bytes per pixel.
/// </summary>
/// <param name="data">Compressed data, 8 bytes per 4x4 block, with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC1(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
// Scratch space for one decoded 4x4 block, viewed both as pixels and as four 16-byte rows.
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
// Offset of the current image inside the output, in pixels.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
// The four output rows covered by this row of blocks, indexable by block column.
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
BC1DecodeTileRgb(tile, data);
// Both values are in the range 1 to 4, so the OR equals 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
// Partial block on the right/bottom edge: copy only the visible part, row by row.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
// Advance to the next 8-byte block.
data = data.Slice(8);
}
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes BC2 compressed texture data into 4 bytes per pixel.
/// </summary>
/// <param name="data">Compressed data, 16 bytes per 4x4 block (8 bytes of alpha followed by 8 bytes of color), with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC2(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
// Scratch space for one decoded 4x4 block, viewed both as pixels and as four 16-byte rows.
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
// Offset of the current image inside the output, in pixels.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
// The four output rows covered by this row of blocks, indexable by block column.
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
// The color part is the second half of the block.
BC23DecodeTileRgb(tile, data.Slice(8));
// The first half holds one 4-bit alpha value per pixel.
ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data);
for (int i = 3; i < BlockWidth * BlockHeight * 4; i += 4, block >>= 4)
{
// Replicate the nibble into both halves of the alpha byte to expand it to 8 bits.
tile[i] = (byte)((block & 0xf) | (block << 4));
}
// Both values are in the range 1 to 4, so the OR equals 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
// Partial block on the right/bottom edge: copy only the visible part, row by row.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
// Advance to the next 16-byte block.
data = data.Slice(16);
}
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes BC3 compressed texture data into 4 bytes per pixel.
/// </summary>
/// <param name="data">Compressed data, 16 bytes per 4x4 block (8 bytes of alpha followed by 8 bytes of color), with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC3(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += Math.Max(1, width >> l) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 4;
}
byte[] output = new byte[size];
// Scratch space for one decoded 4x4 block, viewed both as pixels and as four 16-byte rows.
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight * 4];
// 8-entry alpha palette of the current block.
Span<byte> rPal = stackalloc byte[8];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
Span<Vector128<byte>> tileAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(tile);
Span<Vector128<byte>> outputLine0 = default;
Span<Vector128<byte>> outputLine1 = default;
Span<Vector128<byte>> outputLine2 = default;
Span<Vector128<byte>> outputLine3 = default;
// Offset of the current image inside the output, in pixels.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * width;
if (copyHeight == 4)
{
// The four output rows covered by this row of blocks, indexable by block column.
outputLine0 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width));
outputLine2 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 2));
outputLine3 = MemoryMarshal.Cast<uint, Vector128<byte>>(outputAsUint.Slice(lineBaseOOffs + width * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
// The color part is the second half of the block.
BC23DecodeTileRgb(tile, data.Slice(8));
// The first half holds two alpha endpoints followed by sixteen 3-bit palette indices.
ulong block = BinaryPrimitives.ReadUInt64LittleEndian(data);
rPal[0] = (byte)block;
rPal[1] = (byte)(block >> 8);
BCnLerpAlphaUnorm(rPal);
BCnDecodeTileAlphaRgba(tile, rPal, block >> 16);
// Both values are in the range 1 to 4, so the OR equals 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
outputLine0[x] = tileAsVector128[0];
outputLine1[x] = tileAsVector128[1];
outputLine2[x] = tileAsVector128[2];
outputLine3[x] = tileAsVector128[3];
}
else
{
// Partial block on the right/bottom edge: copy only the visible part, row by row.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tileAsUint.Slice(tY * 4, copyWidth).CopyTo(outputAsUint.Slice(pixelBaseOOffs + width * tY, copyWidth));
}
}
// Advance to the next 16-byte block.
data = data.Slice(16);
}
}
imageBaseOOffs += width * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes BC4 compressed texture data into 1 byte per pixel.
/// Every output row is padded to a multiple of 4 pixels.
/// </summary>
/// <param name="data">Compressed data, 8 bytes per 4x4 block, with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <param name="signed">True to build the palette with the signed interpolation, false for the unsigned one</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC4(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += BitUtils.AlignUp(Math.Max(1, width >> l), 4) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers;
}
// Backends currently expect a stride alignment of 4 bytes, so output width must be aligned.
int alignedWidth = BitUtils.AlignUp(width, 4);
byte[] output = new byte[size];
Span<byte> outputSpan = new Span<byte>(output);
// Each block is exactly one 64-bit word.
ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data);
// Scratch space for one decoded 4x4 block; each 32-bit word is one row of 4 pixels.
Span<byte> tile = stackalloc byte[BlockWidth * BlockHeight];
Span<byte> rPal = stackalloc byte[8];
Span<uint> tileAsUint = MemoryMarshal.Cast<byte, uint>(tile);
Span<uint> outputLine0 = default;
Span<uint> outputLine1 = default;
Span<uint> outputLine2 = default;
Span<uint> outputLine3 = default;
// Offset of the current image inside the output, in bytes.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth;
if (copyHeight == 4)
{
// The four output rows covered by this row of blocks, indexable by block column.
outputLine0 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth));
outputLine2 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth * 2));
outputLine3 = MemoryMarshal.Cast<byte, uint>(outputSpan.Slice(lineBaseOOffs + alignedWidth * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
// Two endpoint bytes followed by sixteen 3-bit palette indices.
ulong block = data64[0];
rPal[0] = (byte)block;
rPal[1] = (byte)(block >> 8);
if (signed)
{
BCnLerpAlphaSnorm(rPal);
}
else
{
BCnLerpAlphaUnorm(rPal);
}
BCnDecodeTileAlpha(tile, rPal, block >> 16);
// Both values are in the range 1 to 4, so the OR equals 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
outputLine0[x] = tileAsUint[0];
outputLine1[x] = tileAsUint[1];
outputLine2[x] = tileAsUint[2];
outputLine3[x] = tileAsUint[3];
}
else
{
// Partial block on the right/bottom edge: copy only the visible part, row by row.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
tile.Slice(tY * 4, copyWidth).CopyTo(outputSpan.Slice(pixelBaseOOffs + alignedWidth * tY, copyWidth));
}
}
// Advance to the next block.
data64 = data64.Slice(1);
}
}
imageBaseOOffs += alignedWidth * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
alignedWidth = BitUtils.AlignUp(width, 4);
}
return output;
}
/// <summary>
/// Decodes BC5 compressed texture data into 2 bytes per pixel (first channel in the low byte, second in the high byte).
/// Every output row is padded to a multiple of 2 pixels.
/// </summary>
/// <param name="data">Compressed data, 16 bytes per 4x4 block (8 bytes per channel), with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <param name="signed">True to build the palettes with the signed interpolation, false for the unsigned one</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC5(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed)
{
int size = 0;
for (int l = 0; l < levels; l++)
{
size += BitUtils.AlignUp(Math.Max(1, width >> l), 2) * Math.Max(1, height >> l) * Math.Max(1, depth >> l) * layers * 2;
}
// Backends currently expect a stride alignment of 4 bytes, so output width must be aligned.
int alignedWidth = BitUtils.AlignUp(width, 2);
byte[] output = new byte[size];
// Each block is two 64-bit words, one per channel.
ReadOnlySpan<ulong> data64 = MemoryMarshal.Cast<byte, ulong>(data);
Span<byte> rTile = stackalloc byte[BlockWidth * BlockHeight * 2];
Span<byte> gTile = stackalloc byte[BlockWidth * BlockHeight * 2];
Span<byte> rPal = stackalloc byte[8];
Span<byte> gPal = stackalloc byte[8];
Span<ushort> outputAsUshort = MemoryMarshal.Cast<byte, ushort>(output);
Span<uint> rTileAsUint = MemoryMarshal.Cast<byte, uint>(rTile);
Span<uint> gTileAsUint = MemoryMarshal.Cast<byte, uint>(gTile);
Span<ulong> outputLine0 = default;
Span<ulong> outputLine1 = default;
Span<ulong> outputLine2 = default;
Span<ulong> outputLine3 = default;
// Offset of the current image inside the output, in pixels.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int w = BitUtils.DivRoundUp(width, BlockWidth);
int h = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < h; y++)
{
int baseY = y * BlockHeight;
int copyHeight = Math.Min(BlockHeight, height - baseY);
int lineBaseOOffs = imageBaseOOffs + baseY * alignedWidth;
if (copyHeight == 4)
{
// The four output rows covered by this row of blocks, indexable by block column.
outputLine0 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs));
outputLine1 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth));
outputLine2 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 2));
outputLine3 = MemoryMarshal.Cast<ushort, ulong>(outputAsUshort.Slice(lineBaseOOffs + alignedWidth * 3));
}
for (int x = 0; x < w; x++)
{
int baseX = x * BlockWidth;
int copyWidth = Math.Min(BlockWidth, width - baseX);
// Each word holds two endpoint bytes followed by sixteen 3-bit palette indices.
ulong blockL = data64[0];
ulong blockH = data64[1];
rPal[0] = (byte)blockL;
rPal[1] = (byte)(blockL >> 8);
gPal[0] = (byte)blockH;
gPal[1] = (byte)(blockH >> 8);
if (signed)
{
BCnLerpAlphaSnorm(rPal);
BCnLerpAlphaSnorm(gPal);
}
else
{
BCnLerpAlphaUnorm(rPal);
BCnLerpAlphaUnorm(gPal);
}
BCnDecodeTileAlpha(rTile, rPal, blockL >> 16);
BCnDecodeTileAlpha(gTile, gPal, blockH >> 16);
// Both values are in the range 1 to 4, so the OR equals 4 only when the whole block is inside the image.
if ((copyWidth | copyHeight) == 4)
{
// Merge one row of each channel (4 bytes each) into 4 two-byte pixels.
outputLine0[x] = InterleaveBytes(rTileAsUint[0], gTileAsUint[0]);
outputLine1[x] = InterleaveBytes(rTileAsUint[1], gTileAsUint[1]);
outputLine2[x] = InterleaveBytes(rTileAsUint[2], gTileAsUint[2]);
outputLine3[x] = InterleaveBytes(rTileAsUint[3], gTileAsUint[3]);
}
else
{
// Partial block on the right/bottom edge: write only the visible pixels, one at a time.
int pixelBaseOOffs = lineBaseOOffs + baseX;
for (int tY = 0; tY < copyHeight; tY++)
{
int line = pixelBaseOOffs + alignedWidth * tY;
for (int tX = 0; tX < copyWidth; tX++)
{
int texel = tY * BlockWidth + tX;
outputAsUshort[line + tX] = (ushort)(rTile[texel] | (gTile[texel] << 8));
}
}
}
// Advance to the next block.
data64 = data64.Slice(2);
}
}
imageBaseOOffs += alignedWidth * height;
}
}
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
alignedWidth = BitUtils.AlignUp(width, 2);
}
return output;
}
/// <summary>
/// Decodes BC6 compressed texture data into 8 bytes per pixel.
/// </summary>
/// <param name="data">Compressed data, 16 bytes per 4x4 block, with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <param name="signed">Forwarded to the block decoder to select the signed variant of the format</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC6(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers, bool signed)
{
    const int BytesPerPixel = 8;
    const int BytesPerBlock = 16;

    int totalSize = 0;

    for (int level = 0; level < levels; level++)
    {
        int levelWidth = Math.Max(1, width >> level);
        int levelHeight = Math.Max(1, height >> level);
        int levelDepth = Math.Max(1, depth >> level);

        totalSize += levelWidth * levelHeight * levelDepth * layers * BytesPerPixel;
    }

    byte[] output = new byte[totalSize];

    int readOffset = 0;
    int writeOffset = 0;

    for (int level = 0; level < levels; level++)
    {
        int widthInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
        int heightInBlocks = BitUtils.DivRoundUp(height, BlockHeight);

        // Every layer and depth slice of a level is an independent 2D image of the same size.
        int imageCount = layers * depth;

        for (int image = 0; image < imageCount; image++)
        {
            BC6Decoder.Decode(output.AsSpan().Slice(writeOffset), data.Slice(readOffset), width, height, signed);

            readOffset += widthInBlocks * heightInBlocks * BytesPerBlock;
            writeOffset += width * height * BytesPerPixel;
        }

        width = Math.Max(1, width >> 1);
        height = Math.Max(1, height >> 1);
        depth = Math.Max(1, depth >> 1);
    }

    return output;
}
/// <summary>
/// Decodes BC7 compressed texture data into 4 bytes per pixel.
/// </summary>
/// <param name="data">Compressed data, 16 bytes per 4x4 block, with all levels stored back to back</param>
/// <param name="width">Width of the first level in pixels</param>
/// <param name="height">Height of the first level in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
/// <param name="layers">Number of layers</param>
/// <returns>The decoded pixels of all levels, layers and slices</returns>
public static byte[] DecodeBC7(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
    const int BytesPerPixel = 4;
    const int BytesPerBlock = 16;

    int totalSize = 0;

    for (int level = 0; level < levels; level++)
    {
        int levelWidth = Math.Max(1, width >> level);
        int levelHeight = Math.Max(1, height >> level);
        int levelDepth = Math.Max(1, depth >> level);

        totalSize += levelWidth * levelHeight * levelDepth * layers * BytesPerPixel;
    }

    byte[] output = new byte[totalSize];

    int readOffset = 0;
    int writeOffset = 0;

    for (int level = 0; level < levels; level++)
    {
        int widthInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
        int heightInBlocks = BitUtils.DivRoundUp(height, BlockHeight);

        // Every layer and depth slice of a level is an independent 2D image of the same size.
        int imageCount = layers * depth;

        for (int image = 0; image < imageCount; image++)
        {
            BC7Decoder.Decode(output.AsSpan().Slice(writeOffset), data.Slice(readOffset), width, height);

            readOffset += widthInBlocks * heightInBlocks * BytesPerBlock;
            writeOffset += width * height * BytesPerPixel;
        }

        width = Math.Max(1, width >> 1);
        height = Math.Max(1, height >> 1);
        depth = Math.Max(1, depth >> 1);
    }

    return output;
}
/// <summary>
/// Interleaves the bytes of two 32-bit values into one 64-bit value.
/// </summary>
/// <param name="left">Source of the even bytes (0, 2, 4, 6) of the result</param>
/// <param name="right">Source of the odd bytes (1, 3, 5, 7) of the result</param>
/// <returns>The interleaved value</returns>
private static ulong InterleaveBytes(uint left, uint right)
{
    ulong evenBytes = InterleaveBytesWithZeros(left);
    ulong oddBytes = InterleaveBytesWithZeros(right) << 8;

    return evenBytes | oddBytes;
}
/// <summary>
/// Spreads the 4 bytes of <paramref name="value"/> over the even bytes of a 64-bit value,
/// leaving the odd bytes zero.
/// </summary>
/// <param name="value">Value to spread</param>
/// <returns>A value whose bytes 0, 2, 4 and 6 are bytes 0, 1, 2 and 3 of the input</returns>
private static ulong InterleaveBytesWithZeros(uint value)
{
    ulong wide = value;

    // Source byte N moves from bit offset 8 * N to bit offset 16 * N.
    return (wide & 0xffUL)
        | ((wide & 0xff00UL) << 8)
        | ((wide & 0xff0000UL) << 16)
        | ((wide & 0xff000000UL) << 24);
}
/// <summary>
/// Fills entries 2 to 7 of an 8-entry palette from the two unsigned endpoints stored in entries 0 and 1.
/// </summary>
/// <param name="alpha">Palette; entries 0 and 1 are read, entries 2 to 7 are written</param>
private static void BCnLerpAlphaUnorm(Span<byte> alpha)
{
    int first = alpha[0];
    int second = alpha[1];

    if (first > second)
    {
        // Six evenly spaced values between the endpoints.
        for (int step = 1; step <= 6; step++)
        {
            alpha[step + 1] = (byte)(((7 - step) * first + step * second) / 7);
        }
    }
    else
    {
        // Four evenly spaced values, plus the two extremes of the range.
        for (int step = 1; step <= 4; step++)
        {
            alpha[step + 1] = (byte)(((5 - step) * first + step * second) / 5);
        }

        alpha[6] = 0;
        alpha[7] = 0xff;
    }
}
/// <summary>
/// Fills entries 2 to 7 of an 8-entry palette from the two endpoints stored in entries 0 and 1,
/// treating every entry as a signed byte.
/// </summary>
/// <param name="alpha">Palette; entries 0 and 1 are read, entries 2 to 7 are written</param>
private static void BCnLerpAlphaSnorm(Span<byte> alpha)
{
    int first = (sbyte)alpha[0];
    int second = (sbyte)alpha[1];

    if (first > second)
    {
        // Six evenly spaced values between the endpoints.
        for (int step = 1; step <= 6; step++)
        {
            alpha[step + 1] = (byte)(((7 - step) * first + step * second) / 7);
        }
    }
    else
    {
        // Four evenly spaced values, plus the two extremes of the signed range.
        for (int step = 1; step <= 4; step++)
        {
            alpha[step + 1] = (byte)(((5 - step) * first + step * second) / 5);
        }

        alpha[6] = 0x80;
        alpha[7] = 0x7f;
    }
}
/// <summary>
/// Expands sixteen 3-bit palette indices into sixteen bytes, one per pixel.
/// </summary>
/// <param name="output">Destination; the first 16 bytes are written</param>
/// <param name="rPal">8-entry palette</param>
/// <param name="rI">Packed indices, 3 bits per pixel, first pixel in the lowest bits</param>
private unsafe static void BCnDecodeTileAlpha(Span<byte> output, Span<byte> rPal, ulong rI)
{
if (Avx2.IsSupported)
{
Span<Vector128<byte>> outputAsVector128 = MemoryMarshal.Cast<byte, Vector128<byte>>(output);
// Per-lane shift amounts that bring four consecutive indices down to bit 0.
Vector128<uint> shifts = Vector128.Create(0u, 3u, 6u, 9u);
Vector128<uint> masks = Vector128.Create(7u);
Vector128<byte> vClut;
fixed (byte* pRPal = rPal)
{
// Load the 8 palette bytes into the low half of the vector.
vClut = Sse2.LoadScalarVector128((ulong*)pRPal).AsByte();
}
// indices0 carries pixels 0 to 7 (bits 0 to 23), indices1 carries pixels 8 to 15 (bits 24 to 47).
Vector128<uint> indices0 = Vector128.Create((uint)rI);
Vector128<uint> indices1 = Vector128.Create((uint)(rI >> 24));
Vector128<uint> indices00 = Avx2.ShiftRightLogicalVariable(indices0, shifts);
Vector128<uint> indices10 = Avx2.ShiftRightLogicalVariable(indices1, shifts);
// Four more indices (12 bits) further up give the second group of four pixels of each half.
Vector128<uint> indices01 = Sse2.ShiftRightLogical(indices00, 12);
Vector128<uint> indices11 = Sse2.ShiftRightLogical(indices10, 12);
indices00 = Sse2.And(indices00, masks);
indices10 = Sse2.And(indices10, masks);
indices01 = Sse2.And(indices01, masks);
indices11 = Sse2.And(indices11, masks);
// Narrow 32-bit -> 16-bit -> 8-bit so that the 16 indices end up in pixel order, one per byte.
Vector128<ushort> indicesW0 = Sse41.PackUnsignedSaturate(indices00.AsInt32(), indices01.AsInt32());
Vector128<ushort> indicesW1 = Sse41.PackUnsignedSaturate(indices10.AsInt32(), indices11.AsInt32());
Vector128<byte> indices = Sse2.PackUnsignedSaturate(indicesW0.AsInt16(), indicesW1.AsInt16());
// The byte shuffle performs the 16 palette lookups at once.
outputAsVector128[0] = Ssse3.Shuffle(vClut, indices);
}
else
{
// Scalar fallback: one lookup per pixel.
for (int i = 0; i < BlockWidth * BlockHeight; i++, rI >>= 3)
{
output[i] = rPal[(int)(rI & 7)];
}
}
}
/// <summary>
/// Expands sixteen 3-bit palette indices into the highest byte of sixteen 32-bit pixels.
/// </summary>
/// <param name="output">Tile of 16 32-bit pixels; only byte 3 of each pixel is affected</param>
/// <param name="rPal">8-entry palette</param>
/// <param name="rI">Packed indices, 3 bits per pixel, first pixel in the lowest bits</param>
private unsafe static void BCnDecodeTileAlphaRgba(Span<byte> output, Span<byte> rPal, ulong rI)
{
if (Avx2.IsSupported)
{
Span<Vector256<uint>> outputAsVector256 = MemoryMarshal.Cast<byte, Vector256<uint>>(output);
// Per-lane shift amounts that bring eight consecutive indices down to bit 0.
Vector256<uint> shifts = Vector256.Create(0u, 3u, 6u, 9u, 12u, 15u, 18u, 21u);
Vector128<uint> vClut128;
fixed (byte* pRPal = rPal)
{
vClut128 = Sse2.LoadScalarVector128((ulong*)pRPal).AsUInt32();
}
// Widen each palette byte to 32 bits and move it into the top byte of its lane.
Vector256<uint> vClut = Avx2.ConvertToVector256Int32(vClut128.AsByte()).AsUInt32();
vClut = Avx2.ShiftLeftLogical(vClut, 24);
// indices0 carries pixels 0 to 7 (bits 0 to 23), indices1 carries pixels 8 to 15 (bits 24 to 47).
Vector256<uint> indices0 = Vector256.Create((uint)rI);
Vector256<uint> indices1 = Vector256.Create((uint)(rI >> 24));
indices0 = Avx2.ShiftRightLogicalVariable(indices0, shifts);
indices1 = Avx2.ShiftRightLogicalVariable(indices1, shifts);
// The looked-up values are ORed into the existing pixels, so this path relies on the top byte
// of every pixel in the tile being zero on entry (the scalar path below overwrites it instead).
// No masking of the indices is done here — presumably because the permute only uses the low 3 bits of each lane; confirm against the intrinsic's documentation.
outputAsVector256[0] = Avx2.Or(outputAsVector256[0], Avx2.PermuteVar8x32(vClut, indices0));
outputAsVector256[1] = Avx2.Or(outputAsVector256[1], Avx2.PermuteVar8x32(vClut, indices1));
}
else
{
// Scalar fallback: write byte 3 of every pixel.
for (int i = 3; i < BlockWidth * BlockHeight * 4; i += 4, rI >>= 3)
{
output[i] = rPal[(int)(rI & 7)];
}
}
}
/// <summary>
/// Decodes one BC1 block into a 4x4 tile of 32-bit pixels.
/// </summary>
/// <param name="output">Destination tile; receives 16 32-bit pixels</param>
/// <param name="input">Block data: two little-endian 16-bit RGB565 colors followed by 32 bits of 2-bit palette indices</param>
private static void BC1DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input)
{
    // stackalloc into a Span is safe code and no pointers are used here,
    // so this method does not need an unsafe context.
    Span<uint> clut = stackalloc uint[4];

    uint c0c1 = BinaryPrimitives.ReadUInt32LittleEndian(input);
    uint c0 = (ushort)c0c1;
    uint c1 = (ushort)(c0c1 >> 16);

    // Both endpoint colors are fully opaque.
    clut[0] = ConvertRgb565ToRgb888(c0) | 0xff000000;
    clut[1] = ConvertRgb565ToRgb888(c1) | 0xff000000;

    // The remaining two entries depend on how the raw endpoint values compare.
    clut[2] = BC1LerpRgb2(clut[0], clut[1], c0, c1);
    clut[3] = BC1LerpRgb3(clut[0], clut[1], c0, c1);

    BCnDecodeTileRgb(clut, output, input);
}
/// <summary>
/// Decodes the color part of one BC2 or BC3 block into a 4x4 tile of 32-bit pixels.
/// The highest byte of every pixel is left at zero so the caller can fill in alpha afterwards.
/// </summary>
/// <param name="output">Destination tile; receives 16 32-bit pixels</param>
/// <param name="input">Color data: two little-endian 16-bit RGB565 colors followed by 32 bits of 2-bit palette indices</param>
private static void BC23DecodeTileRgb(Span<byte> output, ReadOnlySpan<byte> input)
{
    // stackalloc into a Span is safe code and no pointers are used here,
    // so this method does not need an unsafe context.
    Span<uint> clut = stackalloc uint[4];

    uint c0c1 = BinaryPrimitives.ReadUInt32LittleEndian(input);
    uint c0 = (ushort)c0c1;
    uint c1 = (ushort)(c0c1 >> 16);

    clut[0] = ConvertRgb565ToRgb888(c0);
    clut[1] = ConvertRgb565ToRgb888(c1);

    // Unlike BC1, the two extra entries are always the 1/3 and 2/3 blends.
    clut[2] = BC23LerpRgb2(clut[0], clut[1]);
    clut[3] = BC23LerpRgb3(clut[0], clut[1]);

    BCnDecodeTileRgb(clut, output, input);
}
/// <summary>
/// Expands sixteen 2-bit palette indices into a 4x4 tile of 32-bit pixels.
/// </summary>
/// <param name="clut">4-entry color palette</param>
/// <param name="output">Destination tile; receives 16 32-bit pixels</param>
/// <param name="input">Block data; the indices are the little-endian 32-bit word at byte offset 4, first pixel in the lowest bits</param>
private unsafe static void BCnDecodeTileRgb(Span<uint> clut, Span<byte> output, ReadOnlySpan<byte> input)
{
if (Avx2.IsSupported)
{
Span<Vector256<uint>> outputAsVector256 = MemoryMarshal.Cast<byte, Vector256<uint>>(output);
// Per-lane shift amounts that bring the index of pixels 0 to 7 and 8 to 15 down to bit 0.
Vector256<uint> shifts0 = Vector256.Create(0u, 2u, 4u, 6u, 8u, 10u, 12u, 14u);
Vector256<uint> shifts1 = Vector256.Create(16u, 18u, 20u, 22u, 24u, 26u, 28u, 30u);
Vector256<uint> masks = Vector256.Create(3u);
Vector256<uint> vClut;
fixed (uint* pClut = &clut[0])
{
// Only the low 128 bits hold palette data; the masked indices (0 to 3) never select the upper lanes.
vClut = Sse2.LoadVector128(pClut).ToVector256Unsafe();
}
Vector256<uint> indices0;
fixed (byte* pInput = input)
{
// Copy the 32-bit index word into every lane.
indices0 = Avx2.BroadcastScalarToVector256((uint*)(pInput + 4));
}
Vector256<uint> indices1 = indices0;
indices0 = Avx2.ShiftRightLogicalVariable(indices0, shifts0);
indices1 = Avx2.ShiftRightLogicalVariable(indices1, shifts1);
indices0 = Avx2.And(indices0, masks);
indices1 = Avx2.And(indices1, masks);
// The lane permute performs 8 palette lookups at once.
outputAsVector256[0] = Avx2.PermuteVar8x32(vClut, indices0);
outputAsVector256[1] = Avx2.PermuteVar8x32(vClut, indices1);
}
else
{
// Scalar fallback: one lookup per pixel.
Span<uint> outputAsUint = MemoryMarshal.Cast<byte, uint>(output);
uint indices = BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(4));
for (int i = 0; i < BlockWidth * BlockHeight; i++, indices >>= 2)
{
outputAsUint[i] = clut[(int)(indices & 3)];
}
}
}
/// <summary>
/// Computes the third palette entry of a BC1 block, always fully opaque.
/// </summary>
/// <param name="color0">First endpoint, expanded to 8 bits per channel</param>
/// <param name="color1">Second endpoint, expanded to 8 bits per channel</param>
/// <param name="c0">First endpoint as stored in the block</param>
/// <param name="c1">Second endpoint as stored in the block</param>
/// <returns>
/// The blend weighted 2:1 towards <paramref name="color0"/> when <paramref name="c0"/> is greater than
/// <paramref name="c1"/>, otherwise the average of both endpoints
/// </returns>
private static uint BC1LerpRgb2(uint color0, uint color1, uint c0, uint c1)
{
    const uint OpaqueAlpha = 0xff000000;

    if (c0 <= c1)
    {
        // Per-channel average without carries leaking between channels:
        // (a + b) / 2 == (a & b) + ((a ^ b) >> 1).
        uint sharedBits = color0 & color1;
        uint halfOfDifferingBits = ((color0 ^ color1) >> 1) & 0x7f7f7f;

        return (halfOfDifferingBits + sharedBits) | OpaqueAlpha;
    }

    return BC23LerpRgb2(color0, color1) | OpaqueAlpha;
}
/// <summary>
/// Blends two colors channel by channel with a 2:1 weight towards <paramref name="color0"/>.
/// Only the low 24 bits are used; the highest byte of the result is zero.
/// </summary>
/// <param name="color0">Color with weight 2/3</param>
/// <param name="color1">Color with weight 1/3</param>
/// <returns>The blended color</returns>
private static uint BC23LerpRgb2(uint color0, uint color1)
{
    uint blended = 0;

    // One pass per 8-bit channel, lowest byte first.
    for (int shift = 0; shift < 24; shift += 8)
    {
        uint near = (color0 >> shift) & 0xff;
        uint far = (color1 >> shift) & 0xff;

        blended |= ((2 * near + far) / 3) << shift;
    }

    return blended;
}
/// <summary>
/// Computes the fourth palette entry of a BC1 block.
/// </summary>
/// <param name="color0">First endpoint, expanded to 8 bits per channel</param>
/// <param name="color1">Second endpoint, expanded to 8 bits per channel</param>
/// <param name="c0">First endpoint as stored in the block</param>
/// <param name="c1">Second endpoint as stored in the block</param>
/// <returns>
/// The opaque blend weighted 2:1 towards <paramref name="color1"/> when <paramref name="c0"/> is greater than
/// <paramref name="c1"/>, otherwise 0 (all channels zero, including the highest byte)
/// </returns>
private static uint BC1LerpRgb3(uint color0, uint color1, uint c0, uint c1)
{
    return c0 > c1
        ? BC23LerpRgb3(color0, color1) | 0xff000000
        : 0u;
}
/// <summary>
/// Blends two colors channel by channel with a 2:1 weight towards <paramref name="color1"/>.
/// Only the low 24 bits are used; the highest byte of the result is zero.
/// </summary>
/// <param name="color0">Color with weight 1/3</param>
/// <param name="color1">Color with weight 2/3</param>
/// <returns>The blended color</returns>
private static uint BC23LerpRgb3(uint color0, uint color1)
{
    uint blended = 0;

    // One pass per 8-bit channel, lowest byte first.
    for (int shift = 0; shift < 24; shift += 8)
    {
        uint far = (color0 >> shift) & 0xff;
        uint near = (color1 >> shift) & 0xff;

        blended |= ((2 * near + far) / 3) << shift;
    }

    return blended;
}
/// <summary>
/// Expands a 16-bit color (5 bits in bits 11 to 15, 6 bits in bits 5 to 10, 5 bits in bits 0 to 4)
/// to 8 bits per channel.
/// </summary>
/// <param name="value">Packed 16-bit color; bits above bit 15 are ignored</param>
/// <returns>
/// A 24-bit color with the top field in byte 0, the middle field in byte 1 and the bottom field in byte 2
/// </returns>
private static uint ConvertRgb565ToRgb888(uint value)
{
    uint red5 = (value >> 11) & 0x1f;
    uint green6 = (value >> 5) & 0x3f;
    uint blue5 = value & 0x1f;

    // Widen each field by appending a copy of its own top bits.
    uint red8 = (red5 << 3) | (red5 >> 2);
    uint green8 = (green6 << 2) | (green6 >> 4);
    uint blue8 = (blue5 << 3) | (blue5 >> 2);

    return red8 | (green8 << 8) | (blue8 << 16);
}
}
}

View file

@ -0,0 +1,60 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Texture.Encoders;
using System;
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// Encoder for block compressed texture formats.
/// </summary>
public static class BCnEncoder
{
    private const int BlockWidth = 4;
    private const int BlockHeight = 4;

    /// <summary>
    /// Encodes 4 bytes per pixel texture data as BC7.
    /// </summary>
    /// <param name="data">Source pixels, 4 bytes each, with all levels, layers and slices stored back to back</param>
    /// <param name="width">Width of the first level in pixels</param>
    /// <param name="height">Height of the first level in pixels</param>
    /// <param name="depth">Depth of the first level</param>
    /// <param name="levels">Number of levels; each level halves every dimension, down to a minimum of 1</param>
    /// <param name="layers">Number of layers</param>
    /// <returns>The compressed data, 16 bytes per 4x4 block</returns>
    public static byte[] EncodeBC7(byte[] data, int width, int height, int depth, int levels, int layers)
    {
        int size = 0;

        for (int l = 0; l < levels; l++)
        {
            int w = BitUtils.DivRoundUp(Math.Max(1, width >> l), BlockWidth);
            int h = BitUtils.DivRoundUp(Math.Max(1, height >> l), BlockHeight);

            size += w * h * 16 * Math.Max(1, depth >> l) * layers;
        }

        byte[] output = new byte[size];

        int imageBaseIOffs = 0;
        int imageBaseOOffs = 0;

        for (int l = 0; l < levels; l++)
        {
            int w = BitUtils.DivRoundUp(width, BlockWidth);
            int h = BitUtils.DivRoundUp(height, BlockHeight);

            // Every layer and depth slice of a level is encoded as an independent 2D image.
            for (int l2 = 0; l2 < layers; l2++)
            {
                for (int z = 0; z < depth; z++)
                {
                    BC7Encoder.Encode(
                        output.AsMemory().Slice(imageBaseOOffs),
                        data.AsMemory().Slice(imageBaseIOffs),
                        width,
                        height,
                        EncodeMode.Fast | EncodeMode.Multithreaded);

                    imageBaseIOffs += width * height * 4;
                    imageBaseOOffs += w * h * 16;
                }
            }

            width = Math.Max(1, width >> 1);
            height = Math.Max(1, height >> 1);
            depth = Math.Max(1, depth >> 1);
        }

        return output;
    }
}
}

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// Dimensions of a GOB, the basic unit that block linear offsets are computed in.
/// </summary>
static class BlockLinearConstants
{
    /// <summary>
    /// Size of one GOB in bytes.
    /// </summary>
    public const int GobSize = GobHeight * GobStride;

    /// <summary>
    /// Number of bytes in one line of a GOB.
    /// </summary>
    public const int GobStride = 64;

    /// <summary>
    /// Number of lines in a GOB.
    /// </summary>
    public const int GobHeight = 8;
}
}

View file

@ -0,0 +1,195 @@
using Ryujinx.Common;
using System.Numerics;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
{
class BlockLinearLayout
{
/// <summary>
/// Pair of sizes computed together by GetRobAndSliceSizes.
/// </summary>
private struct RobAndSliceSizes
{
    /// <summary>
    /// Size of one row of blocks.
    /// </summary>
    public int RobSize;

    /// <summary>
    /// Size of one slice, that is, of all rows of blocks covering the texture height.
    /// </summary>
    public int SliceSize;

    /// <summary>
    /// Creates a new pair of sizes.
    /// </summary>
    /// <param name="robSize">Size of one row of blocks</param>
    /// <param name="sliceSize">Size of one slice</param>
    public RobAndSliceSizes(int robSize, int sliceSize)
    {
        (RobSize, SliceSize) = (robSize, sliceSize);
    }
}
private int _texBpp;
private int _bhMask;
private int _bdMask;
private int _bhShift;
private int _bdShift;
private int _bppShift;
private int _xShift;
private int _robSize;
private int _sliceSize;
// Variables for built in iteration.
private int _yPart;
private int _yzPart;
private int _zPart;
/// <summary>
/// Precomputes the shifts, masks and sizes needed to translate coordinates into block linear offsets.
/// </summary>
/// <param name="width">Texture width</param>
/// <param name="height">Texture height</param>
/// <param name="gobBlocksInY">Number of GOBs per block along Y; the masks and shifts derived from it are only exact for powers of two</param>
/// <param name="gobBlocksInZ">Number of GOBs per block along Z; the masks and shifts derived from it are only exact for powers of two</param>
/// <param name="bpp">Bytes per pixel; the shift derived from it is only exact for powers of two</param>
public BlockLinearLayout(
    int width,
    int height,
    int gobBlocksInY,
    int gobBlocksInZ,
    int bpp)
{
    // Must be set before GetRobAndSliceSizes is called, since it reads _texBpp.
    _texBpp = bpp;
    _bppShift = BitOperations.TrailingZeroCount(bpp);

    _bhShift = BitOperations.TrailingZeroCount(gobBlocksInY);
    _bhMask = gobBlocksInY - 1;

    _bdShift = BitOperations.TrailingZeroCount(gobBlocksInZ);
    _bdMask = gobBlocksInZ - 1;

    // Shift equivalent to multiplying by the size of a whole block.
    int blockSize = GobSize * gobBlocksInY * gobBlocksInZ;
    _xShift = BitOperations.TrailingZeroCount(blockSize);

    RobAndSliceSizes sizes = GetRobAndSliceSizes(width, height, gobBlocksInY, gobBlocksInZ);

    (_robSize, _sliceSize) = (sizes.RobSize, sizes.SliceSize);
}
private RobAndSliceSizes GetRobAndSliceSizes(int width, int height, int gobBlocksInY, int gobBlocksInZ)
{
int widthInGobs = BitUtils.DivRoundUp(width * _texBpp, GobStride);
int robSize = GobSize * gobBlocksInY * gobBlocksInZ * widthInGobs;
int sliceSize = BitUtils.DivRoundUp(height, gobBlocksInY * GobHeight) * robSize;
return new RobAndSliceSizes(robSize, sliceSize);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffset(int x, int y, int z)
{
return GetOffsetWithLineOffset(x << _bppShift, y, z);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset(int x, int y, int z)
{
int yh = y / GobHeight;
int offset = (z >> _bdShift) * _sliceSize + (yh >> _bhShift) * _robSize;
offset += (x / GobStride) << _xShift;
offset += (yh & _bhMask) * GobSize;
offset += ((z & _bdMask) * GobSize) << _bhShift;
offset += ((x & 0x3f) >> 5) << 8;
offset += ((y & 0x07) >> 1) << 6;
offset += ((x & 0x1f) >> 4) << 5;
offset += ((y & 0x01) >> 0) << 4;
offset += ((x & 0x0f) >> 0) << 0;
return offset;
}
public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
{
// Justification:
// The 2D offset is a combination of separate x and y parts.
// Both components increase with input and never overlap bits.
// Therefore for each component, the minimum input value is the lowest that component can go.
// Minimum total value is minimum X component + minimum Y component. Similar goes for maximum.
int start = GetOffset(x, y, 0);
int end = GetOffset(x + width - 1, y + height - 1, 0) + _texBpp; // Cover the last pixel.
return (start, end - start);
}
public bool LayoutMatches(BlockLinearLayout other)
{
return _robSize == other._robSize &&
_sliceSize == other._sliceSize &&
_texBpp == other._texBpp &&
_bhMask == other._bhMask &&
_bdMask == other._bdMask;
}
// Functions for built in iteration.
// Components of the offset can be updated separately, and combined to save some time.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void SetY(int y)
{
int yh = y / GobHeight;
int offset = (yh >> _bhShift) * _robSize;
offset += (yh & _bhMask) * GobSize;
offset += ((y & 0x07) >> 1) << 6;
offset += ((y & 0x01) >> 0) << 4;
_yPart = offset;
_yzPart = offset + _zPart;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void SetZ(int z)
{
int offset = (z >> _bdShift) * _sliceSize;
offset += ((z & _bdMask) * GobSize) << _bhShift;
_zPart = offset;
_yzPart = offset + _yPart;
}
/// <summary>
/// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 16.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset16(int x)
{
int offset = (x / GobStride) << _xShift;
offset += ((x & 0x3f) >> 5) << 8;
offset += ((x & 0x1f) >> 4) << 5;
return offset + _yzPart;
}
/// <summary>
/// Optimized conversion for line offset in bytes to an absolute offset. Input x must be divisible by 64.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffsetWithLineOffset64(int x)
{
int offset = (x / GobStride) << _xShift;
return offset + _yzPart;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetOffset(int x)
{
x <<= _bppShift;
int offset = (x / GobStride) << _xShift;
offset += ((x & 0x3f) >> 5) << 8;
offset += ((x & 0x1f) >> 4) << 5;
offset += (x & 0x0f);
return offset + _yzPart;
}
}
}

View file

@ -0,0 +1,11 @@
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture
{
    /// <summary>
    /// Opaque 12-byte value: an 8-byte part followed by a 4-byte part, packed without padding.
    /// The fields are never read individually; the type only exists to have this exact size.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)]
    public struct Bpp12Pixel
    {
        private ulong _first8Bytes;
        private uint _last4Bytes;
    }
}

View file

@ -0,0 +1,682 @@
using Ryujinx.Common;
using System;
using System.Buffers.Binary;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture
{
public static class ETC2Decoder
{
// Bits 24-31 of an output texel; OR-ing this in makes the texel fully opaque.
private const uint AlphaMask = 0xff000000u;
// Every compressed block decodes to a BlockWidth x BlockHeight tile of texels.
private const int BlockWidth = 4;
private const int BlockHeight = 4;
// Intensity modifier tables for ETC1-style sub-blocks.
// The row is selected by a 3-bit field of the block, the column by the 2-bit per-texel index;
// the selected value is added to all three color channels before saturation.
private static readonly int[][] _etc1Lut =
{
new int[] { 2, 8, -2, -8 },
new int[] { 5, 17, -5, -17 },
new int[] { 9, 29, -9, -29 },
new int[] { 13, 42, -13, -42 },
new int[] { 18, 60, -18, -60 },
new int[] { 24, 80, -24, -80 },
new int[] { 33, 106, -33, -106 },
new int[] { 47, 183, -47, -183 }
};
// Distance values used by the T and H modes (DecodeBlock59T / DecodeBlock58H),
// selected by a 3-bit index and applied as +dist / -dist to the palette colors.
private static readonly int[] _etc2Lut =
{
3, 6, 11, 16, 23, 32, 41, 64
};
// Alpha modifier tables used by DecodeRgba.
// The row is selected by a 4-bit field of the alpha block, the column by the 3-bit per-texel index;
// the selected value is scaled by the block multiplier and added to the base alpha.
private static readonly int[][] _etc2AlphaLut =
{
new int[] { -3, -6, -9, -15, 2, 5, 8, 14 },
new int[] { -3, -7, -10, -13, 2, 6, 9, 12 },
new int[] { -2, -5, -8, -13, 1, 4, 7, 12 },
new int[] { -2, -4, -6, -13, 1, 3, 5, 12 },
new int[] { -3, -6, -8, -12, 2, 5, 7, 11 },
new int[] { -3, -7, -9, -11, 2, 6, 8, 10 },
new int[] { -4, -7, -8, -11, 3, 6, 7, 10 },
new int[] { -3, -5, -8, -11, 2, 4, 7, 10 },
new int[] { -2, -6, -8, -10, 1, 5, 7, 9 },
new int[] { -2, -5, -8, -10, 1, 4, 7, 9 },
new int[] { -2, -4, -8, -10, 1, 3, 7, 9 },
new int[] { -2, -5, -7, -10, 1, 4, 6, 9 },
new int[] { -3, -4, -7, -10, 2, 3, 6, 9 },
new int[] { -1, -2, -3, -10, 0, 1, 2, 9 },
new int[] { -4, -6, -8, -9, 3, 5, 7, 8 },
new int[] { -3, -5, -7, -9, 2, 4, 6, 8 }
};
/// <summary>
/// Decodes ETC2 RGB data (one 64-bit block per 4x4 tile) into 32-bit texels with the alpha byte forced to 0xff.
/// </summary>
/// <param name="data">Compressed blocks, stored level by level, then layer, slice, block row and block column</param>
/// <param name="width">Width of the first level, in texels</param>
/// <param name="height">Height of the first level, in texels</param>
/// <param name="depth">Depth of the first level, in slices</param>
/// <param name="levels">Number of mipmap levels to decode</param>
/// <param name="layers">Number of layers to decode per level</param>
/// <returns>Decoded texels, 4 bytes each, tightly packed in the same level/layer/slice order</returns>
public static byte[] DecodeRgb(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
    ReadOnlySpan<ulong> blocks = MemoryMarshal.Cast<byte, ulong>(data);

    int blockCursor = 0;

    byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];

    Span<uint> texels = MemoryMarshal.Cast<byte, uint>(output);
    Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];

    int sliceStart = 0;

    for (int level = 0; level < levels; level++)
    {
        int blocksPerRow = BitUtils.DivRoundUp(width, BlockWidth);
        int blockRows = BitUtils.DivRoundUp(height, BlockHeight);

        for (int layer = 0; layer < layers; layer++)
        {
            for (int slice = 0; slice < depth; slice++)
            {
                for (int blockY = 0; blockY < blockRows; blockY++)
                {
                    int top = blockY * BlockHeight;

                    // Blocks on the bottom edge may be only partially inside the image.
                    int rows = Math.Min(BlockHeight, height - top);

                    for (int blockX = 0; blockX < blocksPerRow; blockX++)
                    {
                        int left = blockX * BlockWidth;

                        // Same for blocks on the right edge.
                        int cols = Math.Min(BlockWidth, width - left);

                        DecodeBlock(tile, blocks[blockCursor++]);

                        for (int row = 0; row < rows; row++)
                        {
                            int dst = sliceStart + ((top + row) * width) + left;
                            int src = row * BlockWidth;

                            for (int col = 0; col < cols; col++)
                            {
                                texels[dst + col] = tile[src + col] | AlphaMask;
                            }
                        }
                    }
                }

                sliceStart += width * height;
            }
        }

        // Every dimension halves on the next level, but never drops below 1.
        width = Math.Max(1, width >> 1);
        height = Math.Max(1, height >> 1);
        depth = Math.Max(1, depth >> 1);
    }

    return output;
}
/// <summary>
/// Decodes ETC2 data with punch-through alpha (one 64-bit block per 4x4 tile) into 32-bit texels.
/// The alpha byte of each texel is produced by the block decoder.
/// </summary>
/// <param name="data">Compressed blocks, stored level by level, then layer, slice, block row and block column</param>
/// <param name="width">Width of the first level, in texels</param>
/// <param name="height">Height of the first level, in texels</param>
/// <param name="depth">Depth of the first level, in slices</param>
/// <param name="levels">Number of mipmap levels to decode</param>
/// <param name="layers">Number of layers to decode per level</param>
/// <returns>Decoded texels, 4 bytes each, tightly packed in the same level/layer/slice order</returns>
public static byte[] DecodePta(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
    ReadOnlySpan<ulong> blocks = MemoryMarshal.Cast<byte, ulong>(data);

    int blockCursor = 0;

    byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];

    Span<uint> texels = MemoryMarshal.Cast<byte, uint>(output);
    Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];

    int sliceStart = 0;

    for (int level = 0; level < levels; level++)
    {
        int blocksPerRow = BitUtils.DivRoundUp(width, BlockWidth);
        int blockRows = BitUtils.DivRoundUp(height, BlockHeight);

        for (int layer = 0; layer < layers; layer++)
        {
            for (int slice = 0; slice < depth; slice++)
            {
                for (int blockY = 0; blockY < blockRows; blockY++)
                {
                    int top = blockY * BlockHeight;

                    // Blocks on the bottom edge may be only partially inside the image.
                    int rows = Math.Min(BlockHeight, height - top);

                    for (int blockX = 0; blockX < blocksPerRow; blockX++)
                    {
                        int left = blockX * BlockWidth;

                        // Same for blocks on the right edge.
                        int cols = Math.Min(BlockWidth, width - left);

                        DecodeBlockPta(tile, blocks[blockCursor++]);

                        // The tile already carries alpha, so visible rows are copied unchanged.
                        for (int row = 0; row < rows; row++)
                        {
                            int dst = sliceStart + ((top + row) * width) + left;

                            Span<uint> source = tile.Slice(row * BlockWidth, cols);
                            Span<uint> target = texels.Slice(dst, cols);

                            source.CopyTo(target);
                        }
                    }
                }

                sliceStart += width * height;
            }
        }

        // Every dimension halves on the next level, but never drops below 1.
        width = Math.Max(1, width >> 1);
        height = Math.Max(1, height >> 1);
        depth = Math.Max(1, depth >> 1);
    }

    return output;
}
/// <summary>
/// Decodes ETC2 RGBA data into 32-bit texels.
/// Every 4x4 tile is stored as two 64-bit blocks: an alpha block followed by a color block.
/// </summary>
/// <param name="data">Compressed blocks, stored level by level, then layer, slice, block row and block column</param>
/// <param name="width">Width of the first level, in texels</param>
/// <param name="height">Height of the first level, in texels</param>
/// <param name="depth">Depth of the first level, in slices</param>
/// <param name="levels">Number of mipmap levels to decode</param>
/// <param name="layers">Number of layers to decode per level</param>
/// <returns>Decoded texels, 4 bytes each, with alpha in bits 24-31</returns>
public static byte[] DecodeRgba(ReadOnlySpan<byte> data, int width, int height, int depth, int levels, int layers)
{
ReadOnlySpan<ulong> dataUlong = MemoryMarshal.Cast<byte, ulong>(data);
int inputOffset = 0;
byte[] output = new byte[CalculateOutputSize(width, height, depth, levels, layers)];
Span<uint> outputUint = MemoryMarshal.Cast<byte, uint>(output);
Span<uint> tile = stackalloc uint[BlockWidth * BlockHeight];
// Index (in texels) of the first texel of the slice currently being written.
int imageBaseOOffs = 0;
for (int l = 0; l < levels; l++)
{
int wInBlocks = BitUtils.DivRoundUp(width, BlockWidth);
int hInBlocks = BitUtils.DivRoundUp(height, BlockHeight);
for (int l2 = 0; l2 < layers; l2++)
{
for (int z = 0; z < depth; z++)
{
for (int y = 0; y < hInBlocks; y++)
{
int ty = y * BlockHeight;
// Blocks on the bottom edge may be only partially inside the image.
int bh = Math.Min(BlockHeight, height - ty);
for (int x = 0; x < wInBlocks; x++)
{
int tx = x * BlockWidth;
// Same for blocks on the right edge.
int bw = Math.Min(BlockWidth, width - tx);
ulong alphaBlock = dataUlong[inputOffset];
ulong colorBlock = dataUlong[inputOffset + 1];
inputOffset += 2;
DecodeBlock(tile, colorBlock);
// Alpha block header: byte 0 is the base alpha, byte 1 holds the table index
// in its low nibble and the multiplier in its high nibble.
byte alphaBase = (byte)alphaBlock;
int[] alphaTable = _etc2AlphaLut[(alphaBlock >> 8) & 0xf];
int alphaMultiplier = (int)(alphaBlock >> 12) & 0xf;
// Byte-swap the block so the sixteen 3-bit indices can be read with plain shifts.
ulong alphaIndices = BinaryPrimitives.ReverseEndianness(alphaBlock);
if (alphaMultiplier != 0)
{
for (int py = 0; py < bh; py++)
{
int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
for (int px = 0; px < bw; px++)
{
int oOffs = oOffsBase + px;
// Indices are ordered column by column (px * BlockHeight + py); the XOR with 0xf
// reverses that order so the first texel is read from the highest bits.
int alphaIndex = (int)((alphaIndices >> (((px * BlockHeight + py) ^ 0xf) * 3)) & 7);
byte a = Saturate(alphaBase + alphaTable[alphaIndex] * alphaMultiplier);
outputUint[oOffs] = tile[py * BlockWidth + px] | ((uint)a << 24);
}
}
}
else
{
// With a zero multiplier the table contributes nothing: every texel gets the base alpha.
uint a = (uint)alphaBase << 24;
for (int py = 0; py < bh; py++)
{
int oOffsBase = imageBaseOOffs + ((ty + py) * width) + tx;
for (int px = 0; px < bw; px++)
{
int oOffs = oOffsBase + px;
outputUint[oOffs] = tile[py * BlockWidth + px] | a;
}
}
}
}
}
imageBaseOOffs += width * height;
}
}
// Every dimension halves on the next level, but never drops below 1.
width = Math.Max(1, width >> 1);
height = Math.Max(1, height >> 1);
depth = Math.Max(1, depth >> 1);
}
return output;
}
/// <summary>
/// Decodes the color of one 64-bit block into a 4x4 tile (alpha bits are left clear).
/// The mode is selected by the differential bit and by overflow of the differential end points.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row</param>
/// <param name="block">Compressed block</param>
private static void DecodeBlock(Span<uint> tile, ulong block)
{
    uint low = (uint)block;
    uint high = (uint)(block >> 32);

    if ((low & 0x2000000) == 0)
    {
        // Individual mode: two independent 4-bit per channel base colors.
        uint ir1 = low & 0x0000f0;
        uint ig1 = (low & 0x00f000) >> 8;
        uint ib1 = (low & 0xf00000) >> 16;

        uint ir2 = (low & 0x00000f) << 4;
        uint ig2 = (low & 0x000f00) >> 4;
        uint ib2 = (low & 0x0f0000) >> 12;

        // Replicate the high nibble into the low nibble to get 8-bit channels.
        DecodeBlockETC1(
            tile,
            low,
            high,
            ir1 | (ir1 >> 4),
            ig1 | (ig1 >> 4),
            ib1 | (ib1 >> 4),
            ir2 | (ir2 >> 4),
            ig2 | (ig2 >> 4),
            ib2 | (ib2 >> 4));

        return;
    }

    (uint r1, uint g1, uint b1, uint r2, uint g2, uint b2) = UnpackRgb555DiffEndPoints(low);

    // A second end point outside of the 5-bit range selects one of the other modes.
    if (r2 > 31)
    {
        DecodeBlock59T(tile, low, high);
    }
    else if (g2 > 31)
    {
        DecodeBlock58H(tile, low, high);
    }
    else if (b2 > 31)
    {
        DecodeBlock57P(tile, block);
    }
    else
    {
        // Differential mode: expand both 5-bit end points to 8 bits.
        DecodeBlockETC1(
            tile,
            low,
            high,
            r1 | (r1 >> 5),
            g1 | (g1 >> 5),
            b1 | (b1 >> 5),
            (r2 << 3) | (r2 >> 2),
            (g2 << 3) | (g2 >> 2),
            (b2 << 3) | (b2 >> 2));
    }
}
/// <summary>
/// Decodes one 64-bit punch-through alpha block into a 4x4 tile, alpha included.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row</param>
/// <param name="block">Compressed block</param>
private static void DecodeBlockPta(Span<uint> tile, ulong block)
{
    uint low = (uint)block;
    uint high = (uint)(block >> 32);

    (uint r1, uint g1, uint b1, uint r2, uint g2, uint b2) = UnpackRgb555DiffEndPoints(low);

    bool fullyOpaque = (low & 0x2000000) != 0;

    // Opaque blocks are decoded without alpha (mask 0) and get alpha filled in afterwards.
    // Other blocks let the mode decoder pick between opaque and transparent per texel.
    uint decoderAlphaMask = fullyOpaque ? 0u : AlphaMask;

    bool planar = false;

    if (r2 > 31)
    {
        DecodeBlock59T(tile, low, high, decoderAlphaMask);
    }
    else if (g2 > 31)
    {
        DecodeBlock58H(tile, low, high, decoderAlphaMask);
    }
    else if (b2 > 31)
    {
        // Planar mode never produces transparent texels.
        DecodeBlock57P(tile, block);

        planar = true;
    }
    else
    {
        DecodeBlockETC1(
            tile,
            low,
            high,
            r1 | (r1 >> 5),
            g1 | (g1 >> 5),
            b1 | (b1 >> 5),
            (r2 << 3) | (r2 >> 2),
            (g2 << 3) | (g2 >> 2),
            (b2 << 3) | (b2 >> 2),
            decoderAlphaMask);
    }

    if (fullyOpaque || planar)
    {
        for (int texel = 0; texel < tile.Length; texel++)
        {
            tile[texel] |= AlphaMask;
        }
    }
}
/// <summary>
/// Unpacks the two end points of a differentially encoded block.
/// </summary>
/// <param name="blockLow">Low 32 bits of the block</param>
/// <returns>
/// (r1, g1, b1, r2, g2, b2). The first end point keeps its 5 bits in bits 3-7 of each channel
/// (low 3 bits clear). The second end point is the 5-bit base plus a signed 3-bit delta; a negative
/// sum wraps to a large unsigned value, so callers detect any out of range result with "&gt; 31".
/// </returns>
private static (uint, uint, uint, uint, uint, uint) UnpackRgb555DiffEndPoints(uint blockLow)
{
uint r1 = (blockLow & 0x0000f8) >> 0;
uint g1 = (blockLow & 0x00f800) >> 8;
uint b1 = (blockLow & 0xf80000) >> 16;
// The 3-bit delta is moved to the top of an sbyte and arithmetically shifted back down to sign-extend it.
uint r2 = (uint)((sbyte)(r1 >> 3) + ((sbyte)((blockLow & 0x000007) << 5) >> 5));
uint g2 = (uint)((sbyte)(g1 >> 3) + ((sbyte)((blockLow & 0x000700) >> 3) >> 5));
uint b2 = (uint)((sbyte)(b1 >> 3) + ((sbyte)((blockLow & 0x070000) >> 11) >> 5));
return (r1, g1, b1, r2, g2, b2);
}
/// <summary>
/// Decodes a "T" mode block: a 4-entry palette made of the first color, the second color,
/// and the second color moved up and down by a distance.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row</param>
/// <param name="blockLow">Low 32 bits of the block (colors and distance index)</param>
/// <param name="blockHigh">High 32 bits of the block (per-texel palette indices)</param>
/// <param name="alphaMask">
/// When non-zero, texels using palette entry 2 become 0 (transparent) and all others get this mask OR-ed in.
/// When zero, alpha bits are left clear
/// </param>
private static void DecodeBlock59T(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0)
{
// Two 4-bit per channel colors; the red bits of the first color are split across two fields.
uint r1 = (blockLow & 3) | ((blockLow >> 1) & 0xc);
uint g1 = (blockLow >> 12) & 0xf;
uint b1 = (blockLow >> 8) & 0xf;
uint r2 = (blockLow >> 20) & 0xf;
uint g2 = (blockLow >> 16) & 0xf;
uint b2 = (blockLow >> 28) & 0xf;
// Expand 4-bit channels to 8 bits by replicating the nibble.
r1 |= r1 << 4;
g1 |= g1 << 4;
b1 |= b1 << 4;
r2 |= r2 << 4;
g2 |= g2 << 4;
b2 |= b2 << 4;
// The 3-bit distance index is also split across two fields.
int dist = _etc2Lut[((blockLow >> 24) & 1) | ((blockLow >> 25) & 6)];
Span<uint> palette = stackalloc uint[4];
palette[0] = Pack(r1, g1, b1);
palette[1] = Pack(r2, g2, b2, dist);
palette[2] = Pack(r2, g2, b2);
palette[3] = Pack(r2, g2, b2, -dist);
// Byte-swap so the two index bit planes can be read with plain shifts.
blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh);
for (int y = 0; y < BlockHeight; y++)
{
for (int x = 0; x < BlockWidth; x++)
{
int offset = (y * 4) + x;
// Indices are stored column by column.
int index = (x * 4) + y;
// Low index bit comes from bit "index", high index bit from bit "index + 16".
int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2);
tile[offset] = palette[paletteIndex];
if (alphaMask != 0)
{
if (paletteIndex == 2)
{
tile[offset] = 0;
}
else
{
tile[offset] |= alphaMask;
}
}
}
}
}
/// <summary>
/// Decodes an "H" mode block: a 4-entry palette made of both colors, each moved up and down by a distance.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row</param>
/// <param name="blockLow">Low 32 bits of the block (colors and distance index)</param>
/// <param name="blockHigh">High 32 bits of the block (per-texel palette indices)</param>
/// <param name="alphaMask">
/// When non-zero, texels using palette entry 2 become 0 (transparent) and all others get this mask OR-ed in.
/// When zero, alpha bits are left clear
/// </param>
private static void DecodeBlock58H(Span<uint> tile, uint blockLow, uint blockHigh, uint alphaMask = 0)
{
// Two 4-bit per channel colors; several channels are split across non-adjacent bit fields.
uint r1 = (blockLow >> 3) & 0xf;
uint g1 = ((blockLow << 1) & 0xe) | ((blockLow >> 12) & 1);
uint b1 = ((blockLow >> 23) & 1) | ((blockLow >> 7) & 6) | ((blockLow >> 8) & 8);
uint r2 = (blockLow >> 19) & 0xf;
uint g2 = ((blockLow >> 31) & 1) | ((blockLow >> 15) & 0xe);
uint b2 = (blockLow >> 27) & 0xf;
// 12-bit packed forms of both colors, compared below to derive one bit of the distance index.
uint rgb1 = Pack4Be(r1, g1, b1);
uint rgb2 = Pack4Be(r2, g2, b2);
// Expand 4-bit channels to 8 bits by replicating the nibble.
r1 |= r1 << 4;
g1 |= g1 << 4;
b1 |= b1 << 4;
r2 |= r2 << 4;
g2 |= g2 << 4;
b2 |= b2 << 4;
// Distance index: bit 0 is implied by the ordering of the two colors, bits 1-2 are stored in the block.
int dist = _etc2Lut[(rgb1 >= rgb2 ? 1u : 0u) | ((blockLow >> 23) & 2) | ((blockLow >> 24) & 4)];
Span<uint> palette = stackalloc uint[4];
palette[0] = Pack(r1, g1, b1, dist);
palette[1] = Pack(r1, g1, b1, -dist);
palette[2] = Pack(r2, g2, b2, dist);
palette[3] = Pack(r2, g2, b2, -dist);
// Byte-swap so the two index bit planes can be read with plain shifts.
blockHigh = BinaryPrimitives.ReverseEndianness(blockHigh);
for (int y = 0; y < BlockHeight; y++)
{
for (int x = 0; x < BlockWidth; x++)
{
int offset = (y * 4) + x;
// Indices are stored column by column.
int index = (x * 4) + y;
// Low index bit comes from bit "index", high index bit from bit "index + 16".
int paletteIndex = (int)((blockHigh >> index) & 1) | (int)((blockHigh >> (index + 15)) & 2);
tile[offset] = palette[paletteIndex];
if (alphaMask != 0)
{
if (paletteIndex == 2)
{
tile[offset] = 0;
}
else
{
tile[offset] |= alphaMask;
}
}
}
}
}
/// <summary>
/// Decodes a planar mode block: each channel is a linear function of the texel position,
/// defined by an origin color and two more colors along the horizontal and vertical directions.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row (alpha bits are left clear)</param>
/// <param name="block">Compressed block</param>
private static void DecodeBlock57P(Span<uint> tile, ulong block)
{
// Origin (0), horizontal (h) and vertical (v) colors: 6 bits for red and blue, 7 bits for green.
// Most fields are split across non-adjacent bit ranges of the block.
int r0 = (int)((block >> 1) & 0x3f);
int g0 = (int)(((block >> 9) & 0x3f) | ((block & 1) << 6));
int b0 = (int)(((block >> 31) & 1) | ((block >> 15) & 6) | ((block >> 16) & 0x18) | ((block >> 3) & 0x20));
int rh = (int)(((block >> 24) & 1) | ((block >> 25) & 0x3e));
int gh = (int)((block >> 33) & 0x7f);
int bh = (int)(((block >> 43) & 0x1f) | ((block >> 27) & 0x20));
int rv = (int)(((block >> 53) & 7) | ((block >> 37) & 0x38));
int gv = (int)(((block >> 62) & 3) | ((block >> 46) & 0x7c));
int bv = (int)((block >> 56) & 0x3f);
// Expand to 8 bits by appending the top bits of each value below it.
r0 = (r0 << 2) | (r0 >> 4);
g0 = (g0 << 1) | (g0 >> 6);
b0 = (b0 << 2) | (b0 >> 4);
rh = (rh << 2) | (rh >> 4);
gh = (gh << 1) | (gh >> 6);
bh = (bh << 2) | (bh >> 4);
rv = (rv << 2) | (rv >> 4);
gv = (gv << 1) | (gv >> 6);
bv = (bv << 2) | (bv >> 4);
for (int y = 0; y < BlockHeight; y++)
{
for (int x = 0; x < BlockWidth; x++)
{
int offset = y * BlockWidth + x;
// origin + x * (h - origin) / 4 + y * (v - origin) / 4, computed in quarters and rounded (+2, >> 2).
byte r = Saturate(((x * (rh - r0)) + (y * (rv - r0)) + (r0 * 4) + 2) >> 2);
byte g = Saturate(((x * (gh - g0)) + (y * (gv - g0)) + (g0 * 4) + 2) >> 2);
byte b = Saturate(((x * (bh - b0)) + (y * (bv - b0)) + (b0 * 4) + 2) >> 2);
tile[offset] = Pack(r, g, b);
}
}
}
/// <summary>
/// Decodes an ETC1-style block: the tile is split in two halves, each with its own
/// base color and modifier table.
/// </summary>
/// <param name="tile">Destination for the 16 decoded texels, row by row</param>
/// <param name="blockLow">Low 32 bits of the block (table selectors and flip bit)</param>
/// <param name="blockHigh">High 32 bits of the block (per-texel modifier indices)</param>
/// <param name="r1">Red of the first half, 8 bits</param>
/// <param name="g1">Green of the first half, 8 bits</param>
/// <param name="b1">Blue of the first half, 8 bits</param>
/// <param name="r2">Red of the second half, 8 bits</param>
/// <param name="g2">Green of the second half, 8 bits</param>
/// <param name="b2">Blue of the second half, 8 bits</param>
/// <param name="alphaMask">Forwarded to <see cref="CalculatePixel"/>; zero leaves alpha bits clear</param>
private static void DecodeBlockETC1(
    Span<uint> tile,
    uint blockLow,
    uint blockHigh,
    uint r1,
    uint g1,
    uint b1,
    uint r2,
    uint g2,
    uint b2,
    uint alphaMask = 0)
{
    int[] firstTable = _etc1Lut[(blockLow >> 29) & 7];
    int[] secondTable = _etc1Lut[(blockLow >> 26) & 7];

    bool flipped = (blockLow & 0x1000000) != 0;

    // Not flipped: left and right halves (2 columns each), the second one starts 2 columns to the right.
    // Flipped: top and bottom halves (2 rows each), the second one starts 2 rows below.
    int halfWidth = flipped ? BlockWidth : BlockWidth / 2;
    int halfHeight = flipped ? BlockHeight / 2 : BlockHeight;
    int shiftX = flipped ? 0 : 2;
    int shiftY = flipped ? 2 : 0;

    for (int y = 0; y < halfHeight; y++)
    {
        for (int x = 0; x < halfWidth; x++)
        {
            int x2 = x + shiftX;
            int y2 = y + shiftY;

            uint firstColor = CalculatePixel(r1, g1, b1, x, y, blockHigh, firstTable, alphaMask);
            uint secondColor = CalculatePixel(r2, g2, b2, x2, y2, blockHigh, secondTable, alphaMask);

            tile[(y * BlockWidth) + x] = firstColor;
            tile[(y2 * BlockWidth) + x2] = secondColor;
        }
    }
}
/// <summary>
/// Computes one texel of an ETC1-style half block from its base color and its 2-bit modifier index.
/// </summary>
/// <param name="r">Base red, 8 bits</param>
/// <param name="g">Base green, 8 bits</param>
/// <param name="b">Base blue, 8 bits</param>
/// <param name="x">Texel column inside the tile</param>
/// <param name="y">Texel row inside the tile</param>
/// <param name="block">High 32 bits of the block, holding the two index bit planes</param>
/// <param name="table">Modifier table of the half block the texel belongs to</param>
/// <param name="alphaMask">
/// When zero, the modified color is returned with alpha bits clear.
/// When non-zero: index 0 returns the unmodified base color, index 2 returns 0 (transparent),
/// other indices return the modified color; non-transparent results get this mask OR-ed in
/// </param>
/// <returns>Packed texel</returns>
private static uint CalculatePixel(uint r, uint g, uint b, int x, int y, uint block, int[] table, uint alphaMask)
{
// Indices are stored column by column.
int index = x * BlockHeight + y;
// Pre-shifted copy, so the second index bit lands directly on bit 1 after the shifts below.
uint msb = block << 1;
// The shift amounts differ for the first and last 8 texels; one bit of the index is taken
// from "block" and the other from "msb".
uint tableIndex = index < 8
? ((block >> (index + 24)) & 1) + ((msb >> (index + 8)) & 2)
: ((block >> (index + 8)) & 1) + ((msb >> (index - 8)) & 2);
if (alphaMask != 0)
{
if (tableIndex == 0)
{
return Pack(r, g, b) | alphaMask;
}
else if (tableIndex == 2)
{
return 0;
}
else
{
return Pack(r, g, b, table[tableIndex]) | alphaMask;
}
}
return Pack(r, g, b, table[tableIndex]);
}
/// <summary>
/// Adds the same signed offset to all three channels, saturates each to 0-255 and packs the result.
/// </summary>
/// <param name="r">Red channel</param>
/// <param name="g">Green channel</param>
/// <param name="b">Blue channel</param>
/// <param name="offset">Value added to every channel before saturation</param>
/// <returns>Packed color with red in the lowest byte and alpha bits clear</returns>
private static uint Pack(uint r, uint g, uint b, int offset)
{
    byte red = Saturate((int)(r + offset));
    byte green = Saturate((int)(g + offset));
    byte blue = Saturate((int)(b + offset));

    return Pack(red, green, blue);
}
/// <summary>
/// Packs three channels into one value: red in bits 0-7, green from bit 8, blue from bit 16.
/// Inputs are not masked, so they are expected to fit in 8 bits.
/// </summary>
private static uint Pack(uint r, uint g, uint b)
{
    uint green = g << 8;
    uint blue = b << 16;

    return blue | green | r;
}
/// <summary>
/// Packs three 4-bit channels into a 12-bit value with red in the most significant nibble.
/// Inputs are not masked, so they are expected to fit in 4 bits.
/// </summary>
private static uint Pack4Be(uint r, uint g, uint b)
{
    uint red = r << 8;
    uint green = g << 4;

    return red | green | b;
}
/// <summary>
/// Clamps a value to the 0-255 range and returns it as a byte.
/// </summary>
private static byte Saturate(int value)
{
    return (byte)Math.Clamp(value, byte.MinValue, byte.MaxValue);
}
/// <summary>
/// Computes the size, in bytes, of the decoded output: 4 bytes per texel,
/// summed over every level, with each dimension shifted right by the level and clamped to at least 1.
/// </summary>
private static int CalculateOutputSize(int width, int height, int depth, int levels, int layers)
{
    const int BytesPerTexel = 4;

    int total = 0;

    for (int level = 0; level < levels; level++)
    {
        int levelWidth = Math.Max(1, width >> level);
        int levelHeight = Math.Max(1, height >> level);
        int levelDepth = Math.Max(1, depth >> level);

        total += levelWidth * levelHeight * levelDepth * layers * BytesPerTexel;
    }

    return total;
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,10 @@
namespace Ryujinx.Graphics.Texture.Encoders
{
    /// <summary>
    /// Encoder options: a mode stored in the low 8 bits, optionally combined with flag bits above them.
    /// </summary>
    enum EncodeMode
    {
        /// <summary>Mode value 0.</summary>
        Fast = 0,

        /// <summary>Mode value 1.</summary>
        Exhaustive = 1,

        /// <summary>Mask that isolates the mode from the flag bits.</summary>
        ModeMask = 0xff,

        /// <summary>Flag bit (bit 8) that can be OR-ed with a mode.</summary>
        Multithreaded = 0x100
    }
}

View file

@ -0,0 +1,591 @@
using Ryujinx.Common;
using System;
using System.Runtime.Intrinsics;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
{
public static class LayoutConverter
{
public const int HostStrideAlignment = 4;
/// <summary>
/// Converts a single 2D image from block linear layout to linear (row by row) layout.
/// </summary>
/// <param name="dst">Destination for the linear data; consecutive rows start <paramref name="stride"/> bytes apart</param>
/// <param name="width">Image width, in pixels (or blocks, for compressed formats)</param>
/// <param name="height">Image height, in rows</param>
/// <param name="stride">Distance between two destination rows, in bytes</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes. Must be 1, 2, 4, 8, 12 or 16</param>
/// <param name="gobBlocksInY">GOBs per block along Y of the source layout</param>
/// <param name="data">Source data in block linear layout</param>
/// <exception cref="NotSupportedException"><paramref name="bytesPerPixel"/> is not a supported size</exception>
public static void ConvertBlockLinearToLinear(
    Span<byte> dst,
    int width,
    int height,
    int stride,
    int bytesPerPixel,
    int gobBlocksInY,
    ReadOnlySpan<byte> data)
{
    // Portions of each row (in bytes) that can be copied 64 and 16 bytes at a time.
    int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16);
    int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);

    // First pixel that is not covered by the vector copies.
    int xStart = strideTrunc / bytesPerPixel;

    int outStrideGap = stride - width * bytesPerPixel;

    int alignment = GobStride / bytesPerPixel;

    int wAligned = BitUtils.AlignUp(width, alignment);

    BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);

    unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
    {
        fixed (byte* outputPtr = output, dataPtr = data)
        {
            byte* outPtr = outputPtr;

            for (int y = 0; y < height; y++)
            {
                layoutConverter.SetY(y);

                // 64 bytes of a row are stored as four 16-byte pieces at fixed distances from each other.
                for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
                {
                    byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset64(x);
                    byte* offset2 = offset + 0x20;
                    byte* offset3 = offset + 0x100;
                    byte* offset4 = offset + 0x120;

                    Vector128<byte> value = *(Vector128<byte>*)offset;
                    Vector128<byte> value2 = *(Vector128<byte>*)offset2;
                    Vector128<byte> value3 = *(Vector128<byte>*)offset3;
                    Vector128<byte> value4 = *(Vector128<byte>*)offset4;

                    *(Vector128<byte>*)outPtr = value;
                    *(Vector128<byte>*)(outPtr + 16) = value2;
                    *(Vector128<byte>*)(outPtr + 32) = value3;
                    *(Vector128<byte>*)(outPtr + 48) = value4;
                }

                // Remaining whole 16-byte pieces.
                for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
                {
                    byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset16(x);

                    *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
                }

                // Remaining pixels, one at a time.
                for (int x = xStart; x < width; x++, outPtr += bytesPerPixel)
                {
                    byte* offset = dataPtr + layoutConverter.GetOffset(x);

                    *(T*)outPtr = *(T*)offset;
                }

                // Skip the padding at the end of the destination row.
                outPtr += outStrideGap;
            }
        }

        return true;
    }

    // The switch picks a pixel type of the right size for the per-pixel copy.
    bool _ = bytesPerPixel switch
    {
        1 => Convert<byte>(dst, data),
        2 => Convert<ushort>(dst, data),
        4 => Convert<uint>(dst, data),
        8 => Convert<ulong>(dst, data),
        12 => Convert<Bpp12Pixel>(dst, data),
        16 => Convert<Vector128<byte>>(dst, data),
        _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
    };
}
/// <summary>
/// Converts a texture with mipmap levels and layers from block linear layout to linear layout.
/// </summary>
/// <param name="width">Width of the first level, in pixels</param>
/// <param name="height">Height of the first level, in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="sliceDepth">Number of depth slices of the first level that are converted and sized for in the output</param>
/// <param name="levels">Number of mipmap levels</param>
/// <param name="layers">Number of layers</param>
/// <param name="blockWidth">Width of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="blockHeight">Height of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes. Must be 1, 2, 4, 8, 12 or 16</param>
/// <param name="gobBlocksInY">GOBs per block along Y for the first level</param>
/// <param name="gobBlocksInZ">GOBs per block along Z for the first level</param>
/// <param name="gobBlocksInTileX">Multiplier applied to the width alignment of large levels</param>
/// <param name="sizeInfo">Provides the layer size and the offset of each level inside <paramref name="data"/></param>
/// <param name="data">Source data in block linear layout</param>
/// <returns>Linear data; rows are padded to <see cref="HostStrideAlignment"/> bytes</returns>
/// <exception cref="NotSupportedException"><paramref name="bytesPerPixel"/> is not a supported size</exception>
public static byte[] ConvertBlockLinearToLinear(
    int width,
    int height,
    int depth,
    int sliceDepth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    int outSize = GetTextureSize(
        width,
        height,
        sliceDepth,
        levels,
        layers,
        blockWidth,
        blockHeight,
        bytesPerPixel);

    byte[] output = new byte[outSize];

    int outOffs = 0;

    int mipGobBlocksInY = gobBlocksInY;
    int mipGobBlocksInZ = gobBlocksInZ;

    int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
    int gobHeight = gobBlocksInY * GobHeight;

    for (int level = 0; level < levels; level++)
    {
        int w = Math.Max(1, width >> level);
        int h = Math.Max(1, height >> level);
        int d = Math.Max(1, depth >> level);

        // From here on, w and h are measured in blocks.
        w = BitUtils.DivRoundUp(w, blockWidth);
        h = BitUtils.DivRoundUp(h, blockHeight);

        // Shrink the block dimensions while the level still fits in half of them.
        while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
        {
            mipGobBlocksInY >>= 1;
        }

        while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
        {
            mipGobBlocksInZ >>= 1;
        }

        // Portions of each row (in bytes) that can be copied 64 and 16 bytes at a time.
        int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
        int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);

        // First pixel that is not covered by the vector copies.
        int xStart = strideTrunc / bytesPerPixel;

        int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

        int outStrideGap = stride - w * bytesPerPixel;

        int alignment = gobWidth;

        if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
        {
            alignment = GobStride / bytesPerPixel;
        }

        int wAligned = BitUtils.AlignUp(w, alignment);

        BlockLinearLayout layoutConverter = new BlockLinearLayout(
            wAligned,
            h,
            mipGobBlocksInY,
            mipGobBlocksInZ,
            bytesPerPixel);

        int sd = Math.Max(1, sliceDepth >> level);

        unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
        {
            fixed (byte* outputPtr = output, dataPtr = data)
            {
                byte* outPtr = outputPtr + outOffs;

                for (int layer = 0; layer < layers; layer++)
                {
                    byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));

                    for (int z = 0; z < sd; z++)
                    {
                        layoutConverter.SetZ(z);

                        for (int y = 0; y < h; y++)
                        {
                            layoutConverter.SetY(y);

                            // 64 bytes of a row are stored as four 16-byte pieces at fixed distances from each other.
                            for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
                                byte* offset2 = offset + 0x20;
                                byte* offset3 = offset + 0x100;
                                byte* offset4 = offset + 0x120;

                                Vector128<byte> value = *(Vector128<byte>*)offset;
                                Vector128<byte> value2 = *(Vector128<byte>*)offset2;
                                Vector128<byte> value3 = *(Vector128<byte>*)offset3;
                                Vector128<byte> value4 = *(Vector128<byte>*)offset4;

                                *(Vector128<byte>*)outPtr = value;
                                *(Vector128<byte>*)(outPtr + 16) = value2;
                                *(Vector128<byte>*)(outPtr + 32) = value3;
                                *(Vector128<byte>*)(outPtr + 48) = value4;
                            }

                            // Remaining whole 16-byte pieces.
                            for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);

                                *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
                            }

                            // Remaining pixels, one at a time.
                            for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffset(x);

                                *(T*)outPtr = *(T*)offset;
                            }

                            // Skip the padding at the end of the output row.
                            outPtr += outStrideGap;
                        }
                    }
                }

                // NOTE(review): the output is sized and filled using sliceDepth (sd), but this advance uses
                // the full depth (d). The two agree when sliceDepth == depth; confirm callers never combine
                // sliceDepth < depth with more than one level.
                outOffs += stride * h * d * layers;
            }

            return true;
        }

        // The switch picks a pixel type of the right size for the per-pixel copy.
        bool _ = bytesPerPixel switch
        {
            1 => Convert<byte>(output, data),
            2 => Convert<ushort>(output, data),
            4 => Convert<uint>(output, data),
            8 => Convert<ulong>(output, data),
            12 => Convert<Bpp12Pixel>(output, data),
            16 => Convert<Vector128<byte>>(output, data),
            _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
        };
    }

    return output;
}
/// <summary>
/// Repacks linear data whose rows are <paramref name="stride"/> bytes apart into rows that are
/// padded to <see cref="HostStrideAlignment"/> bytes.
/// </summary>
/// <param name="width">Image width, in pixels</param>
/// <param name="height">Image height, in pixels</param>
/// <param name="blockWidth">Width of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="blockHeight">Height of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="lineSize">Number of bytes to copy per row; clamped to the output row size</param>
/// <param name="stride">Distance between two input rows, in bytes</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes</param>
/// <param name="data">Input data</param>
/// <returns>Newly allocated output data</returns>
public static byte[] ConvertLinearStridedToLinear(
    int width,
    int height,
    int blockWidth,
    int blockHeight,
    int lineSize,
    int stride,
    int bytesPerPixel,
    ReadOnlySpan<byte> data)
{
    int blocksPerRow = BitUtils.DivRoundUp(width, blockWidth);
    int rows = BitUtils.DivRoundUp(height, blockHeight);

    int outStride = BitUtils.AlignUp(blocksPerRow * bytesPerPixel, HostStrideAlignment);

    // Never copy more than what fits in one output row.
    int copySize = Math.Min(lineSize, outStride);

    byte[] output = new byte[rows * outStride];

    for (int row = 0, src = 0, dst = 0; row < rows; row++, src += stride, dst += outStride)
    {
        ReadOnlySpan<byte> sourceLine = data.Slice(src, copySize);

        sourceLine.CopyTo(output.AsSpan(dst, copySize));
    }

    return output;
}
/// <summary>
/// Converts a single 2D image from linear (row by row) layout to block linear layout.
/// </summary>
/// <param name="dst">Destination for the block linear data</param>
/// <param name="width">Image width, in pixels (or blocks, for compressed formats)</param>
/// <param name="height">Image height, in rows</param>
/// <param name="stride">Distance between two source rows, in bytes</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes. Must be 1, 2, 4, 8, 12 or 16</param>
/// <param name="gobBlocksInY">GOBs per block along Y of the destination layout</param>
/// <param name="data">Source data in linear layout</param>
/// <exception cref="NotSupportedException"><paramref name="bytesPerPixel"/> is not a supported size</exception>
public static void ConvertLinearToBlockLinear(
    Span<byte> dst,
    int width,
    int height,
    int stride,
    int bytesPerPixel,
    int gobBlocksInY,
    ReadOnlySpan<byte> data)
{
    // Portions of each row (in bytes) that can be copied 64 and 16 bytes at a time.
    int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16);
    int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);

    // First pixel that is not covered by the vector copies.
    int xStart = strideTrunc / bytesPerPixel;

    int inStrideGap = stride - width * bytesPerPixel;

    int alignment = GobStride / bytesPerPixel;

    int wAligned = BitUtils.AlignUp(width, alignment);

    BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);

    unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
    {
        fixed (byte* outputPtr = output, dataPtr = data)
        {
            byte* inPtr = dataPtr;

            for (int y = 0; y < height; y++)
            {
                layoutConverter.SetY(y);

                // 64 bytes of a row are stored as four 16-byte pieces at fixed distances from each other.
                for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x);
                    byte* offset2 = offset + 0x20;
                    byte* offset3 = offset + 0x100;
                    byte* offset4 = offset + 0x120;

                    Vector128<byte> value = *(Vector128<byte>*)inPtr;
                    Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
                    Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
                    Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);

                    *(Vector128<byte>*)offset = value;
                    *(Vector128<byte>*)offset2 = value2;
                    *(Vector128<byte>*)offset3 = value3;
                    *(Vector128<byte>*)offset4 = value4;
                }

                // Remaining whole 16-byte pieces.
                for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x);

                    *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
                }

                // Remaining pixels, one at a time.
                for (int x = xStart; x < width; x++, inPtr += bytesPerPixel)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffset(x);

                    *(T*)offset = *(T*)inPtr;
                }

                // Skip the padding at the end of the source row.
                inPtr += inStrideGap;
            }
        }

        return true;
    }

    // The switch picks a pixel type of the right size for the per-pixel copy.
    bool _ = bytesPerPixel switch
    {
        1 => Convert<byte>(dst, data),
        2 => Convert<ushort>(dst, data),
        4 => Convert<uint>(dst, data),
        8 => Convert<ulong>(dst, data),
        12 => Convert<Bpp12Pixel>(dst, data),
        16 => Convert<Vector128<byte>>(dst, data),
        _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
    };
}
/// <summary>
/// Converts a texture with mipmap levels and layers from linear layout to block linear layout.
/// </summary>
/// <param name="output">Destination for the block linear data; when empty, a new buffer of <c>sizeInfo.TotalSize</c> bytes is allocated</param>
/// <param name="width">Width of the first level, in pixels</param>
/// <param name="height">Height of the first level, in pixels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="sliceDepth">Number of depth slices of the first level that are converted</param>
/// <param name="levels">Number of mipmap levels</param>
/// <param name="layers">Number of layers</param>
/// <param name="blockWidth">Width of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="blockHeight">Height of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes. Must be 1, 2, 4, 8, 12 or 16</param>
/// <param name="gobBlocksInY">GOBs per block along Y for the first level</param>
/// <param name="gobBlocksInZ">GOBs per block along Z for the first level</param>
/// <param name="gobBlocksInTileX">Multiplier applied to the width alignment of large levels</param>
/// <param name="sizeInfo">Provides the layer size and the offset of each level inside the output</param>
/// <param name="data">Source data in linear layout; rows are padded to <see cref="HostStrideAlignment"/> bytes</param>
/// <returns>The buffer that received the block linear data</returns>
/// <exception cref="NotSupportedException"><paramref name="bytesPerPixel"/> is not a supported size</exception>
public static ReadOnlySpan<byte> ConvertLinearToBlockLinear(
    Span<byte> output,
    int width,
    int height,
    int depth,
    int sliceDepth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    if (output.Length == 0)
    {
        output = new byte[sizeInfo.TotalSize];
    }

    int inOffs = 0;

    int mipGobBlocksInY = gobBlocksInY;
    int mipGobBlocksInZ = gobBlocksInZ;

    int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
    int gobHeight = gobBlocksInY * GobHeight;

    for (int level = 0; level < levels; level++)
    {
        int w = Math.Max(1, width >> level);
        int h = Math.Max(1, height >> level);
        int d = Math.Max(1, depth >> level);

        // From here on, w and h are measured in blocks.
        w = BitUtils.DivRoundUp(w, blockWidth);
        h = BitUtils.DivRoundUp(h, blockHeight);

        // Shrink the block dimensions while the level still fits in half of them.
        while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
        {
            mipGobBlocksInY >>= 1;
        }

        while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
        {
            mipGobBlocksInZ >>= 1;
        }

        // Portions of each row (in bytes) that can be copied 64 and 16 bytes at a time.
        int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
        int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);

        // First pixel that is not covered by the vector copies.
        int xStart = strideTrunc / bytesPerPixel;

        int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

        int inStrideGap = stride - w * bytesPerPixel;

        int alignment = gobWidth;

        if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
        {
            alignment = GobStride / bytesPerPixel;
        }

        int wAligned = BitUtils.AlignUp(w, alignment);

        BlockLinearLayout layoutConverter = new BlockLinearLayout(
            wAligned,
            h,
            mipGobBlocksInY,
            mipGobBlocksInZ,
            bytesPerPixel);

        int sd = Math.Max(1, sliceDepth >> level);

        unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
        {
            fixed (byte* outputPtr = output, dataPtr = data)
            {
                byte* inPtr = dataPtr + inOffs;

                for (int layer = 0; layer < layers; layer++)
                {
                    byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));

                    for (int z = 0; z < sd; z++)
                    {
                        layoutConverter.SetZ(z);

                        for (int y = 0; y < h; y++)
                        {
                            layoutConverter.SetY(y);

                            // 64 bytes of a row are stored as four 16-byte pieces at fixed distances from each other.
                            for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
                            {
                                byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
                                byte* offset2 = offset + 0x20;
                                byte* offset3 = offset + 0x100;
                                byte* offset4 = offset + 0x120;

                                Vector128<byte> value = *(Vector128<byte>*)inPtr;
                                Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
                                Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
                                Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);

                                *(Vector128<byte>*)offset = value;
                                *(Vector128<byte>*)offset2 = value2;
                                *(Vector128<byte>*)offset3 = value3;
                                *(Vector128<byte>*)offset4 = value4;
                            }

                            // Remaining whole 16-byte pieces.
                            for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
                            {
                                byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);

                                *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
                            }

                            // Remaining pixels, one at a time.
                            for (int x = xStart; x < w; x++, inPtr += bytesPerPixel)
                            {
                                byte* offset = outBaseOffset + layoutConverter.GetOffset(x);

                                *(T*)offset = *(T*)inPtr;
                            }

                            // Skip the padding at the end of the input row.
                            inPtr += inStrideGap;
                        }
                    }
                }

                // NOTE(review): the loops above consume sliceDepth (sd) slices per layer, but this advance
                // uses the full depth (d). The two agree when sliceDepth == depth; confirm callers never
                // combine sliceDepth < depth with more than one level.
                inOffs += stride * h * d * layers;
            }

            return true;
        }

        // The switch picks a pixel type of the right size for the per-pixel copy.
        bool _ = bytesPerPixel switch
        {
            1 => Convert<byte>(output, data),
            2 => Convert<ushort>(output, data),
            4 => Convert<uint>(output, data),
            8 => Convert<ulong>(output, data),
            12 => Convert<Bpp12Pixel>(output, data),
            16 => Convert<Vector128<byte>>(output, data),
            _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
        };
    }

    return output;
}
/// <summary>
/// Repacks linear data whose rows are padded to <see cref="HostStrideAlignment"/> bytes into rows
/// that are <paramref name="stride"/> bytes apart.
/// </summary>
/// <param name="output">Destination; when empty, a new buffer is allocated (or the input is returned as is, if no repacking is needed)</param>
/// <param name="width">Image width, in pixels</param>
/// <param name="height">Image height, in pixels</param>
/// <param name="blockWidth">Width of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="blockHeight">Height of a compression block, in pixels (1 for uncompressed formats)</param>
/// <param name="stride">Distance between two output rows, in bytes</param>
/// <param name="bytesPerPixel">Size of one pixel (or block), in bytes</param>
/// <param name="data">Input data</param>
/// <returns>The buffer that holds the strided data</returns>
public static ReadOnlySpan<byte> ConvertLinearToLinearStrided(
    Span<byte> output,
    int width,
    int height,
    int blockWidth,
    int blockHeight,
    int stride,
    int bytesPerPixel,
    ReadOnlySpan<byte> data)
{
    int w = BitUtils.DivRoundUp(width, blockWidth);
    int h = BitUtils.DivRoundUp(height, blockHeight);

    int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

    // A row holds w blocks of bytesPerPixel bytes each. Using the width in pixels here would copy
    // blockWidth times too many bytes per row for block compressed formats.
    int lineSize = w * bytesPerPixel;

    if (inStride == stride)
    {
        // Rows are already laid out as requested, nothing to repack.
        if (output.Length != 0)
        {
            data.CopyTo(output);
            return output;
        }
        else
        {
            return data;
        }
    }

    if (output.Length == 0)
    {
        output = new byte[h * stride];
    }

    int inOffs = 0;
    int outOffs = 0;

    for (int y = 0; y < h; y++)
    {
        data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize));

        inOffs += inStride;
        outOffs += stride;
    }

    return output;
}
/// <summary>
/// Computes the size, in bytes, of a texture in linear layout with rows padded to
/// <see cref="HostStrideAlignment"/> bytes, including all levels and layers.
/// </summary>
private static int GetTextureSize(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel)
{
    int sizePerLayer = 0;

    for (int level = 0; level < levels; level++)
    {
        int levelDepth = Math.Max(1, depth >> level);

        // Width and height of the level, in blocks.
        int blocksPerRow = BitUtils.DivRoundUp(Math.Max(1, width >> level), blockWidth);
        int rows = BitUtils.DivRoundUp(Math.Max(1, height >> level), blockHeight);

        int rowSize = BitUtils.AlignUp(blocksPerRow * bytesPerPixel, HostStrideAlignment);

        sizePerLayer += rowSize * rows * levelDepth;
    }

    return sizePerLayer * layers;
}
}
}

View file

@ -0,0 +1,141 @@
using Ryujinx.Common;
using System;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// Computes byte offsets of texels for either a linear (stride based) layout
/// or a block linear layout (delegated to <see cref="BlockLinearLayout"/>).
/// </summary>
public class OffsetCalculator
{
    private readonly int _width;
    private readonly int _height;
    private readonly int _stride;
    private readonly bool _isLinear;
    private readonly int _bytesPerPixel;

    private BlockLinearLayout _layoutConverter;

    // Byte offset of the line selected by the last SetY call (linear layout only).
    private int _yPart;

    /// <summary>
    /// Creates a calculator. A block linear converter is only built when <paramref name="isLinear"/> is false.
    /// </summary>
    public OffsetCalculator(
        int width,
        int height,
        int stride,
        bool isLinear,
        int gobBlocksInY,
        int gobBlocksInZ,
        int bytesPerPixel)
    {
        _width = width;
        _height = height;
        _stride = stride;
        _isLinear = isLinear;
        _bytesPerPixel = bytesPerPixel;

        // The block linear width is rounded up to a whole number of GOB lines.
        int alignedWidth = BitUtils.AlignUp(width, GobStride / bytesPerPixel);

        if (isLinear)
        {
            return;
        }

        _layoutConverter = new BlockLinearLayout(
            alignedWidth,
            height,
            gobBlocksInY,
            gobBlocksInZ,
            bytesPerPixel);
    }

    /// <summary>
    /// Creates a calculator with a single GOB block in the Z direction.
    /// </summary>
    public OffsetCalculator(
        int width,
        int height,
        int stride,
        bool isLinear,
        int gobBlocksInY,
        int bytesPerPixel) : this(width, height, stride, isLinear, gobBlocksInY, 1, bytesPerPixel)
    {
    }

    /// <summary>
    /// Selects the line used by the single-argument offset methods.
    /// </summary>
    public void SetY(int y)
    {
        if (!_isLinear)
        {
            _layoutConverter.SetY(y);

            return;
        }

        _yPart = y * _stride;
    }

    /// <summary>
    /// Gets the byte offset of the texel at (<paramref name="x"/>, <paramref name="y"/>).
    /// </summary>
    public int GetOffset(int x, int y)
    {
        return _isLinear
            ? x * _bytesPerPixel + y * _stride
            : _layoutConverter.GetOffset(x, y, 0);
    }

    /// <summary>
    /// Gets the byte offset of the texel at column <paramref name="x"/> on the line chosen with <see cref="SetY"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetOffset(int x)
    {
        return _isLinear
            ? x * _bytesPerPixel + _yPart
            : _layoutConverter.GetOffset(x);
    }

    /// <summary>
    /// Gets an offset on the current line from <paramref name="x"/>.
    /// For linear layouts, x is added to the line offset without scaling by the pixel size.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetOffsetWithLineOffset64(int x)
    {
        return _isLinear
            ? x + _yPart
            : _layoutConverter.GetOffsetWithLineOffset64(x);
    }

    /// <summary>
    /// Gets the start offset and byte size of the memory range touched by a rectangle.
    /// </summary>
    public (int offset, int size) GetRectangleRange(int x, int y, int width, int height)
    {
        if (!_isLinear)
        {
            return _layoutConverter.GetRectangleRange(x, y, width, height);
        }

        // The size is measured with the stride magnitude, the offset keeps its sign.
        int first = y * Math.Abs(_stride) + x * _bytesPerPixel;
        int last = (y + height - 1) * Math.Abs(_stride) + (x + width) * _bytesPerPixel;

        return (y * _stride + x * _bytesPerPixel, last - first);
    }

    /// <summary>
    /// Checks whether another calculator describes the same memory layout.
    /// </summary>
    public bool LayoutMatches(OffsetCalculator other)
    {
        if (!_isLinear)
        {
            return !other._isLinear && _layoutConverter.LayoutMatches(other._layoutConverter);
        }

        return other._isLinear &&
               _width == other._width &&
               _height == other._height &&
               _stride == other._stride &&
               _bytesPerPixel == other._bytesPerPixel;
    }
}
}

View file

@ -0,0 +1,216 @@
using Ryujinx.Common;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Texture
{
public static class PixelConverter
{
/// <summary>
/// Works out, in pixels, how much padding follows each input and output line once
/// lines are aligned to the host stride alignment, plus the number of lines in the input.
/// </summary>
/// <param name="length">Input size in bytes</param>
/// <param name="width">Line width in pixels</param>
/// <param name="bpp">Input bytes per pixel</param>
/// <param name="outBpp">Output bytes per pixel</param>
private static (int remainder, int outRemainder, int height) GetLineRemainders(int length, int width, int bpp, int outBpp)
{
    int inputStride = BitUtils.AlignUp(width * bpp, LayoutConverter.HostStrideAlignment);
    int inputPadding = inputStride / bpp - width;

    int outputStride = BitUtils.AlignUp(width * outBpp, LayoutConverter.HostStrideAlignment);
    int outputPadding = outputStride / outBpp - width;

    int lineCount = length / inputStride;

    return (inputPadding, outputPadding, lineCount);
}
/// <summary>
/// Widens every 8-bit input pixel to a 16-bit output pixel by zero extension.
/// </summary>
/// <param name="data">Input pixels, one byte each, with host-aligned lines</param>
/// <param name="width">Line width in pixels</param>
/// <returns>A new buffer twice the size of the input</returns>
public unsafe static byte[] ConvertR4G4ToR4G4B4A4(ReadOnlySpan<byte> data, int width)
{
    byte[] result = new byte[data.Length * 2];

    (int inPadding, int outPadding, int lineCount) = GetLineRemainders(data.Length, width, 1, 2);

    Span<ushort> widened = MemoryMarshal.Cast<byte, ushort>(result);

    if (inPadding != 0)
    {
        // Lines carry padding, so they must be walked one by one.
        int readIndex = 0;
        int writeIndex = 0;

        for (int line = 0; line < lineCount; line++)
        {
            for (int column = 0; column < width; column++)
            {
                widened[writeIndex++] = data[readIndex++];
            }

            readIndex += inPadding;
            writeIndex += outPadding;
        }

        return result;
    }

    // No padding: the whole buffer can be processed as one contiguous run.
    int scalarStart = 0;

    if (Sse41.IsSupported)
    {
        // Handle 8 pixels per iteration, the tail is left to the scalar loop.
        int vectorLength = data.Length & ~7;
        scalarStart = vectorLength;

        fixed (byte* sourcePtr = data, destPtr = result)
        {
            for (ulong position = 0; position < (ulong)vectorLength; position += 8)
            {
                Sse2.Store(destPtr + position * 2, Sse41.ConvertToVector128Int16(sourcePtr + position).AsByte());
            }
        }
    }

    for (int index = scalarStart; index < data.Length; index++)
    {
        widened[index] = (ushort)data[index];
    }

    return result;
}
/// <summary>
/// Expands 16-bit pixels (5 bits in 0-4, 6 bits in 5-10, 5 bits in 11-15) to 32-bit pixels
/// with 8 bits per component and a fully set top byte.
/// </summary>
/// <param name="data">Input pixels, two bytes each, with host-aligned lines</param>
/// <param name="width">Line width in pixels</param>
/// <returns>A new buffer twice the size of the input</returns>
public unsafe static byte[] ConvertR5G6B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
{
    byte[] result = new byte[data.Length * 2];

    (int inPadding, int outPadding, int lineCount) = GetLineRemainders(data.Length, width, 2, 4);

    ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
    Span<uint> dest = MemoryMarshal.Cast<byte, uint>(result);

    int readIndex = 0;
    int writeIndex = 0;

    for (int line = 0; line < lineCount; line++, readIndex += inPadding, writeIndex += outPadding)
    {
        for (int column = 0; column < width; column++)
        {
            uint texel = source[readIndex++];

            // Opaque alpha plus the two 5 bit components in the top bits of their bytes.
            uint expanded = 0xff000000 | ((texel << 3) & 0x000000f8) | ((texel << 8) & 0x00f80000);

            // Fill the low 3 bits of each 5 bit component with its top bits.
            expanded |= (expanded >> 5) & 0x00070007;

            // 6 bit component, with its top 2 bits replicated into the low bits.
            expanded |= (texel << 5) & 0x0000fc00;
            expanded |= (texel >> 1) & 0x00000300;

            dest[writeIndex++] = expanded;
        }
    }

    return result;
}
/// <summary>
/// Expands 16-bit pixels (5 bit components in bits 0-4, 5-9 and 10-14, 1 bit in bit 15)
/// to 32-bit pixels with 8 bits per component.
/// </summary>
/// <param name="data">Input pixels, two bytes each, with host-aligned lines</param>
/// <param name="width">Line width in pixels</param>
/// <param name="forceAlpha">When true the top output byte is always 0xff, otherwise it follows input bit 15</param>
/// <returns>A new buffer twice the size of the input</returns>
public unsafe static byte[] ConvertR5G5B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width, bool forceAlpha)
{
    byte[] result = new byte[data.Length * 2];

    (int inPadding, int outPadding, int lineCount) = GetLineRemainders(data.Length, width, 2, 4);

    ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
    Span<uint> dest = MemoryMarshal.Cast<byte, uint>(result);

    int readIndex = 0;
    int writeIndex = 0;

    for (int line = 0; line < lineCount; line++, readIndex += inPadding, writeIndex += outPadding)
    {
        for (int column = 0; column < width; column++)
        {
            uint texel = source[readIndex++];

            uint alphaBit = forceAlpha ? 1 : (texel >> 15);

            // A single alpha bit becomes either 0x00 or 0xff in the top byte.
            uint expanded = alphaBit * 0xff000000;

            expanded |= (texel << 3) & 0x000000f8;
            expanded |= (texel << 6) & 0x0000f800;
            expanded |= (texel << 9) & 0x00f80000;

            // Fill the low 3 bits of each component with its top bits.
            expanded |= (expanded >> 5) & 0x00070707;

            dest[writeIndex++] = expanded;
        }
    }

    return result;
}
/// <summary>
/// Expands 16-bit pixels to 32-bit pixels with 8 bits per component.
/// The input layout implied by the masks below is: alpha in bit 0, then three 5 bit
/// components in bits 1-5, 6-10 and 11-15 (written to output bytes 2, 1 and 0 respectively).
/// </summary>
/// <param name="data">Input pixels, two bytes each, with host-aligned lines</param>
/// <param name="width">Line width in pixels</param>
/// <returns>A new buffer twice the size of the input</returns>
public unsafe static byte[] ConvertA1B5G5R5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
{
    byte[] output = new byte[data.Length * 2];
    int offset = 0;
    int outOffset = 0;

    (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);

    ReadOnlySpan<ushort> inputSpan = MemoryMarshal.Cast<byte, ushort>(data);
    Span<uint> outputSpan = MemoryMarshal.Cast<byte, uint>(output);

    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            uint packed = inputSpan[offset++];

            // Bits 11-15 hold a colour component, so the alpha bit is the otherwise
            // unused bit 0 and not bit 15 (which is the top bit of that component).
            uint a = packed & 1;

            // A single alpha bit becomes either 0x00 or 0xff in the top byte.
            uint outputPacked = a * 0xff000000;
            outputPacked |= (packed >> 8) & 0x000000f8;
            outputPacked |= (packed << 5) & 0x0000f800;
            outputPacked |= (packed << 18) & 0x00f80000;

            // Replicate 5 bit components.
            outputPacked |= (outputPacked >> 5) & 0x00070707;

            outputSpan[outOffset++] = outputPacked;
        }

        // Skip the padding at the end of each line.
        offset += remainder;
        outOffset += outRemainder;
    }

    return output;
}
/// <summary>
/// Expands 16-bit pixels made of four 4 bit components to 32-bit pixels,
/// duplicating each nibble so that 0xN becomes 0xNN.
/// </summary>
/// <param name="data">Input pixels, two bytes each, with host-aligned lines</param>
/// <param name="width">Line width in pixels</param>
/// <returns>A new buffer twice the size of the input</returns>
public unsafe static byte[] ConvertR4G4B4A4ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
{
    byte[] result = new byte[data.Length * 2];

    (int inPadding, int outPadding, int lineCount) = GetLineRemainders(data.Length, width, 2, 4);

    ReadOnlySpan<ushort> source = MemoryMarshal.Cast<byte, ushort>(data);
    Span<uint> dest = MemoryMarshal.Cast<byte, uint>(result);

    int readIndex = 0;
    int writeIndex = 0;

    for (int line = 0; line < lineCount; line++, readIndex += inPadding, writeIndex += outPadding)
    {
        for (int column = 0; column < width; column++)
        {
            uint texel = source[readIndex++];

            // Spread the four nibbles so each sits in the low half of its own byte.
            uint spread = texel & 0x0000000f;
            spread |= (texel << 4) & 0x00000f00;
            spread |= (texel << 8) & 0x000f0000;
            spread |= (texel << 12) & 0x0f000000;

            // Multiplying by 0x11 copies every low nibble into the high nibble of its byte.
            dest[writeIndex++] = spread * 0x11;
        }
    }

    return result;
}
}
}

View file

@ -0,0 +1,14 @@
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// An immutable (offset, size) pair describing a range.
/// </summary>
public readonly struct Region
{
    /// <summary>
    /// Start of the range.
    /// </summary>
    public int Offset { get; }

    /// <summary>
    /// Length of the range.
    /// </summary>
    public int Size { get; }

    /// <summary>
    /// Creates a region from its start offset and size.
    /// </summary>
    public Region(int offset, int size) => (Offset, Size) = (offset, size);
}
}

View file

@ -0,0 +1,11 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: targets .NET 7 and permits "unsafe" code blocks in this project. -->
<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<!-- Project dependencies: only the shared Ryujinx.Common project is referenced. -->
<ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
</ItemGroup>
</Project>

View file

@ -0,0 +1,16 @@
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// An immutable set of three dimensions: width, height and depth.
/// </summary>
public readonly struct Size
{
    /// <summary>
    /// Extent along the X axis.
    /// </summary>
    public int Width { get; }

    /// <summary>
    /// Extent along the Y axis.
    /// </summary>
    public int Height { get; }

    /// <summary>
    /// Extent along the Z axis.
    /// </summary>
    public int Depth { get; }

    /// <summary>
    /// Creates a size from its three dimensions.
    /// </summary>
    public Size(int width, int height, int depth) => (Width, Height, Depth) = (width, height, depth);
}
}

View file

@ -0,0 +1,287 @@
using Ryujinx.Common;
using System;
using static Ryujinx.Graphics.Texture.BlockLinearConstants;
namespace Ryujinx.Graphics.Texture
{
public static class SizeCalculator
{
private const int StrideAlignment = 32;
/// <summary>
/// Counts the depth slices across all mip levels of a 3D texture,
/// where each level has half the depth of the previous one (never less than 1).
/// </summary>
/// <param name="levels">Number of mip levels</param>
/// <param name="depth">Depth of the first level</param>
/// <returns>Sum of the depths of every level</returns>
private static int Calculate3DOffsetCount(int levels, int depth)
{
    // The first level is always counted, even when "levels" is not positive.
    int total = depth;

    for (int remaining = levels - 1; remaining > 0; remaining--)
    {
        depth = Math.Max(1, depth >> 1);
        total += depth;
    }

    return total;
}
/// <summary>
/// Computes offsets and sizes of every mip level (and, for 3D textures, every depth slice)
/// of a block linear texture, and packs them into a <see cref="SizeInfo"/>.
/// </summary>
/// <param name="width">Width of the first level in texels</param>
/// <param name="height">Height of the first level in texels</param>
/// <param name="depth">Depth of the first level; values above 1 select the 3D path</param>
/// <param name="levels">Number of mip levels</param>
/// <param name="layers">Number of layers</param>
/// <param name="blockWidth">Width of a format block in texels</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <param name="bytesPerPixel">Size in bytes of one format block</param>
/// <param name="gobBlocksInY">GOB blocks in the Y direction for the first level</param>
/// <param name="gobBlocksInZ">GOB blocks in the Z direction for the first level</param>
/// <param name="gobBlocksInTileX">GOB blocks per tile in the X direction, used as width alignment</param>
/// <param name="gpuLayerSize">Optional layer size that overrides the computed one when it is larger</param>
/// <returns>Offsets and sizes of the texture</returns>
public static SizeInfo GetBlockLinearTextureSize(
int width,
int height,
int depth,
int levels,
int layers,
int blockWidth,
int blockHeight,
int bytesPerPixel,
int gobBlocksInY,
int gobBlocksInZ,
int gobBlocksInTileX,
int gpuLayerSize = 0)
{
bool is3D = depth > 1;
int layerSize = 0;
// 3D textures get one offset per depth slice of every level, others one per (layer, level).
int[] allOffsets = new int[is3D ? Calculate3DOffsetCount(levels, depth) : levels * layers * depth];
int[] mipOffsets = new int[levels];
int[] sliceSizes = new int[levels];
int[] levelSizes = new int[levels];
// These shrink as levels get smaller and are never reset, so they carry over to later levels.
int mipGobBlocksInY = gobBlocksInY;
int mipGobBlocksInZ = gobBlocksInZ;
int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
int gobHeight = gobBlocksInY * GobHeight;
// Index into allOffsets of the first slice of the current level (3D only).
int depthLevelOffset = 0;
for (int level = 0; level < levels; level++)
{
// Level dimensions: halved per level, at least 1, then converted to format blocks.
int w = Math.Max(1, width >> level);
int h = Math.Max(1, height >> level);
int d = Math.Max(1, depth >> level);
w = BitUtils.DivRoundUp(w, blockWidth);
h = BitUtils.DivRoundUp(h, blockHeight);
// Halve the block dimensions while the level still fits in half a block.
while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
{
mipGobBlocksInY >>= 1;
}
while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
{
mipGobBlocksInZ >>= 1;
}
int widthInGobs = BitUtils.DivRoundUp(w * bytesPerPixel, GobStride);
// The tile width alignment is dropped for levels not larger than one tile
// (compared against the first level block sizes, not the shrunk ones).
int alignment = gobBlocksInTileX;
if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
{
alignment = 1;
}
widthInGobs = BitUtils.AlignUp(widthInGobs, alignment);
int totalBlocksOfGobsInZ = BitUtils.DivRoundUp(d, mipGobBlocksInZ);
int totalBlocksOfGobsInY = BitUtils.DivRoundUp(BitUtils.DivRoundUp(h, GobHeight), mipGobBlocksInY);
// Bytes covered by one row of blocks across the full width (presumably "row of blocks").
int robSize = widthInGobs * mipGobBlocksInY * mipGobBlocksInZ * GobSize;
if (is3D)
{
int gobSize = mipGobBlocksInY * GobSize;
int sliceSize = totalBlocksOfGobsInY * widthInGobs * gobSize;
int baseOffset = layerSize;
// NOTE(review): the mask uses the first level gobBlocksInZ rather than mipGobBlocksInZ - confirm this is intended.
int mask = gobBlocksInZ - 1;
for (int z = 0; z < d; z++)
{
// Low bits of z select a position inside a block, high bits select the block.
int zLow = z & mask;
int zHigh = z & ~mask;
allOffsets[z + depthLevelOffset] = baseOffset + zLow * gobSize + zHigh * sliceSize;
}
}
mipOffsets[level] = layerSize;
sliceSizes[level] = totalBlocksOfGobsInY * robSize;
levelSizes[level] = totalBlocksOfGobsInZ * sliceSizes[level];
layerSize += levelSizes[level];
depthLevelOffset += d;
}
// Layer alignment is only applied when there is more than one layer.
if (layers > 1)
{
layerSize = AlignLayerSize(
layerSize,
height,
depth,
blockHeight,
gobBlocksInY,
gobBlocksInZ,
gobBlocksInTileX);
}
int totalSize;
// A larger caller-supplied layer size becomes the layer spacing; the last layer keeps its computed size.
if (layerSize < gpuLayerSize)
{
totalSize = (layers - 1) * gpuLayerSize + layerSize;
layerSize = gpuLayerSize;
}
else
{
totalSize = layerSize * layers;
}
if (!is3D)
{
// Offsets are stored layer-major: all levels of layer 0, then all levels of layer 1, and so on.
for (int layer = 0; layer < layers; layer++)
{
int baseIndex = layer * levels;
int baseOffset = layer * layerSize;
for (int level = 0; level < levels; level++)
{
allOffsets[baseIndex + level] = baseOffset + mipOffsets[level];
}
}
}
return new SizeInfo(mipOffsets, allOffsets, sliceSizes, levelSizes, depth, levels, layerSize, totalSize, is3D);
}
/// <summary>
/// Computes the size of a linear texture from its stride and height.
/// </summary>
/// <param name="stride">Distance in bytes between two lines</param>
/// <param name="height">Texture height in texels</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <returns>Size information with a single level and layer</returns>
public static SizeInfo GetLinearTextureSize(int stride, int height, int blockHeight)
{
    // The Switch GPU has no support for linear textures that are mipmapped or not 2D,
    // which leaves 2D textures without mipmaps as the only case to cover.
    int lineCount = BitUtils.DivRoundUp(height, blockHeight);

    return new SizeInfo(stride * lineCount);
}
/// <summary>
/// Rounds a layer size up to the alignment required between layers.
/// </summary>
/// <param name="size">Unaligned layer size in bytes</param>
/// <param name="height">Height of the first level in texels</param>
/// <param name="depth">Depth of the first level</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <param name="gobBlocksInY">GOB blocks in the Y direction</param>
/// <param name="gobBlocksInZ">GOB blocks in the Z direction</param>
/// <param name="gobBlocksInTileX">GOB blocks per tile in the X direction</param>
/// <returns>The aligned layer size in bytes</returns>
private static int AlignLayerSize(
    int size,
    int height,
    int depth,
    int blockHeight,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX)
{
    if (gobBlocksInTileX >= 2)
    {
        // With tiles wider than one GOB, align to a full tile using the unmodified block sizes.
        int alignment = (gobBlocksInTileX * GobSize) * gobBlocksInY * gobBlocksInZ;

        return BitUtils.AlignUp(size, alignment);
    }

    // Shrink the block dimensions to fit the texture, sharing the logic used elsewhere in this class.
    (gobBlocksInY, gobBlocksInZ) = GetMipGobBlockSizes(height, depth, blockHeight, gobBlocksInY, gobBlocksInZ);

    int blockOfGobsSize = gobBlocksInY * gobBlocksInZ * GobSize;

    // Round up to the next multiple of the block size, leaving exact multiples unchanged.
    int remainder = size % blockOfGobsSize;

    return remainder == 0 ? size : size + (blockOfGobsSize - remainder);
}
/// <summary>
/// Computes the dimensions (in format blocks) of a block linear texture level
/// after aligning them to the layout's block sizes.
/// </summary>
/// <param name="width">Width in texels</param>
/// <param name="height">Height in texels</param>
/// <param name="depth">Depth</param>
/// <param name="blockWidth">Width of a format block in texels</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <param name="bytesPerPixel">Size in bytes of one format block</param>
/// <param name="gobBlocksInY">GOB blocks in the Y direction</param>
/// <param name="gobBlocksInZ">GOB blocks in the Z direction</param>
/// <param name="gobBlocksInTileX">GOB blocks per tile in the X direction</param>
/// <returns>The aligned width, height and depth</returns>
public static Size GetBlockLinearAlignedSize(
    int width,
    int height,
    int depth,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX)
{
    int blocksX = BitUtils.DivRoundUp(width, blockWidth);
    int blocksY = BitUtils.DivRoundUp(height, blockHeight);

    int tileWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
    int tileHeight = gobBlocksInY * GobHeight;

    // Textures that do not exceed one tile only need their width aligned to a single GOB line.
    bool fitsInOneTile = depth < gobBlocksInZ || blocksX <= tileWidth || blocksY <= tileHeight;
    int widthAlignment = fitsInOneTile ? GobStride / bytesPerPixel : tileWidth;

    // The height is already expressed in blocks, hence a block height of 1 here.
    (int mipGobBlocksInY, int mipGobBlocksInZ) = GetMipGobBlockSizes(blocksY, depth, 1, gobBlocksInY, gobBlocksInZ);

    int alignedWidth = BitUtils.AlignUp(blocksX, widthAlignment);
    int alignedHeight = BitUtils.AlignUp(blocksY, mipGobBlocksInY * GobHeight);
    int alignedDepth = BitUtils.AlignUp(depth, mipGobBlocksInZ);

    return new Size(alignedWidth, alignedHeight, alignedDepth);
}
/// <summary>
/// Computes the dimensions (in format blocks) of a linear texture,
/// with the width aligned according to the stride alignment.
/// </summary>
/// <param name="width">Width in texels</param>
/// <param name="height">Height in texels</param>
/// <param name="blockWidth">Width of a format block in texels</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <param name="bytesPerPixel">Size in bytes of one format block</param>
/// <returns>The aligned width, the height in blocks, and a depth of 1</returns>
public static Size GetLinearAlignedSize(
    int width,
    int height,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel)
{
    int blocksX = BitUtils.DivRoundUp(width, blockWidth);
    int blocksY = BitUtils.DivRoundUp(height, blockHeight);

    int alignedWidth = BitUtils.AlignUp(blocksX, StrideAlignment / bytesPerPixel);

    return new Size(alignedWidth, blocksY, 1);
}
/// <summary>
/// Shrinks the GOB block counts in Y and Z so that they are not larger than needed
/// for a texture level of the given height and depth.
/// </summary>
/// <param name="height">Level height in texels</param>
/// <param name="depth">Level depth</param>
/// <param name="blockHeight">Height of a format block in texels</param>
/// <param name="gobBlocksInY">Starting GOB blocks in the Y direction</param>
/// <param name="gobBlocksInZ">Starting GOB blocks in the Z direction</param>
/// <returns>The adjusted (Y, Z) GOB block counts</returns>
public static (int, int) GetMipGobBlockSizes(
    int height,
    int depth,
    int blockHeight,
    int gobBlocksInY,
    int gobBlocksInZ)
{
    int heightInBlocks = BitUtils.DivRoundUp(height, blockHeight);

    // Keep halving while the level still fits in half of the current block size.
    for (; gobBlocksInY != 1 && heightInBlocks <= (gobBlocksInY >> 1) * GobHeight; gobBlocksInY >>= 1)
    {
    }

    for (; gobBlocksInZ != 1 && depth <= (gobBlocksInZ >> 1); gobBlocksInZ >>= 1)
    {
    }

    return (gobBlocksInY, gobBlocksInZ);
}
}
}

View file

@ -0,0 +1,119 @@
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Texture
{
/// <summary>
/// Offsets and sizes of the levels, layers and depth slices of a texture.
/// </summary>
public readonly struct SizeInfo
{
    private readonly int[] _mipOffsets;

    private readonly int _levels;
    private readonly int _depth;
    private readonly bool _is3D;

    /// <summary>
    /// Offsets of every view of the texture, searched by <see cref="FindView"/>.
    /// </summary>
    public readonly int[] AllOffsets;

    /// <summary>
    /// Size of one slice, per level.
    /// </summary>
    public readonly int[] SliceSizes;

    /// <summary>
    /// Size of one whole level, per level.
    /// </summary>
    public readonly int[] LevelSizes;

    /// <summary>
    /// Size of a single layer.
    /// </summary>
    public int LayerSize { get; }

    /// <summary>
    /// Size of the whole texture.
    /// </summary>
    public int TotalSize { get; }

    /// <summary>
    /// Creates size information for a texture with one level, one layer and the given size.
    /// </summary>
    public SizeInfo(int size)
    {
        _is3D = false;
        _levels = 1;
        _depth = 1;

        _mipOffsets = new int[] { 0 };
        AllOffsets = new int[] { 0 };
        SliceSizes = new int[] { size };
        LevelSizes = new int[] { size };

        LayerSize = size;
        TotalSize = size;
    }

    /// <summary>
    /// Creates size information from precomputed tables.
    /// </summary>
    internal SizeInfo(
        int[] mipOffsets,
        int[] allOffsets,
        int[] sliceSizes,
        int[] levelSizes,
        int depth,
        int levels,
        int layerSize,
        int totalSize,
        bool is3D)
    {
        _is3D = is3D;
        _levels = levels;
        _depth = depth;

        _mipOffsets = mipOffsets;
        AllOffsets = allOffsets;
        SliceSizes = sliceSizes;
        LevelSizes = levelSizes;

        LayerSize = layerSize;
        TotalSize = totalSize;
    }

    /// <summary>
    /// Gets the offset of a mip level inside a layer.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="level"/> is not a valid level index</exception>
    public int GetMipOffset(int level)
    {
        if (level < 0 || level >= _mipOffsets.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(level));
        }

        return _mipOffsets[level];
    }

    /// <summary>
    /// Looks for a view that starts exactly at <paramref name="offset"/>.
    /// </summary>
    /// <param name="offset">Offset to look up in <see cref="AllOffsets"/></param>
    /// <param name="firstLayer">Layer (or depth slice, for 3D textures) of the view, 0 when not found</param>
    /// <param name="firstLevel">Mip level of the view, 0 when not found</param>
    /// <returns>True if a view starts at the offset, false otherwise</returns>
    public bool FindView(int offset, out int firstLayer, out int firstLevel)
    {
        int found = Array.BinarySearch(AllOffsets, offset);

        firstLayer = 0;
        firstLevel = 0;

        if (found < 0)
        {
            return false;
        }

        if (!_is3D)
        {
            // Offsets are grouped by layer, with one entry per level inside each group.
            firstLayer = found / _levels;
            firstLevel = found % _levels;

            return true;
        }

        // Offsets are grouped by level, each level holding half as many slices as the previous one.
        firstLayer = found;

        for (int slicesInLevel = _depth; firstLayer >= slicesInLevel; slicesInLevel = Math.Max(slicesInLevel >> 1, 1))
        {
            firstLayer -= slicesInLevel;
            firstLevel++;
        }

        return true;
    }

    /// <summary>
    /// Enumerates the regions of the texture: one per level for 3D textures,
    /// otherwise one per entry of <see cref="AllOffsets"/>.
    /// </summary>
    public IEnumerable<Region> AllRegions()
    {
        if (!_is3D)
        {
            for (int view = 0; view < AllOffsets.Length; view++)
            {
                yield return new Region(AllOffsets[view], SliceSizes[view % _levels]);
            }

            yield break;
        }

        for (int level = 0; level < _mipOffsets.Length; level++)
        {
            int start = _mipOffsets[level];

            // A region never extends past the end of the texture.
            int available = TotalSize - start;

            yield return new Region(start, Math.Min(available, LevelSizes[level]));
        }
    }
}
}

View file

@ -0,0 +1,297 @@
namespace Ryujinx.Graphics.Texture.Utils
{
static class BC67Tables
{
public static readonly BC7ModeInfo[] BC7ModeInfos = new BC7ModeInfo[]
{
new BC7ModeInfo(3, 4, 6, 0, 0, 3, 0, 4, 0),
new BC7ModeInfo(2, 6, 2, 0, 0, 3, 0, 6, 0),
new BC7ModeInfo(3, 6, 0, 0, 0, 2, 0, 5, 0),
new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 7, 0),
new BC7ModeInfo(1, 0, 0, 2, 1, 2, 3, 5, 6),
new BC7ModeInfo(1, 0, 0, 2, 0, 2, 2, 7, 8),
new BC7ModeInfo(1, 0, 2, 0, 0, 4, 0, 7, 7),
new BC7ModeInfo(2, 6, 4, 0, 0, 2, 0, 5, 5)
};
public static readonly byte[][] Weights =
{
new byte[] { 0, 21, 43, 64 },
new byte[] { 0, 9, 18, 27, 37, 46, 55, 64 },
new byte[] { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }
};
public static readonly byte[][] InverseWeights =
{
new byte[] { 64, 43, 21, 0 },
new byte[] { 64, 55, 46, 37, 27, 18, 9, 0 },
new byte[] { 64, 60, 55, 51, 47, 43, 38, 34, 30, 26, 21, 17, 13, 9, 4, 0 }
};
public static readonly byte[][][] FixUpIndices = new byte[3][][]
{
new byte[64][]
{
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 },
new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }, new byte[] { 0, 0, 0 }
},
new byte[64][]
{
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 },
new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 },
new byte[] { 0, 8, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 },
new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 2, 0 }, new byte[] { 0, 8, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 },
new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 6, 0 },
new byte[] { 0, 6, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 6, 0 }, new byte[] { 0, 8, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 }, new byte[] { 0, 15, 0 },
new byte[] { 0, 15, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 2, 0 }, new byte[] { 0, 15, 0 }
},
new byte[64][]
{
new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 15, 3 },
new byte[] { 0, 8, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 8 },
new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 6, 15 },
new byte[] { 0, 6, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 },
new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 },
new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }, new byte[] { 0, 6, 15 }, new byte[] { 0, 10, 8 },
new byte[] { 0, 5, 3 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 8, 6 }, new byte[] { 0, 6, 10 },
new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 15, 10 }, new byte[] { 0, 15, 8 },
new byte[] { 0, 8, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 5, 10 },
new byte[] { 0, 6, 10 }, new byte[] { 0, 10, 8 }, new byte[] { 0, 8, 9 }, new byte[] { 0, 15, 10 },
new byte[] { 0, 15, 6 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 8 }, new byte[] { 0, 5, 15 },
new byte[] { 0, 15, 3 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 6 }, new byte[] { 0, 15, 8 },
new byte[] { 0, 3, 15 }, new byte[] { 0, 15, 3 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 5, 15 },
new byte[] { 0, 5, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 5, 15 }, new byte[] { 0, 10, 15 },
new byte[] { 0, 5, 15 }, new byte[] { 0, 10, 15 }, new byte[] { 0, 8, 15 }, new byte[] { 0, 13, 15 },
new byte[] { 0, 15, 3 }, new byte[] { 0, 12, 15 }, new byte[] { 0, 3, 15 }, new byte[] { 0, 3, 8 }
}
};
public static readonly byte[][][] PartitionTable = new byte[3][][]
{
new byte[64][]
{
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 0
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 1
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 2
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 3
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 4
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 5
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 6
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 7
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 8
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 9
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 10
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 11
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 12
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 13
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 14
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 15
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 16
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 18
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 19
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 21
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 22
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 23
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 24
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 25
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 26
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 27
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 28
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 29
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 30
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 31
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 32
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 33
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 34
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 35
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 36
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 37
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 38
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 39
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 40
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 41
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 42
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 43
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 44
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 45
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 46
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 47
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 48
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 49
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 50
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 51
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 52
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 53
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 54
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 55
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 56
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 57
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 58
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 59
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 60
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 61
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // 62
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } // 63
},
new byte[64][]
{
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 0
new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // 1
new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // 2
new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // 4
new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 5
new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 6
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // 7
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // 8
new byte[16] { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 9
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // 10
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // 11
new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 12
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // 13
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // 14
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // 15
new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // 16
new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 17
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // 18
new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 19
new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // 20
new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // 21
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 22
new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // 23
new byte[16] { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // 24
new byte[16] { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // 25
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // 26
new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // 27
new byte[16] { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // 28
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 29
new byte[16] { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // 30
new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 31
new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // 32
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // 33
new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // 34
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // 35
new byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // 36
new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // 37
new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // 38
new byte[16] { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // 39
new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // 40
new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // 41
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // 42
new byte[16] { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // 43
new byte[16] { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // 44
new byte[16] { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // 45
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // 46
new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // 47
new byte[16] { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // 48
new byte[16] { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // 49
new byte[16] { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // 50
new byte[16] { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // 51
new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // 52
new byte[16] { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 53
new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // 54
new byte[16] { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // 55
new byte[16] { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // 56
new byte[16] { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // 57
new byte[16] { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // 58
new byte[16] { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // 59
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // 60
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // 61
new byte[16] { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // 62
new byte[16] { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // 63
},
new byte[64][]
{
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // 0
new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // 1
new byte[16] { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 2
new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // 3
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // 4
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // 5
new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // 6
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // 7
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // 8
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // 9
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 10
new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // 11
new byte[16] { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // 12
new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // 13
new byte[16] { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 14
new byte[16] { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // 15
new byte[16] { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // 16
new byte[16] { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // 17
new byte[16] { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // 18
new byte[16] { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // 19
new byte[16] { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // 20
new byte[16] { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // 21
new byte[16] { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // 22
new byte[16] { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // 23
new byte[16] { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // 24
new byte[16] { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // 25
new byte[16] { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // 26
new byte[16] { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // 27
new byte[16] { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // 28
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // 29
new byte[16] { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // 30
new byte[16] { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // 31
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // 32
new byte[16] { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // 33
new byte[16] { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // 34
new byte[16] { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // 35
new byte[16] { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // 36
new byte[16] { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // 37
new byte[16] { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // 38
new byte[16] { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // 39
new byte[16] { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // 40
new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // 41
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // 42
new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // 43
new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // 44
new byte[16] { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // 45
new byte[16] { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // 46
new byte[16] { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // 47
new byte[16] { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // 48
new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // 49
new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // 50
new byte[16] { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // 51
new byte[16] { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // 52
new byte[16] { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // 53
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // 54
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // 55
new byte[16] { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // 56
new byte[16] { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // 57
new byte[16] { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // 58
new byte[16] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // 59
new byte[16] { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // 60
new byte[16] { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // 61
new byte[16] { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // 62
new byte[16] { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 } // 63
}
};
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,37 @@
namespace Ryujinx.Graphics.Texture.Utils
{
/// <summary>
/// Immutable bundle of nine integer parameters describing one mode entry
/// (subset count, the various bit counts, and the color/alpha depths).
/// </summary>
/// <remarks>
/// The struct only stores the values handed to its constructor; it performs no validation.
/// NOTE(review): judging by the name these are presumably the per-mode parameters of the BC7
/// block format - confirm against the table that instantiates this type.
/// </remarks>
readonly struct BC7ModeInfo
{
    // Partitioning-related parameters.
    public readonly int SubsetCount;
    public readonly int PartitionBitCount;
    public readonly int PBits;

    // Widths of the rotation and index-mode selectors.
    public readonly int RotationBitCount;
    public readonly int IndexModeBitCount;

    // Widths of the per-texel indices.
    public readonly int ColorIndexBitCount;
    public readonly int AlphaIndexBitCount;

    // Endpoint depths.
    public readonly int ColorDepth;
    public readonly int AlphaDepth;

    /// <summary>
    /// Creates a mode description; every argument is copied verbatim into the field of the
    /// corresponding name (<paramref name="partitionBitsCount"/> goes to <see cref="PartitionBitCount"/>).
    /// </summary>
    public BC7ModeInfo(
        int subsetCount,
        int partitionBitsCount,
        int pBits,
        int rotationBitCount,
        int indexModeBitCount,
        int colorIndexBitCount,
        int alphaIndexBitCount,
        int colorDepth,
        int alphaDepth)
    {
        (SubsetCount, PartitionBitCount, PBits) = (subsetCount, partitionBitsCount, pBits);
        (RotationBitCount, IndexModeBitCount) = (rotationBitCount, indexModeBitCount);
        (ColorIndexBitCount, AlphaIndexBitCount) = (colorIndexBitCount, alphaIndexBitCount);
        (ColorDepth, AlphaDepth) = (colorDepth, alphaDepth);
    }
}
}

View file

@ -0,0 +1,55 @@
namespace Ryujinx.Graphics.Texture.Utils
{
/// <summary>
/// 128 bits of storage, kept as two 64-bit halves, that values can be written into and read out of
/// at an arbitrary bit position.
/// </summary>
struct Block
{
    /// <summary>Bits 0..63 of the block.</summary>
    public ulong Low;

    /// <summary>Bits 64..127 of the block.</summary>
    public ulong High;

    /// <summary>
    /// ORs <paramref name="value"/> into the block starting at bit <paramref name="offset"/> and then
    /// advances <paramref name="offset"/> by <paramref name="bits"/>.
    /// </summary>
    /// <param name="value">Bits to store. It is not masked to <paramref name="bits"/>, and bits already set in the block are not cleared.</param>
    /// <param name="offset">Bit position (0 = least significant bit of <see cref="Low"/>); updated on return.</param>
    /// <param name="bits">Width of the field being written.</param>
    public void Encode(ulong value, ref int offset, int bits)
    {
        int start = offset;
        offset = start + bits;

        // Field lies entirely in the upper half.
        if (start >= 64)
        {
            High |= value << (start - 64);
            return;
        }

        Low |= value << start;

        // Field straddles the 64-bit boundary: the part that did not fit in Low spills into High.
        if (start + bits > 64)
        {
            High |= value >> (64 - start);
        }
    }

    /// <summary>
    /// Reads a <paramref name="bits"/>-wide field starting at bit <paramref name="offset"/> and then
    /// advances <paramref name="offset"/> by <paramref name="bits"/>.
    /// </summary>
    /// <param name="offset">Bit position (0 = least significant bit of <see cref="Low"/>); updated on return.</param>
    /// <param name="bits">Width of the field being read.</param>
    /// <returns>The field, right-aligned and masked to <paramref name="bits"/> bits.</returns>
    public ulong Decode(ref int offset, int bits)
    {
        int start = offset;
        offset = start + bits;

        // 1UL << 64 would wrap around to 1UL << 0, so the full-width mask is special-cased.
        ulong fieldMask = bits != 64 ? (1UL << bits) - 1 : ulong.MaxValue;

        // Field lies entirely in the upper half.
        if (start >= 64)
        {
            return (High >> (start - 64)) & fieldMask;
        }

        ulong result = Low >> start;

        // Field straddles the 64-bit boundary: pull the remaining high-order bits from High.
        if (start + bits > 64)
        {
            result |= High << (64 - start);
        }

        return result & fieldMask;
    }
}
}

View file

@ -0,0 +1,229 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Texture.Utils
{
/// <summary>
/// RGBA color with four 32-bit signed integer components stored in a single
/// <see cref="Vector128{T}"/> (element 0 = R, 1 = G, 2 = B, 3 = A).
/// </summary>
/// <remarks>
/// Operators work component-wise. Each one uses an x86 SSE intrinsic when the corresponding
/// instruction set is reported as supported and a scalar per-component fallback otherwise.
/// </remarks>
struct RgbaColor32 : IEquatable<RgbaColor32>
{
    private Vector128<int> _color;

    /// <summary>Red component (vector element 0).</summary>
    public int R
    {
        get => _color.GetElement(0);
        set => _color = _color.WithElement(0, value);
    }

    /// <summary>Green component (vector element 1).</summary>
    public int G
    {
        get => _color.GetElement(1);
        set => _color = _color.WithElement(1, value);
    }

    /// <summary>Blue component (vector element 2).</summary>
    public int B
    {
        get => _color.GetElement(2);
        set => _color = _color.WithElement(2, value);
    }

    /// <summary>Alpha component (vector element 3).</summary>
    public int A
    {
        get => _color.GetElement(3);
        set => _color = _color.WithElement(3, value);
    }

    /// <summary>Wraps an existing vector whose elements are, in order, R, G, B and A.</summary>
    public RgbaColor32(Vector128<int> color)
    {
        _color = color;
    }

    /// <summary>Creates a color from its four components.</summary>
    public RgbaColor32(int r, int g, int b, int a)
    {
        _color = Vector128.Create(r, g, b, a);
    }

    /// <summary>Creates a color whose four components all equal <paramref name="scalar"/>.</summary>
    public RgbaColor32(int scalar)
    {
        _color = Vector128.Create(scalar);
    }

    /// <summary>Component-wise addition.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 operator +(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse2.IsSupported)
        {
            return new RgbaColor32(Sse2.Add(x._color, y._color));
        }
        else
        {
            return new RgbaColor32(x.R + y.R, x.G + y.G, x.B + y.B, x.A + y.A);
        }
    }

    /// <summary>Component-wise subtraction.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 operator -(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse2.IsSupported)
        {
            return new RgbaColor32(Sse2.Subtract(x._color, y._color));
        }
        else
        {
            return new RgbaColor32(x.R - y.R, x.G - y.G, x.B - y.B, x.A - y.A);
        }
    }

    /// <summary>Component-wise multiplication, keeping the low 32 bits of each product.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 operator *(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse41.IsSupported)
        {
            return new RgbaColor32(Sse41.MultiplyLow(x._color, y._color));
        }
        else
        {
            return new RgbaColor32(x.R * y.R, x.G * y.G, x.B * y.B, x.A * y.A);
        }
    }

    /// <summary>
    /// Component-wise integer division. Throws <see cref="DivideByZeroException"/> if any component
    /// of <paramref name="y"/> is zero; see <see cref="DivideGuarded(RgbaColor32, RgbaColor32, int)"/>
    /// for a non-throwing variant.
    /// </summary>
    public static RgbaColor32 operator /(RgbaColor32 x, RgbaColor32 y)
    {
        return new RgbaColor32(x.R / y.R, x.G / y.G, x.B / y.B, x.A / y.A);
    }

    /// <summary>
    /// Component-wise integer division that yields <paramref name="resultIfZero"/> for every
    /// component whose divisor is zero instead of throwing.
    /// </summary>
    public static RgbaColor32 DivideGuarded(RgbaColor32 x, RgbaColor32 y, int resultIfZero)
    {
        return new RgbaColor32(
            DivideGuarded(x.R, y.R, resultIfZero),
            DivideGuarded(x.G, y.G, resultIfZero),
            DivideGuarded(x.B, y.B, resultIfZero),
            DivideGuarded(x.A, y.A, resultIfZero));
    }

    /// <summary>Shifts every component left by <paramref name="shift"/> bits.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 operator <<(RgbaColor32 x, int shift)
    {
        // NOTE(review): the two paths only agree for shift counts 0..31 (the intrinsic yields zero
        // for larger counts, the C# operator uses the count modulo 32) - confirm callers stay in range.
        if (Sse2.IsSupported)
        {
            return new RgbaColor32(Sse2.ShiftLeftLogical(x._color, (byte)shift));
        }
        else
        {
            return new RgbaColor32(x.R << shift, x.G << shift, x.B << shift, x.A << shift);
        }
    }

    /// <summary>Shifts every component right by <paramref name="shift"/> bits.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 operator >>(RgbaColor32 x, int shift)
    {
        // NOTE(review): the SSE2 path is a logical (zero-filling) shift, whereas the fallback uses
        // the C# int shift, which is arithmetic (sign-extending). The results match only for
        // non-negative components and shift counts 0..31 - confirm which semantics is intended
        // before relying on this operator with negative values.
        if (Sse2.IsSupported)
        {
            return new RgbaColor32(Sse2.ShiftRightLogical(x._color, (byte)shift));
        }
        else
        {
            return new RgbaColor32(x.R >> shift, x.G >> shift, x.B >> shift, x.A >> shift);
        }
    }

    public static bool operator ==(RgbaColor32 x, RgbaColor32 y)
    {
        return x.Equals(y);
    }

    public static bool operator !=(RgbaColor32 x, RgbaColor32 y)
    {
        return !x.Equals(y);
    }

    /// <summary>Four-component dot product: <c>x.R*y.R + x.G*y.G + x.B*y.B + x.A*y.A</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int Dot(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse41.IsSupported)
        {
            Vector128<int> product = Sse41.MultiplyLow(x._color, y._color);

            // Two horizontal adds fold the four products into element 0.
            Vector128<int> sum = Ssse3.HorizontalAdd(product, product);
            sum = Ssse3.HorizontalAdd(sum, sum);
            return sum.GetElement(0);
        }
        else
        {
            return x.R * y.R + x.G * y.G + x.B * y.B + x.A * y.A;
        }
    }

    /// <summary>Component-wise signed maximum.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 Max(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse41.IsSupported)
        {
            return new RgbaColor32(Sse41.Max(x._color, y._color));
        }
        else
        {
            return new RgbaColor32(Math.Max(x.R, y.R), Math.Max(x.G, y.G), Math.Max(x.B, y.B), Math.Max(x.A, y.A));
        }
    }

    /// <summary>Component-wise signed minimum.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static RgbaColor32 Min(RgbaColor32 x, RgbaColor32 y)
    {
        if (Sse41.IsSupported)
        {
            return new RgbaColor32(Sse41.Min(x._color, y._color));
        }
        else
        {
            return new RgbaColor32(Math.Min(x.R, y.R), Math.Min(x.G, y.G), Math.Min(x.B, y.B), Math.Min(x.A, y.A));
        }
    }

    /// <summary>
    /// Converts to an 8-bit-per-component color, clamping every component to the range 0..255.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public RgbaColor8 GetColor8()
    {
        if (Sse2.IsSupported)
        {
            // Narrow 32 -> 16 bits with SIGNED saturation. The following 16 -> 8 pack interprets its
            // input as signed 16-bit, so an unsigned 32 -> 16 pack would turn every component
            // >= 32768 into a negative value and clamp it to 0 instead of 255.
            Vector128<short> color16 = Sse2.PackSignedSaturate(_color, _color);

            // Narrow 16 -> 8 bits with unsigned saturation: negatives become 0, values > 255 become 255.
            Vector128<byte> color8 = Sse2.PackUnsignedSaturate(color16, color16);

            // The low four bytes are now R, G, B, A; reinterpret them as the 4-byte color struct.
            uint color = color8.AsUInt32().GetElement(0);
            return Unsafe.As<uint, RgbaColor8>(ref color);
        }
        else
        {
            return new RgbaColor8(ClampByte(R), ClampByte(G), ClampByte(B), ClampByte(A));
        }
    }

    // Scalar division that substitutes resultIfZero when the divisor is zero.
    private static int DivideGuarded(int dividend, int divisor, int resultIfZero)
    {
        if (divisor == 0)
        {
            return resultIfZero;
        }

        return dividend / divisor;
    }

    // Clamps value to 0..255 and narrows it to a byte.
    private static byte ClampByte(int value)
    {
        return (byte)Math.Clamp(value, 0, 255);
    }

    public override int GetHashCode()
    {
        return HashCode.Combine(R, G, B, A);
    }

    public override bool Equals(object obj)
    {
        return obj is RgbaColor32 other && Equals(other);
    }

    /// <summary>Returns true when all four components are equal.</summary>
    public bool Equals(RgbaColor32 other)
    {
        return _color.Equals(other._color);
    }
}
}

View file

@ -0,0 +1,84 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Ryujinx.Graphics.Texture.Utils
{
/// <summary>
/// RGBA color with one byte per component, laid out as four consecutive bytes R, G, B, A so that
/// it can be reinterpreted to and from a 32-bit unsigned integer.
/// </summary>
struct RgbaColor8 : IEquatable<RgbaColor8>
{
    public byte R;
    public byte G;
    public byte B;
    public byte A;

    /// <summary>Creates a color from its four components.</summary>
    public RgbaColor8(byte r, byte g, byte b, byte a)
    {
        (R, G, B, A) = (r, g, b, a);
    }

    /// <summary>Reinterprets the four bytes of <paramref name="color"/> as a color (no conversion is performed).</summary>
    public static RgbaColor8 FromUInt32(uint color) => Unsafe.As<uint, RgbaColor8>(ref color);

    public static bool operator ==(RgbaColor8 x, RgbaColor8 y) => x.Equals(y);

    public static bool operator !=(RgbaColor8 x, RgbaColor8 y) => !x.Equals(y);

    /// <summary>Widens every component to a 32-bit integer.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public RgbaColor32 GetColor32()
    {
        if (!Sse41.IsSupported)
        {
            // Scalar path: each byte converts implicitly to int.
            return new RgbaColor32(R, G, B, A);
        }

        // Place the four packed bytes in the low lane of a vector, then widen those bytes to four ints.
        uint packed = Unsafe.As<RgbaColor8, uint>(ref this);
        Vector128<byte> bytes = Vector128.CreateScalarUnsafe(packed).AsByte();

        return new RgbaColor32(Sse41.ConvertToVector128Int32(bytes));
    }

    /// <summary>Reinterprets the four component bytes as a 32-bit unsigned integer (no conversion is performed).</summary>
    public uint ToUInt32() => Unsafe.As<RgbaColor8, uint>(ref this);

    public override int GetHashCode() => HashCode.Combine(R, G, B, A);

    public override bool Equals(object obj)
    {
        if (obj is RgbaColor8 other)
        {
            return Equals(other);
        }

        return false;
    }

    /// <summary>Returns true when all four components are equal.</summary>
    public bool Equals(RgbaColor8 other)
    {
        return (R, G, B, A) == (other.R, other.G, other.B, other.A);
    }

    /// <summary>Returns the component selected by <paramref name="index"/>: 0 = R, 1 = G, 2 = B, 3 = A.</summary>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not in 0..3.</exception>
    public byte GetComponent(int index)
    {
        switch (index)
        {
            case 0:
                return R;
            case 1:
                return G;
            case 2:
                return B;
            case 3:
                return A;
            default:
                throw new ArgumentOutOfRangeException(nameof(index));
        }
    }
}
}