yuzu
1522 строки · 63.1 Кб
1// SPDX-License-Identifier: MPL-2.0
2// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
3// Copyright 2019 The SwiftShader Authors. All Rights Reserved.
4
5// This BCn Decoder is directly derivative of Swiftshader's BCn Decoder found at: https://github.com/google/swiftshader/blob/d070309f7d154d6764cbd514b1a5c8bfcef61d06/src/Device/BC_Decoder.cpp
6// This file does not follow the Skyline code conventions but has certain Skyline specific code
7// There are a lot of implicit and narrowing conversions in this file due to this (Warnings are disabled as a result)
8
9#include <array>
10#include <assert.h>
11#include <stddef.h>
12#include <stdint.h>
13
14namespace {
constexpr int BlockWidth = 4;
constexpr int BlockHeight = 4;

// An 8-byte colour block: two RGB565 endpoint colours followed by a 32-bit word
// that holds sixteen 2-bit palette selectors, one per texel of the 4x4 block.
struct BC_color {
    // Expands the block into 32-bit texels stored through `dst`.
    //   dst              - address of the block's first texel in the destination
    //   x, y             - block origin in texels; only used to clip against dstW/dstH
    //   dstW, dstH       - destination extent in texels; texels at or beyond it are skipped
    //   dstPitch, dstBpp - byte stride between rows and between neighbouring texels
    //   hasAlphaChannel  - in the 3-colour mode, palette entry 3 gets a zero alpha
    //   hasSeparateAlpha - forces the 4-colour mode regardless of the endpoint order
    // Each texel is written with a single `unsigned int` store.
    void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, bool hasAlphaChannel, bool hasSeparateAlpha) const {
        Color palette[4];
        palette[0].extract565(c0);
        palette[1].extract565(c1);

        const bool fourColorMode = hasSeparateAlpha || (c0 > c1);
        if (fourColorMode) {
            // Two extra colours at 1/3 and 2/3 between the endpoints
            palette[2] = ((palette[0] * 2) + palette[1]) / 3;
            palette[3] = ((palette[1] * 2) + palette[0]) / 3;
        } else {
            // One midpoint colour; entry 3 stays black and is optionally made transparent
            palette[2] = (palette[0] + palette[1]) >> 1;
            if (hasAlphaChannel) {
                palette[3].clearAlpha();
            }
        }

        for (int row = 0; row < BlockHeight && (y + row) < dstH; row++) {
            uint8_t *texel = dst + (row * dstPitch);
            size_t selector = row * BlockHeight;
            for (size_t col = 0; col < BlockWidth && (x + col) < dstW; col++) {
                *reinterpret_cast<unsigned int *>(texel) = palette[getIdx(selector)].pack8888();
                selector++;
                texel += dstBpp;
            }
        }
    }

private:
    // Working colour with integer channels. c[0] is taken from the low five bits of the
    // 565 value, c[1] from the middle six and c[2] from the top five; c[3] holds alpha
    // already positioned in bits 24-31 so it can be OR-ed straight into the packed texel.
    struct Color {
        Color() : c{0, 0, 0, static_cast<int>(0xFF000000)} {
        }

        // Widens a 5:6:5 value to 8 bits per channel by replicating the top bits into the low bits.
        void extract565(const unsigned int c565) {
            const unsigned int low5 = c565 & 0x1F;
            const unsigned int mid6 = (c565 >> 5) & 0x3F;
            const unsigned int high5 = (c565 >> 11) & 0x1F;
            c[0] = (low5 << 3) | (low5 >> 2);
            c[1] = (mid6 << 2) | (mid6 >> 4);
            c[2] = (high5 << 3) | (high5 >> 2);
        }

        // Packs the channels as c[0] << 16 | c[1] << 8 | c[2], with c[3] supplying bits 24-31.
        unsigned int pack8888() const {
            return ((c[0] & 0xFF) << 16) | ((c[1] & 0xFF) << 8) | (c[2] & 0xFF) | c[3];
        }

        void clearAlpha() {
            c[3] = 0;
        }

        // The arithmetic operators act on all four channels, alpha included.
        Color operator*(int factor) const {
            Color out;
            out.c[0] = c[0] * factor;
            out.c[1] = c[1] * factor;
            out.c[2] = c[2] * factor;
            out.c[3] = c[3] * factor;
            return out;
        }

        Color operator/(int factor) const {
            Color out;
            out.c[0] = c[0] / factor;
            out.c[1] = c[1] / factor;
            out.c[2] = c[2] / factor;
            out.c[3] = c[3] / factor;
            return out;
        }

        Color operator>>(int shift) const {
            Color out;
            out.c[0] = c[0] >> shift;
            out.c[1] = c[1] >> shift;
            out.c[2] = c[2] >> shift;
            out.c[3] = c[3] >> shift;
            return out;
        }

        Color operator+(Color const &obj) const {
            Color out;
            out.c[0] = c[0] + obj.c[0];
            out.c[1] = c[1] + obj.c[1];
            out.c[2] = c[2] + obj.c[2];
            out.c[3] = c[3] + obj.c[3];
            return out;
        }

    private:
        int c[4];
    };

    // Returns the 2-bit palette selector of texel `i` (0-15).
    size_t getIdx(int i) const {
        const size_t shift = i << 1; // 2 bits per selector
        return (idx >> shift) & 0x3;
    }

    unsigned short c0;
    unsigned short c1;
    unsigned int idx;
};
static_assert(sizeof(BC_color) == 8, "BC_color must be 8 bytes");
109
110struct BC_channel {
111void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, size_t channel, bool isSigned) const {
112int c[8] = {0};
113
114if (isSigned) {
115c[0] = static_cast<signed char>(data & 0xFF);
116c[1] = static_cast<signed char>((data & 0xFF00) >> 8);
117} else {
118c[0] = static_cast<uint8_t>(data & 0xFF);
119c[1] = static_cast<uint8_t>((data & 0xFF00) >> 8);
120}
121
122if (c[0] > c[1]) {
123for (int i = 2; i < 8; ++i) {
124c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7;
125}
126} else {
127for (int i = 2; i < 6; ++i) {
128c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5;
129}
130c[6] = isSigned ? -128 : 0;
131c[7] = isSigned ? 127 : 255;
132}
133
134for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {
135for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) {
136dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast<uint8_t>(c[getIdx((j * BlockHeight) + i)]);
137}
138}
139}
140
141private:
142uint8_t getIdx(int i) const {
143int offset = i * 3 + 16;
144return static_cast<uint8_t>((data & (0x7ull << offset)) >> offset);
145}
146
147uint64_t data;
148};
149static_assert(sizeof(BC_channel) == 8, "BC_channel must be 8 bytes");
150
151struct BC_alpha {
152void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp) const {
153dst += 3; // Write only to alpha (channel 3)
154for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) {
155uint8_t *dstRow = dst;
156for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) {
157*dstRow = getAlpha(j * BlockHeight + i);
158}
159}
160}
161
162private:
163uint8_t getAlpha(int i) const {
164int offset = i << 2;
165int alpha = (data & (0xFull << offset)) >> offset;
166return static_cast<uint8_t>(alpha | (alpha << 4));
167}
168
169uint64_t data;
170};
171static_assert(sizeof(BC_alpha) == 8, "BC_alpha must be 8 bytes");
172
173namespace BC6H {
// Number of rows in PartitionTable2 and entries in AnchorTable2 (one per partition id).
174static constexpr int MaxPartitions = 64;
175
176// @fmt:off
177
// Two-subset partition patterns. Each row is one partition id; each of the 16 columns
// is a texel of the 4x4 block indexed as x + y * 4 (the pixelNum used by Block::decode).
// The entry (0 or 1) is the subset the texel belongs to; Block::decode multiplies it by 2
// to obtain the index of the subset's first endpoint.
178static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
179{ 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
180{ 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 },
181{ 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
182{ 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
183{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 },
184{ 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
185{ 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
186{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
187{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 },
188{ 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
189{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
190{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 },
191{ 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
192{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
193{ 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
194{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 },
195{ 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 },
196{ 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
197{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 },
198{ 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
199{ 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
200{ 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
201{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
202{ 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 },
203{ 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
204{ 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
205{ 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 },
206{ 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 },
207{ 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 },
208{ 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
209{ 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 },
210{ 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
211{ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
212{ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 },
213{ 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 },
214{ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 },
215{ 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 },
216{ 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 },
217{ 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
218{ 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 },
219{ 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 },
220{ 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
221{ 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 },
222{ 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 },
223{ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 },
224{ 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
225{ 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 },
226{ 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
227{ 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
228{ 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
229{ 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 },
230{ 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 },
231{ 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 },
232{ 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
233{ 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
234{ 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 },
235{ 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
236{ 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 },
237{ 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
238{ 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 },
239{ 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
240{ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
241{ 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 },
242{ 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 },
243};
244
// For each partition id, the texel index (0-15) that Block::decode treats as an anchor
// in addition to texel 0 when the mode has two partitions: an anchor's index is read
// with one bit fewer than the other texels (implicit leading 0 bit).
245static constexpr uint8_t AnchorTable2[MaxPartitions] = {
2460xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
2470xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
2480xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
2490x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
2500xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
2510x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
2520x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
2530xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
254};
255
256// @fmt:on
257
// Bit pattern of 1.0 in half-precision floating point; used as the alpha of every Color
static constexpr uint16_t halfFloat1 = 0x3C00;

// One output texel: four 16-bit channels, accessible either by name (rgba) or by index (channel).
union Color {
    struct RGBA {
        uint16_t r = 0;
        uint16_t g = 0;
        uint16_t b = 0;
        uint16_t a = halfFloat1;

        RGBA(uint16_t red, uint16_t green, uint16_t blue)
            : r(red), g(green), b(blue) {
        }

        // Copies only the colour channels; alpha is always reset to 1.0
        RGBA &operator=(const RGBA &other) {
            r = other.r;
            g = other.g;
            b = other.b;
            a = halfFloat1;

            return *this;
        }
    };

    Color(uint16_t r, uint16_t g, uint16_t b)
        : rgba(r, g, b) {
    }

    // Convenience overload: each argument is truncated to 16 bits
    Color(int r, int g, int b)
        : rgba(static_cast<uint16_t>(r), static_cast<uint16_t>(g), static_cast<uint16_t>(b)) {
    }

    // Leaves every channel uninitialised
    Color() {}

    // Goes through RGBA::operator= on purpose, so the copy always ends up with alpha = 1.0
    // even when the source's alpha was never written.
    Color(const Color &other) {
        rgba = other.rgba;
    }

    Color &operator=(const Color &other) {
        rgba = other.rgba;

        return *this;
    }

    RGBA rgba;
    uint16_t channel[4];
};
static_assert(sizeof(Color) == 8, "BC6h::Color must be 8 bytes long");
305
// Sign-extends the low `size` bits of `val` (two's complement) to a full int32_t.
//
// Method: XOR with the sign-bit mask, then subtract the mask.
//   * sign bit set:   the XOR clears it, and the subtraction then borrows through it,
//                     setting it and every bit above it to 1.
//   * sign bit clear: the XOR sets it, and the subtraction clears it again; no other
//                     bit is touched.
// Example with size = 2: 0b11 -> (0b11 ^ 0b10) - 0b10 = 0b01 - 0b10 = -1.
//
// The mask is held in 16 bits, so only sizes from 1 to 16 are meaningful.
inline int32_t extendSign(int32_t val, size_t size) {
    const uint16_t signBit = 1u << (size - 1);
    const int32_t flipped = val ^ signBit;
    return flipped - signBit;
}
327
328static int constexpr RGBfChannels = 3;
329struct RGBf {
330uint16_t channel[RGBfChannels];
331size_t size[RGBfChannels];
332bool isSigned;
333
334RGBf() {
335static_assert(RGBfChannels == 3, "RGBf must have exactly 3 channels");
336static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels");
337static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes");
338
339for (int i = 0; i < RGBfChannels; i++) {
340channel[i] = 0;
341size[i] = 0;
342}
343
344isSigned = false;
345}
346
347void extendSign() {
348for (int i = 0; i < RGBfChannels; i++) {
349channel[i] = BC6H::extendSign(channel[i], size[i]);
350}
351}
352
353// Assuming this is the delta, take the base-endpoint and transform this into
354// a proper endpoint.
355//
356// The final computed endpoint is truncated to the base-endpoint's size;
357void resolveDelta(RGBf base) {
358for (int i = 0; i < RGBfChannels; i++) {
359size[i] = base.size[i];
360channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1);
361}
362
363// Per the spec:
364// "For signed formats, the results of the delta calculation must be sign
365// extended as well."
366if (isSigned) {
367extendSign();
368}
369}
370
371void unquantize() {
372if (isSigned) {
373unquantizeSigned();
374} else {
375unquantizeUnsigned();
376}
377}
378
379void unquantizeUnsigned() {
380for (int i = 0; i < RGBfChannels; i++) {
381if (size[i] >= 15 || channel[i] == 0) {
382continue;
383} else if (channel[i] == ((1u << size[i]) - 1)) {
384channel[i] = 0xFFFFu;
385} else {
386// Need 32 bits to avoid overflow
387uint32_t tmp = channel[i];
388channel[i] = (uint16_t) (((tmp << 16) + 0x8000) >> size[i]);
389}
390size[i] = 16;
391}
392}
393
394void unquantizeSigned() {
395for (int i = 0; i < RGBfChannels; i++) {
396if (size[i] >= 16 || channel[i] == 0) {
397continue;
398}
399
400int16_t value = (int16_t)channel[i];
401int32_t result = value;
402bool signBit = value < 0;
403if (signBit) {
404value = -value;
405}
406
407if (value >= ((1 << (size[i] - 1)) - 1)) {
408result = 0x7FFF;
409} else {
410// Need 32 bits to avoid overflow
411int32_t tmp = value;
412result = (((tmp << 15) + 0x4000) >> (size[i] - 1));
413}
414
415if (signBit) {
416result = -result;
417}
418
419channel[i] = (uint16_t) result;
420size[i] = 16;
421}
422}
423};
424
// A 128-bit block treated as a bit stream that is consumed from the least significant
// bit of low64 upwards.
struct Data {
    uint64_t low64;
    uint64_t high64;

    Data() = default;

    Data(uint64_t low64, uint64_t high64)
        : low64(low64), high64(high64) {
    }

    // Consumes the lowest N bits of the 128-bit value {high64:low64}, where N is
    //     abs(MSB - LSB) + 1
    // and returns them positioned at bit min(MSB, LSB) of the result. The remaining
    // bits are shifted down by N so that the next call continues where this one stopped.
    //
    // MSB and LSB come from the block description of the BC6h spec and specify the
    // location of the bits in the returned bitstring.
    //
    // If MSB < LSB, the consumed bits are returned in reversed bit order. Otherwise they
    // are returned without further modification.
    //
    // N must be smaller than 32 (checked with an assert).
    uint32_t consumeBits(uint32_t MSB, uint32_t LSB) {
        const bool reversed = MSB < LSB;
        const uint32_t highBit = reversed ? LSB : MSB;
        const uint32_t lowBit = reversed ? MSB : LSB;
        const uint32_t numBits = highBit - lowBit + 1;
        assert(numBits < sizeof(uint32_t) * 8);

        // Unsigned literal: shifting a signed 1 into bit 31 is not portable
        const uint32_t mask = (1u << numBits) - 1;
        // Read the low N bits
        uint32_t bits = static_cast<uint32_t>(low64 & mask);

        low64 >>= numBits;
        // Put the low N bits of high64 into the high N bits of low64
        low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits);
        high64 >>= numBits;

        if (reversed) {
            uint32_t mirrored = 0;
            for (uint32_t i = 0; i < numBits; i++) {
                mirrored = (mirrored << 1) | (bits & 1);
                bits >>= 1;
            }

            bits = mirrored;
        }

        return bits << lowBit;
    }
};
474
475struct IndexInfo {
476uint64_t value;
477int numBits;
478};
479
480// Interpolates between two endpoints, then does a final unquantization step
481Color interpolate(RGBf e0, RGBf e1, const IndexInfo &index, bool isSigned) {
482static constexpr uint32_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
483static constexpr uint32_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
48434, 38, 43, 47, 51, 55, 60, 64};
485static constexpr uint32_t const *weightsN[] = {
486nullptr, nullptr, nullptr, weights3, weights4
487};
488auto weights = weightsN[index.numBits];
489assert(weights != nullptr);
490Color color;
491uint32_t e0Weight = 64 - weights[index.value];
492uint32_t e1Weight = weights[index.value];
493
494for (int i = 0; i < RGBfChannels; i++) {
495int32_t e0Channel = e0.channel[i];
496int32_t e1Channel = e1.channel[i];
497
498if (isSigned) {
499e0Channel = extendSign(e0Channel, 16);
500e1Channel = extendSign(e1Channel, 16);
501}
502
503int32_t e0Value = e0Channel * e0Weight;
504int32_t e1Value = e1Channel * e1Weight;
505
506uint32_t tmp = ((e0Value + e1Value + 32) >> 6);
507
508// Need to unquantize value to limit it to the legal range of half-precision
509// floats. We do this by scaling by 31/32 or 31/64 depending on if the value
510// is signed or unsigned.
511if (isSigned) {
512tmp = ((tmp & 0x80000000) != 0) ? (((~tmp + 1) * 31) >> 5) | 0x8000 : (tmp * 31) >> 5;
513// Don't return -0.0f, just normalize it to 0.0f.
514if (tmp == 0x8000)
515tmp = 0;
516} else {
517tmp = (tmp * 31) >> 6;
518}
519
520color.channel[i] = (uint16_t) tmp;
521}
522
523return color;
524}
525
// Kind of field described by a BlockDesc entry.
enum DataType {
    // Endpoints: the value is used directly as the index of the endpoint being filled
    EP0 = 0,
    EP1 = 1,
    EP2 = 2,
    EP3 = 3,
    // Mode header entry of a block description
    Mode = 4,
    // Partition number field
    Partition = 5,
    // Terminates a block description
    End = 6,
};
536
// Colour channel addressed by a BlockDesc entry; R, G and B double as array indices.
enum Channel {
    R = 0,
    G = 1,
    B = 2,
    // Used by entries that do not refer to a colour channel
    None = 3,
};
543
// Bit widths of the delta-encoded endpoints, one entry per colour channel (R, G, B).
struct DeltaBits {
    size_t channel[3];

    // All widths zero
    constexpr DeltaBits()
        : DeltaBits(0, 0, 0) {
    }

    constexpr DeltaBits(size_t r, size_t g, size_t b)
        : channel{r, g, b} {
    }
};
555
556struct ModeDesc {
557int number;
558bool hasDelta;
559int partitionCount;
560int endpointBits;
561DeltaBits deltaBits;
562
563constexpr ModeDesc()
564: number(-1), hasDelta(false), partitionCount(0), endpointBits(0) {
565}
566
567constexpr ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits)
568: number(number), hasDelta(hasDelta), partitionCount(partitionCount), endpointBits(endpointBits), deltaBits(deltaBits) {
569}
570};
571
572struct BlockDesc {
573DataType type;
574Channel channel;
575int MSB;
576int LSB;
577ModeDesc modeDesc;
578
579constexpr BlockDesc()
580: type(End), channel(None), MSB(0), LSB(0), modeDesc() {
581}
582
583constexpr BlockDesc(const DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc)
584: type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc(modeDesc) {
585}
586
587constexpr BlockDesc(DataType type, Channel channel, int MSB, int LSB)
588: type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc() {
589}
590};
591
// Maps a legal mode number to its row in the BlockDesc table.
// Illegal or reserved modes yield -1.
static int modeToIndex(uint8_t mode) {
    // Modes 0-3 map to themselves
    if (mode <= 3) {
        return mode;
    }

    // Every remaining legal mode has bit 1 set
    const bool bit1Set = (mode & 0x2) != 0;
    if (!bit1Set) {
        return -1;
    }

    if (mode <= 18) {
        // Turns 6 into 4, 7 into 5, 10 into 6, etc.
        return (mode / 2) + 1 + (mode & 0x1);
    }

    switch (mode) {
    case 22:
    case 26:
    case 30:
        // Turns 22 into 11, 26 into 12, etc.
        return mode / 4 + 6;
    default:
        return -1;
    }
}
609
// Describes, for each mode, the bitfields of a block in the order they are stored,
// from the LSB to the MSB, up to the point where the index data starts.
//
// The numbers come from the BC6h block description. Each BlockDesc in the table has
// the form
//   {Type, Channel, MSB, LSB}
//   * Type describes which endpoint this is, or if this is a mode, a partition
//     number, or the end of the block description.
//   * Channel describes one of the 3 color channels within an endpoint
//   * MSB and LSB specify:
//      * The size of the bitfield being read
//      * The position of the bitfield within the variable it is being read to
//      * If the bitfield is stored in reverse bit order
//     If MSB < LSB then the bitfield is stored in reverse order. The size of
//     the bitfield is abs(MSB - LSB) + 1. And the position of the bitfield within
//     the variable is min(LSB, MSB).
//
// Invalid or reserved modes have no entry in the table (modeToIndex returns -1 for them).
// Number of rows in blockDescs: one per legal mode (table indices 0 to 13).
627static constexpr int NumBlocks = 14;
628// The largest number of descriptions within a block.
629static constexpr int MaxBlockDescIndex = 26;
// Block descriptions, one row per legal mode; Block::decode selects the row with
// modeToIndex(mode). A row starts with its Mode entry (which carries the ModeDesc) and
// is terminated by an End entry. Slots after the listed entries are default-constructed
// BlockDesc values, whose type is also End.
630static constexpr BlockDesc blockDescs[NumBlocks][MaxBlockDescIndex] = {
631// @fmt:off
632// Mode 0, Index 0
633{
634{ Mode, None, 1, 0, { 0, true, 2, 10, { 5, 5, 5 } } },
635{ EP2, G, 4, 4 }, { EP2, B, 4, 4 }, { EP3, B, 4, 4 },
636{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
637{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
638{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
639{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
640{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
641{ EP3, B, 3, 3 },
642{ Partition, None, 4, 0 },
643{ End, None, 0, 0},
644},
645// Mode 1, Index 1
646{
647{ Mode, None, 1, 0, { 1, true, 2, 7, { 6, 6, 6 } } },
648{ EP2, G, 5, 5 }, { EP3, G, 5, 4 }, { EP0, R, 6, 0 },
649{ EP3, B, 1, 0 }, { EP2, B, 4, 4 }, { EP0, G, 6, 0 },
650{ EP2, B, 5, 5 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
651{ EP0, B, 6, 0 }, { EP3, B, 3, 3 }, { EP3, B, 5, 5 },
652{ EP3, B, 4, 4 }, { EP1, R, 5, 0 }, { EP2, G, 3, 0 },
653{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 5, 0 },
654{ EP2, B, 3, 0 }, { EP2, R, 5, 0 }, { EP3, R, 5, 0 },
655{ Partition, None, 4, 0 },
656{ End, None, 0, 0},
657},
658// Mode 2, Index 2
659{
660{ Mode, None, 4, 0, { 2, true, 2, 11, { 5, 4, 4 } } },
661{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
662{ EP1, R, 4, 0 }, { EP0, R, 10, 10 }, { EP2, G, 3, 0 },
663{ EP1, G, 3, 0 }, { EP0, G, 10, 10 }, { EP3, B, 0, 0 },
664{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
665{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
666{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
667{ Partition, None, 4, 0 },
668{ End, None, 0, 0},
669},
670// Mode 3, Index 3
671{
672{ Mode, None, 4, 0, { 3, false, 1, 10, { 0, 0, 0 } } },
673{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
674{ EP1, R, 9, 0 }, { EP1, G, 9, 0 }, { EP1, B, 9, 0 },
675{ End, None, 0, 0},
676},
677// Mode 6, Index 4
678{
679{ Mode, None, 4, 0, { 6, true, 2, 11, { 4, 5, 4 } } }, // 1 1
680{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
681{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP3, G, 4, 4 },
682{ EP2, G, 3, 0 }, { EP1, G, 4, 0 }, { EP0, G, 10, 10 },
683{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
684{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
685{ EP3, B, 0, 0 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 }, // 18 19
686{ EP2, G, 4, 4 }, { EP3, B, 3, 3 }, // 2 21
687{ Partition, None, 4, 0 },
688{ End, None, 0, 0},
689},
690// Mode 7, Index 5
691{
692{ Mode, None, 4, 0, { 7, true, 1, 11, { 9, 9, 9 } } },
693{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
694{ EP1, R, 8, 0 }, { EP0, R, 10, 10 }, { EP1, G, 8, 0 },
695{ EP0, G, 10, 10 }, { EP1, B, 8, 0 }, { EP0, B, 10, 10 },
696{ End, None, 0, 0},
697},
698// Mode 10, Index 6
699{
700{ Mode, None, 4, 0, { 10, true, 2, 11, { 4, 4, 5 } } },
701{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
702{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP2, B, 4, 4 },
703{ EP2, G, 3, 0 }, { EP1, G, 3, 0 }, { EP0, G, 10, 10 },
704{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
705{ EP0, B, 10, 10 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
706{ EP3, B, 1, 1 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 },
707{ EP3, B, 4, 4 }, { EP3, B, 3, 3 },
708{ Partition, None, 4, 0 },
709{ End, None, 0, 0},
710},
711// Mode 11, Index 7
712{
713{ Mode, None, 4, 0, { 11, true, 1, 12, { 8, 8, 8 } } },
714{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
715{ EP1, R, 7, 0 }, { EP0, R, 10, 11 }, { EP1, G, 7, 0 },
716{ EP0, G, 10, 11 }, { EP1, B, 7, 0 }, { EP0, B, 10, 11 },
717{ End, None, 0, 0},
718},
719// Mode 14, Index 8
720{
721{ Mode, None, 4, 0, { 14, true, 2, 9, { 5, 5, 5 } } },
722{ EP0, R, 8, 0 }, { EP2, B, 4, 4 }, { EP0, G, 8, 0 },
723{ EP2, G, 4, 4 }, { EP0, B, 8, 0 }, { EP3, B, 4, 4 },
724{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
725{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
726{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
727{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
728{ EP3, B, 3, 3 },
729{ Partition, None, 4, 0 },
730{ End, None, 0, 0},
731},
732// Mode 15, Index 9
733{
734{ Mode, None, 4, 0, { 15, true, 1, 16, { 4, 4, 4 } } },
735{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
736{ EP1, R, 3, 0 }, { EP0, R, 10, 15 }, { EP1, G, 3, 0 },
737{ EP0, G, 10, 15 }, { EP1, B, 3, 0 }, { EP0, B, 10, 15 },
738{ End, None, 0, 0},
739},
740// Mode 18, Index 10
741{
742{ Mode, None, 4, 0, { 18, true, 2, 8, { 6, 5, 5 } } },
743{ EP0, R, 7, 0 }, { EP3, G, 4, 4 }, { EP2, B, 4, 4 },
744{ EP0, G, 7, 0 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
745{ EP0, B, 7, 0 }, { EP3, B, 3, 3 }, { EP3, B, 4, 4 },
746{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 4, 0 },
747{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
748{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 5, 0 },
749{ EP3, R, 5, 0 },
750{ Partition, None, 4, 0 },
751{ End, None, 0, 0},
752},
753// Mode 22, Index 11
754{
755{ Mode, None, 4, 0, { 22, true, 2, 8, { 5, 6, 5 } } },
756{ EP0, R, 7, 0 }, { EP3, B, 0, 0 }, { EP2, B, 4, 4 },
757{ EP0, G, 7, 0 }, { EP2, G, 5, 5 }, { EP2, G, 4, 4 },
758{ EP0, B, 7, 0 }, { EP3, G, 5, 5 }, { EP3, B, 4, 4 },
759{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
760{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
761{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
762{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
763{ Partition, None, 4, 0 },
764{ End, None, 0, 0},
765},
766// Mode 26, Index 12
767{
768{ Mode, None, 4, 0, { 26, true, 2, 8, { 5, 5, 6 } } },
769{ EP0, R, 7, 0 }, { EP3, B, 1, 1 }, { EP2, B, 4, 4 },
770{ EP0, G, 7, 0 }, { EP2, B, 5, 5 }, { EP2, G, 4, 4 },
771{ EP0, B, 7, 0 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
772{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
773{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
774{ EP1, B, 5, 0 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
775{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
776{ Partition, None, 4, 0 },
777{ End, None, 0, 0},
778},
779// Mode 30, Index 13
780{
781{ Mode, None, 4, 0, { 30, false, 2, 6, { 0, 0, 0 } } },
782{ EP0, R, 5, 0 }, { EP3, G, 4, 4 }, { EP3, B, 0, 0 },
783{ EP3, B, 1, 1 }, { EP2, B, 4, 4 }, { EP0, G, 5, 0 },
784{ EP2, G, 5, 5 }, { EP2, B, 5, 5 }, { EP3, B, 2, 2 },
785{ EP2, G, 4, 4 }, { EP0, B, 5, 0 }, { EP3, G, 5, 5 },
786{ EP3, B, 3, 3 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
787{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 5, 0 },
788{ EP3, G, 3, 0 }, { EP1, B, 5, 0 }, { EP2, B, 3, 0 },
789{ EP2, R, 5, 0 }, { EP3, R, 5, 0 },
790{ Partition, None, 4, 0 },
791{ End, None, 0, 0},
792}
793// @fmt:on
794};
795
796struct Block {
797uint64_t low64;
798uint64_t high64;
799
800void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch, size_t dstBpp, bool isSigned) const {
801uint8_t mode = 0;
802Data data(low64, high64);
803assert(dstBpp == sizeof(Color));
804
805if ((data.low64 & 0x2) == 0) {
806mode = data.consumeBits(1, 0);
807} else {
808mode = data.consumeBits(4, 0);
809}
810
811int blockIndex = modeToIndex(mode);
812// Handle illegal or reserved mode
813if (blockIndex == -1) {
814for (int y = 0; y < 4 && y + dstY < dstHeight; y++) {
815for (int x = 0; x < 4 && x + dstX < dstWidth; x++) {
816auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
817out->rgba = {0, 0, 0};
818}
819}
820return;
821}
822const BlockDesc *blockDesc = blockDescs[blockIndex];
823
824RGBf e[4];
825e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned;
826
827int partition = 0;
828ModeDesc modeDesc;
829for (int index = 0; blockDesc[index].type != End; index++) {
830const BlockDesc desc = blockDesc[index];
831
832switch (desc.type) {
833case Mode:
834modeDesc = desc.modeDesc;
835assert(modeDesc.number == mode);
836
837e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits;
838for (int i = 0; i < RGBfChannels; i++) {
839if (modeDesc.hasDelta) {
840e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i];
841} else {
842e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.endpointBits;
843}
844}
845break;
846case Partition:
847partition |= data.consumeBits(desc.MSB, desc.LSB);
848break;
849case EP0:
850case EP1:
851case EP2:
852case EP3:
853e[desc.type].channel[desc.channel] |= data.consumeBits(desc.MSB, desc.LSB);
854break;
855default:
856assert(false);
857return;
858}
859}
860
861// Sign extension
862if (isSigned) {
863for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
864e[ep].extendSign();
865}
866} else if (modeDesc.hasDelta) {
867// Don't sign-extend the base endpoint in an unsigned format.
868for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
869e[ep].extendSign();
870}
871}
872
873// Turn the deltas into endpoints
874if (modeDesc.hasDelta) {
875for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
876e[ep].resolveDelta(e[0]);
877}
878}
879
880for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
881e[ep].unquantize();
882}
883
884// Get the indices, calculate final colors, and output
885for (int y = 0; y < 4; y++) {
886for (int x = 0; x < 4; x++) {
887int pixelNum = x + y * 4;
888IndexInfo idx;
889bool isAnchor = false;
890int firstEndpoint = 0;
891// Bc6H can have either 1 or 2 petitions depending on the mode.
892// The number of petitions affects the number of indices with implicit
893// leading 0 bits and the number of bits per index.
894if (modeDesc.partitionCount == 1) {
895idx.numBits = 4;
896// There's an implicit leading 0 bit for the first idx
897isAnchor = (pixelNum == 0);
898} else {
899idx.numBits = 3;
900// There are 2 indices with implicit leading 0-bits.
901isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
902firstEndpoint = PartitionTable2[partition][pixelNum] * 2;
903}
904
905idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0);
906
907// Don't exit the loop early, we need to consume these index bits regardless if
908// we actually output them or not.
909if ((y + dstY >= dstHeight) || (x + dstX >= dstWidth)) {
910continue;
911}
912
913Color color = interpolate(e[firstEndpoint], e[firstEndpoint + 1], idx, isSigned);
914auto out = reinterpret_cast<Color *>(dst + dstBpp * x + dstPitch * y);
915*out = color;
916}
917}
918}
919};
920
921} // namespace BC6H
922
923namespace BC7 {
924// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt
925// https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format
926
// A run of `count` bits that starts at bit position `offset`.
struct Bitfield {
    int offset;
    int count;

    // Returns the field of `bits` bits that begins immediately after this one.
    // Const-qualified so it can also be called on const and constexpr objects.
    constexpr Bitfield Then(const int bits) const { return {offset + count, bits}; }

    // Two fields are equal when both their offset and their count match.
    constexpr bool operator==(const Bitfield &rhs) const {
        return offset == rhs.offset && count == rhs.count;
    }
};
937
938struct Mode {
939const int IDX; // Mode index
940const int NS; // Number of subsets in each partition
941const int PB; // Partition bits
942const int RB; // Rotation bits
943const int ISB; // Index selection bits
944const int CB; // Color bits
945const int AB; // Alpha bits
946const int EPB; // Endpoint P-bits
947const int SPB; // Shared P-bits
948const int IB; // Primary index bits per element
949const int IBC; // Primary index bits total
950const int IB2; // Secondary index bits per element
951
952constexpr int NumColors() const { return NS * 2; }
953
954constexpr Bitfield Partition() const { return {IDX + 1, PB}; }
955
956constexpr Bitfield Rotation() const { return Partition().Then(RB); }
957
958constexpr Bitfield IndexSelection() const { return Rotation().Then(ISB); }
959
960constexpr Bitfield Red(int idx) const {
961return IndexSelection().Then(CB * idx).Then(CB);
962}
963
964constexpr Bitfield Green(int idx) const {
965return Red(NumColors() - 1).Then(CB * idx).Then(CB);
966}
967
968constexpr Bitfield Blue(int idx) const {
969return Green(NumColors() - 1).Then(CB * idx).Then(CB);
970}
971
972constexpr Bitfield Alpha(int idx) const {
973return Blue(NumColors() - 1).Then(AB * idx).Then(AB);
974}
975
976constexpr Bitfield EndpointPBit(int idx) const {
977return Alpha(NumColors() - 1).Then(EPB * idx).Then(EPB);
978}
979
980constexpr Bitfield SharedPBit0() const {
981return EndpointPBit(NumColors() - 1).Then(SPB);
982}
983
984constexpr Bitfield SharedPBit1() const {
985return SharedPBit0().Then(SPB);
986}
987
988constexpr Bitfield PrimaryIndex(int offset, int count) const {
989return SharedPBit1().Then(offset).Then(count);
990}
991
992constexpr Bitfield SecondaryIndex(int offset, int count) const {
993return SharedPBit1().Then(IBC + offset).Then(count);
994}
995};
996
997static constexpr Mode Modes[] = {
998// IDX NS PB RB ISB CB AB EPB SPB IB IBC, IB2
999/**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0},
1000/**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0},
1001/**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0},
1002/**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0},
1003/**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3},
1004/**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2},
1005/**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0},
1006/**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0},
1007/**/ {-1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x00, 0x0},
1008};
1009
static constexpr int MaxPartitions{64}; // Number of rows in each partition and anchor table
static constexpr int MaxSubsets{3};     // Upper bound on the subsets (endpoint pairs) of a block
1012
1013static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
1014{0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
1015{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
1016{0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
1017{0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
1018{0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
1019{0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
1020{0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
1021{0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1},
1022{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1},
1023{0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1024{0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
1025{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1},
1026{0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1027{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
1028{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
1029{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
1030{0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1},
1031{0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
1032{0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0},
1033{0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0},
1034{0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
1035{0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
1036{0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
1037{0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1},
1038{0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
1039{0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
1040{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0},
1041{0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0},
1042{0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0},
1043{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
1044{0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0},
1045{0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0},
1046{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
1047{0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
1048{0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0},
1049{0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0},
1050{0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
1051{0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0},
1052{0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
1053{0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1},
1054{0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0},
1055{0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0},
1056{0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0},
1057{0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0},
1058{0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
1059{0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1},
1060{0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1},
1061{0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0},
1062{0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0},
1063{0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0},
1064{0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0},
1065{0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0},
1066{0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1},
1067{0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1},
1068{0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0},
1069{0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0},
1070{0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1},
1071{0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1},
1072{0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1},
1073{0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1},
1074{0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
1075{0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
1076{0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0},
1077{0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1},
1078};
1079
1080static constexpr uint8_t PartitionTable3[MaxPartitions][16] = {
1081{0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2},
1082{0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1},
1083{0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1},
1084{0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1},
1085{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2},
1086{0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2},
1087{0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1},
1088{0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1},
1089{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2},
1090{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2},
1091{0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2},
1092{0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2},
1093{0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2},
1094{0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2},
1095{0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2},
1096{0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0},
1097{0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2},
1098{0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0},
1099{0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2},
1100{0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1},
1101{0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2},
1102{0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1},
1103{0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2},
1104{0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0},
1105{0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0},
1106{0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2},
1107{0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0},
1108{0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1},
1109{0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2},
1110{0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2},
1111{0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1},
1112{0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1},
1113{0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2},
1114{0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1},
1115{0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2},
1116{0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0},
1117{0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
1118{0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
1119{0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0},
1120{0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1},
1121{0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1},
1122{0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2},
1123{0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1},
1124{0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2},
1125{0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1},
1126{0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1},
1127{0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1},
1128{0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1},
1129{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2},
1130{0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1},
1131{0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2},
1132{0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2},
1133{0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2},
1134{0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2},
1135{0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2},
1136{0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2},
1137{0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2},
1138{0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2},
1139{0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2},
1140{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2},
1141{0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1},
1142{0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2},
1143{0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
1144{0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0},
1145};
1146
1147static constexpr uint8_t AnchorTable2[MaxPartitions] = {
1148// @fmt:off
11490xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
11500xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
11510xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
11520x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
11530xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
11540x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
11550x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
11560xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
1157// @fmt:on
1158};
1159
1160static constexpr uint8_t AnchorTable3a[MaxPartitions] = {
1161// @fmt:off
11620x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf,
11630x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3,
11640x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa,
11650x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf,
11660x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf,
11670xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf,
11680x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa,
11690x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3,
1170// @fmt:on
1171};
1172
1173static constexpr uint8_t AnchorTable3b[MaxPartitions] = {
1174// @fmt:off
11750xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8,
11760xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8,
11770xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8,
11780x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8,
11790xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa,
11800x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8,
11810xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
11820xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8,
1183// @fmt:on
1184};
1185
// Four-byte color value used by the BC7 decoder; the members are laid out as b, g, r, a.
struct Color {
    // Three 8-bit channels with component-wise operators. Every operator computes in int and
    // truncates each channel back to 8 bits through the int constructor.
    struct RGB {
        RGB() = default; // Leaves the channels uninitialized

        RGB(uint8_t red, uint8_t green, uint8_t blue)
            : b(blue), g(green), r(red) {}

        // Keeps only the low 8 bits of every argument
        RGB(int red, int green, int blue)
            : b(static_cast<uint8_t>(blue)), g(static_cast<uint8_t>(green)), r(static_cast<uint8_t>(red)) {}

        // Shifts every channel left by `shift` bits
        RGB operator<<(int shift) const {
            return RGB(r << shift, g << shift, b << shift);
        }

        // Shifts every channel right by `shift` bits
        RGB operator>>(int shift) const {
            return RGB(r >> shift, g >> shift, b >> shift);
        }

        // ORs the same bits into every channel
        RGB operator|(int bits) const {
            return RGB(r | bits, g | bits, b | bits);
        }

        // ORs the matching channels of both operands
        RGB operator|(const RGB &rhs) const {
            return RGB(r | rhs.r, g | rhs.g, b | rhs.b);
        }

        // Adds the matching channels of both operands (wrapping at 8 bits)
        RGB operator+(const RGB &rhs) const {
            return RGB(r + rhs.r, g + rhs.g, b + rhs.b);
        }

        uint8_t b;
        uint8_t g;
        uint8_t r;
    };

    RGB rgb;
    uint8_t a;
};

static_assert(sizeof(Color) == 4, "Color size must be 4 bytes");
1216
1217struct Block {
1218constexpr uint64_t Get(const Bitfield &bf) const {
1219uint64_t mask = (1ULL << bf.count) - 1;
1220if (bf.offset + bf.count <= 64) {
1221return (low >> bf.offset) & mask;
1222}
1223if (bf.offset >= 64) {
1224return (high >> (bf.offset - 64)) & mask;
1225}
1226return ((low >> bf.offset) | (high << (64 - bf.offset))) & mask;
1227}
1228
1229const Mode &mode() const {
1230if ((low & 0b00000001) != 0) {
1231return Modes[0];
1232}
1233if ((low & 0b00000010) != 0) {
1234return Modes[1];
1235}
1236if ((low & 0b00000100) != 0) {
1237return Modes[2];
1238}
1239if ((low & 0b00001000) != 0) {
1240return Modes[3];
1241}
1242if ((low & 0b00010000) != 0) {
1243return Modes[4];
1244}
1245if ((low & 0b00100000) != 0) {
1246return Modes[5];
1247}
1248if ((low & 0b01000000) != 0) {
1249return Modes[6];
1250}
1251if ((low & 0b10000000) != 0) {
1252return Modes[7];
1253}
1254return Modes[8]; // Invalid mode
1255}
1256
1257struct IndexInfo {
1258uint64_t value;
1259int numBits;
1260};
1261
1262uint8_t interpolate(uint8_t e0, uint8_t e1, const IndexInfo &index) const {
1263static constexpr uint16_t weights2[] = {0, 21, 43, 64};
1264static constexpr uint16_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
1265static constexpr uint16_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
126634, 38, 43, 47, 51, 55, 60, 64};
1267static constexpr uint16_t const *weightsN[] = {
1268nullptr, nullptr, weights2, weights3, weights4
1269};
1270auto weights = weightsN[index.numBits];
1271assert(weights != nullptr);
1272return (uint8_t) (((64 - weights[index.value]) * uint16_t(e0) + weights[index.value] * uint16_t(e1) + 32) >> 6);
1273}
1274
1275void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch) const {
1276auto const &mode = this->mode();
1277
1278if (mode.IDX < 0) // Invalid mode:
1279{
1280for (size_t y = 0; y < 4 && y + dstY < dstHeight; y++) {
1281for (size_t x = 0; x < 4 && x + dstX < dstWidth; x++) {
1282auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
1283out->rgb = {0, 0, 0};
1284out->a = 0;
1285}
1286}
1287return;
1288}
1289
1290using Endpoint = std::array<Color, 2>;
1291std::array<Endpoint, MaxSubsets> subsets;
1292
1293for (size_t i = 0; i < mode.NS; i++) {
1294auto &subset = subsets[i];
1295subset[0].rgb.r = Get(mode.Red(i * 2 + 0));
1296subset[0].rgb.g = Get(mode.Green(i * 2 + 0));
1297subset[0].rgb.b = Get(mode.Blue(i * 2 + 0));
1298subset[0].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 0)) : 255;
1299
1300subset[1].rgb.r = Get(mode.Red(i * 2 + 1));
1301subset[1].rgb.g = Get(mode.Green(i * 2 + 1));
1302subset[1].rgb.b = Get(mode.Blue(i * 2 + 1));
1303subset[1].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 1)) : 255;
1304}
1305
1306if (mode.SPB > 0) {
1307auto pbit0 = Get(mode.SharedPBit0());
1308auto pbit1 = Get(mode.SharedPBit1());
1309subsets[0][0].rgb = (subsets[0][0].rgb << 1) | pbit0;
1310subsets[0][1].rgb = (subsets[0][1].rgb << 1) | pbit0;
1311subsets[1][0].rgb = (subsets[1][0].rgb << 1) | pbit1;
1312subsets[1][1].rgb = (subsets[1][1].rgb << 1) | pbit1;
1313}
1314
1315if (mode.EPB > 0) {
1316for (size_t i = 0; i < mode.NS; i++) {
1317auto &subset = subsets[i];
1318auto pbit0 = Get(mode.EndpointPBit(i * 2 + 0));
1319auto pbit1 = Get(mode.EndpointPBit(i * 2 + 1));
1320subset[0].rgb = (subset[0].rgb << 1) | pbit0;
1321subset[1].rgb = (subset[1].rgb << 1) | pbit1;
1322if (mode.AB > 0) {
1323subset[0].a = (subset[0].a << 1) | pbit0;
1324subset[1].a = (subset[1].a << 1) | pbit1;
1325}
1326}
1327}
1328
1329auto const colorBits = mode.CB + mode.SPB + mode.EPB;
1330auto const alphaBits = mode.AB + mode.SPB + mode.EPB;
1331
1332for (size_t i = 0; i < mode.NS; i++) {
1333auto &subset = subsets[i];
1334subset[0].rgb = subset[0].rgb << (8 - colorBits);
1335subset[1].rgb = subset[1].rgb << (8 - colorBits);
1336subset[0].rgb = subset[0].rgb | (subset[0].rgb >> colorBits);
1337subset[1].rgb = subset[1].rgb | (subset[1].rgb >> colorBits);
1338
1339if (mode.AB > 0) {
1340subset[0].a = subset[0].a << (8 - alphaBits);
1341subset[1].a = subset[1].a << (8 - alphaBits);
1342subset[0].a = subset[0].a | (subset[0].a >> alphaBits);
1343subset[1].a = subset[1].a | (subset[1].a >> alphaBits);
1344}
1345}
1346
1347int colorIndexBitOffset = 0;
1348int alphaIndexBitOffset = 0;
1349for (int y = 0; y < 4; y++) {
1350for (int x = 0; x < 4; x++) {
1351auto texelIdx = y * 4 + x;
1352auto partitionIdx = Get(mode.Partition());
1353assert(partitionIdx < MaxPartitions);
1354auto subsetIdx = subsetIndex(mode, partitionIdx, texelIdx);
1355assert(subsetIdx < MaxSubsets);
1356auto const &subset = subsets[subsetIdx];
1357
1358auto anchorIdx = anchorIndex(mode, partitionIdx, subsetIdx);
1359auto isAnchor = anchorIdx == texelIdx;
1360auto colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset);
1361auto alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset);
1362
1363if (y + dstY >= dstHeight || x + dstX >= dstWidth) {
1364// Don't be tempted to skip early at the loops:
1365// The calls to colorIndex() and alphaIndex() adjust bit
1366// offsets that need to be carefully tracked.
1367continue;
1368}
1369
1370Color output;
1371// Note: We flip r and b channels past this point as the texture storage is BGR while the output is RGB
1372output.rgb.r = interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx);
1373output.rgb.g = interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx);
1374output.rgb.b = interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx);
1375output.a = interpolate(subset[0].a, subset[1].a, alphaIdx);
1376
1377switch (Get(mode.Rotation())) {
1378default:
1379break;
1380case 1:
1381std::swap(output.a, output.rgb.b);
1382break;
1383case 2:
1384std::swap(output.a, output.rgb.g);
1385break;
1386case 3:
1387std::swap(output.a, output.rgb.r);
1388break;
1389}
1390
1391auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
1392*out = output;
1393}
1394}
1395}
1396
1397int subsetIndex(const Mode &mode, int partitionIdx, int texelIndex) const {
1398switch (mode.NS) {
1399default:
1400return 0;
1401case 2:
1402return PartitionTable2[partitionIdx][texelIndex];
1403case 3:
1404return PartitionTable3[partitionIdx][texelIndex];
1405}
1406}
1407
1408int anchorIndex(const Mode &mode, int partitionIdx, int subsetIdx) const {
1409// ARB_texture_compression_bptc states:
1410// "In partition zero, the anchor index is always index zero.
1411// In other partitions, the anchor index is specified by tables
1412// Table.A2 and Table.A3.""
1413// Note: This is really confusing - I believe they meant subset instead
1414// of partition here.
1415switch (subsetIdx) {
1416default:
1417return 0;
1418case 1:
1419return mode.NS == 2 ? AnchorTable2[partitionIdx] : AnchorTable3a[partitionIdx];
1420case 2:
1421return AnchorTable3b[partitionIdx];
1422}
1423}
1424
1425IndexInfo colorIndex(const Mode &mode, bool isAnchor,
1426int &indexBitOffset) const {
1427// ARB_texture_compression_bptc states:
1428// "The index value for interpolating color comes from the secondary
1429// index for the texel if the format has an index selection bit and its
1430// value is one and from the primary index otherwise.""
1431auto idx = Get(mode.IndexSelection());
1432assert(idx <= 1);
1433bool secondary = idx == 1;
1434auto numBits = secondary ? mode.IB2 : mode.IB;
1435auto numReadBits = numBits - (isAnchor ? 1 : 0);
1436auto index =
1437Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits)
1438: mode.PrimaryIndex(indexBitOffset, numReadBits));
1439indexBitOffset += numReadBits;
1440return {index, numBits};
1441}
1442
1443IndexInfo alphaIndex(const Mode &mode, bool isAnchor,
1444int &indexBitOffset) const {
1445// ARB_texture_compression_bptc states:
1446// "The alpha index comes from the secondary index if the block has a
1447// secondary index and the block either doesn't have an index selection
1448// bit or that bit is zero and the primary index otherwise."
1449auto idx = Get(mode.IndexSelection());
1450assert(idx <= 1);
1451bool secondary = (mode.IB2 != 0) && (idx == 0);
1452auto numBits = secondary ? mode.IB2 : mode.IB;
1453auto numReadBits = numBits - (isAnchor ? 1 : 0);
1454auto index =
1455Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits)
1456: mode.PrimaryIndex(indexBitOffset, numReadBits));
1457indexBitOffset += numReadBits;
1458return {index, numBits};
1459}
1460
1461// Assumes little-endian
1462uint64_t low;
1463uint64_t high;
1464};
1465
1466} // namespace BC7
1467} // anonymous namespace
1468
1469namespace bcn {
constexpr size_t R8Bpp = 1;           //!< The amount of bytes per pixel in R8
constexpr size_t R8g8Bpp = 2;         //!< The amount of bytes per pixel in R8G8
constexpr size_t R8g8b8a8Bpp = 4;     //!< The amount of bytes per pixel in R8G8B8A8
constexpr size_t R16g16b16a16Bpp = 8; //!< The amount of bytes per pixel in R16G16B16A16
1474
1475void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1476const auto *color{reinterpret_cast<const BC_color *>(src)};
1477size_t pitch{R8g8b8a8Bpp * width};
1478color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, true, false);
1479}
1480
1481void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1482const auto *alpha{reinterpret_cast<const BC_alpha *>(src)};
1483const auto *color{reinterpret_cast<const BC_color *>(src + 8)};
1484size_t pitch{R8g8b8a8Bpp * width};
1485color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true);
1486alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp);
1487}
1488
1489void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1490const auto *alpha{reinterpret_cast<const BC_channel *>(src)};
1491const auto *color{reinterpret_cast<const BC_color *>(src + 8)};
1492size_t pitch{R8g8b8a8Bpp * width};
1493color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true);
1494alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, 3, false);
1495}
1496
1497void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1498const auto *red{reinterpret_cast<const BC_channel *>(src)};
1499size_t pitch{R8Bpp * width};
1500red->decode(dst, x, y, width, height, pitch, R8Bpp, 0, isSigned);
1501}
1502
1503void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1504const auto *red{reinterpret_cast<const BC_channel *>(src)};
1505const auto *green{reinterpret_cast<const BC_channel *>(src + 8)};
1506size_t pitch{R8g8Bpp * width};
1507red->decode(dst, x, y, width, height, pitch, R8g8Bpp, 0, isSigned);
1508green->decode(dst, x, y, width, height, pitch, R8g8Bpp, 1, isSigned);
1509}
1510
1511void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1512const auto *block{reinterpret_cast<const BC6H::Block *>(src)};
1513size_t pitch{R16g16b16a16Bpp * width};
1514block->decode(dst, x, y, width, height, pitch, R16g16b16a16Bpp, isSigned);
1515}
1516
1517void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1518const auto *block{reinterpret_cast<const BC7::Block *>(src)};
1519size_t pitch{R8g8b8a8Bpp * width};
1520block->decode(dst, x, y, width, height, pitch);
1521}
1522}
1523