// yuzu (fork) / bc_decoder.cpp — 1522 lines · 63.1 KB
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright 2019 The SwiftShader Authors. All Rights Reserved.

// This BCn Decoder is directly derivative of Swiftshader's BCn Decoder found at: https://github.com/google/swiftshader/blob/d070309f7d154d6764cbd514b1a5c8bfcef61d06/src/Device/BC_Decoder.cpp
// This file does not follow the Skyline code conventions but has certain Skyline specific code
// There are a lot of implicit and narrowing conversions in this file due to this (Warnings are disabled as a result)
8

9
#include <array>
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <utility>
13

14
namespace {
15
    // Width and height, in output pixels, of one compressed block as iterated by the decoders below.
    constexpr int BlockWidth = 4;
    constexpr int BlockHeight = 4;
17

18
    struct BC_color {
19
        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, bool hasAlphaChannel, bool hasSeparateAlpha) const {
20
            Color c[4];
21
            c[0].extract565(c0);
22
            c[1].extract565(c1);
23
            if (hasSeparateAlpha || (c0 > c1)) {
24
                c[2] = ((c[0] * 2) + c[1]) / 3;
25
                c[3] = ((c[1] * 2) + c[0]) / 3;
26
            } else {
27
                c[2] = (c[0] + c[1]) >> 1;
28
                if (hasAlphaChannel) {
29
                    c[3].clearAlpha();
30
                }
31
            }
32

33
            for (int j = 0; j < BlockHeight && (y + j) < dstH; j++) {
34
                size_t dstOffset = j * dstPitch;
35
                size_t idxOffset = j * BlockHeight;
36
                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, idxOffset++, dstOffset += dstBpp) {
37
                    *reinterpret_cast<unsigned int *>(dst + dstOffset) = c[getIdx(idxOffset)].pack8888();
38
                }
39
            }
40
        }
41

42
      private:
43
        struct Color {
44
            Color() {
45
                c[0] = c[1] = c[2] = 0;
46
                c[3] = 0xFF000000;
47
            }
48

49
            void extract565(const unsigned int c565) {
50
                c[0] = ((c565 & 0x0000001F) << 3) | ((c565 & 0x0000001C) >> 2);
51
                c[1] = ((c565 & 0x000007E0) >> 3) | ((c565 & 0x00000600) >> 9);
52
                c[2] = ((c565 & 0x0000F800) >> 8) | ((c565 & 0x0000E000) >> 13);
53
            }
54

55
            unsigned int pack8888() const {
56
                return ((c[0] & 0xFF) << 16) | ((c[1] & 0xFF) << 8) | (c[2] & 0xFF) | c[3];
57
            }
58

59
            void clearAlpha() {
60
                c[3] = 0;
61
            }
62

63
            Color operator*(int factor) const {
64
                Color res;
65
                for (int i = 0; i < 4; ++i) {
66
                    res.c[i] = c[i] * factor;
67
                }
68
                return res;
69
            }
70

71
            Color operator/(int factor) const {
72
                Color res;
73
                for (int i = 0; i < 4; ++i) {
74
                    res.c[i] = c[i] / factor;
75
                }
76
                return res;
77
            }
78

79
            Color operator>>(int shift) const {
80
                Color res;
81
                for (int i = 0; i < 4; ++i) {
82
                    res.c[i] = c[i] >> shift;
83
                }
84
                return res;
85
            }
86

87
            Color operator+(Color const &obj) const {
88
                Color res;
89
                for (int i = 0; i < 4; ++i) {
90
                    res.c[i] = c[i] + obj.c[i];
91
                }
92
                return res;
93
            }
94

95
          private:
96
            int c[4];
97
        };
98

99
        size_t getIdx(int i) const {
100
            size_t offset = i << 1;  // 2 bytes per index
101
            return (idx & (0x3 << offset)) >> offset;
102
        }
103

104
        unsigned short c0;
105
        unsigned short c1;
106
        unsigned int idx;
107
    };
108
    static_assert(sizeof(BC_color) == 8, "BC_color must be 8 bytes");
109

110
    struct BC_channel {
111
        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, size_t channel, bool isSigned) const {
112
            int c[8] = {0};
113

114
            if (isSigned) {
115
                c[0] = static_cast<signed char>(data & 0xFF);
116
                c[1] = static_cast<signed char>((data & 0xFF00) >> 8);
117
            } else {
118
                c[0] = static_cast<uint8_t>(data & 0xFF);
119
                c[1] = static_cast<uint8_t>((data & 0xFF00) >> 8);
120
            }
121

122
            if (c[0] > c[1]) {
123
                for (int i = 2; i < 8; ++i) {
124
                    c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7;
125
                }
126
            } else {
127
                for (int i = 2; i < 6; ++i) {
128
                    c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5;
129
                }
130
                c[6] = isSigned ? -128 : 0;
131
                c[7] = isSigned ? 127 : 255;
132
            }
133

134
            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {
135
                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) {
136
                    dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast<uint8_t>(c[getIdx((j * BlockHeight) + i)]);
137
                }
138
            }
139
        }
140

141
      private:
142
        uint8_t getIdx(int i) const {
143
            int offset = i * 3 + 16;
144
            return static_cast<uint8_t>((data & (0x7ull << offset)) >> offset);
145
        }
146

147
        uint64_t data;
148
    };
149
    static_assert(sizeof(BC_channel) == 8, "BC_channel must be 8 bytes");
150

151
    struct BC_alpha {
152
        void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp) const {
153
            dst += 3;  // Write only to alpha (channel 3)
154
            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) {
155
                uint8_t *dstRow = dst;
156
                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) {
157
                    *dstRow = getAlpha(j * BlockHeight + i);
158
                }
159
            }
160
        }
161

162
      private:
163
        uint8_t getAlpha(int i) const {
164
            int offset = i << 2;
165
            int alpha = (data & (0xFull << offset)) >> offset;
166
            return static_cast<uint8_t>(alpha | (alpha << 4));
167
        }
168

169
        uint64_t data;
170
    };
171
    static_assert(sizeof(BC_alpha) == 8, "BC_alpha must be 8 bytes");
172

173
    namespace BC6H {
174
        // Number of rows in the two-subset partition tables below.
        static constexpr int MaxPartitions = 64;

        // @fmt:off

        // For each partition, the subset (0 or 1) that each of the 16 pixels of a block belongs to,
        // listed in row-major pixel order.
        static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
            { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
            { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 },
            { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 },
            { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 },
            { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 },
            { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 },
            { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 },
            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 },
            { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 },
            { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
            { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
            { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
            { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 },
            { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 },
            { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 },
            { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 },
            { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 },
            { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 },
            { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
            { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 },
            { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
            { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
            { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 },
            { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 },
            { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 },
            { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 },
            { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 },
            { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
            { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 },
            { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 },
            { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 },
            { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 },
            { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 },
            { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 },
            { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 },
            { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 },
            { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 },
            { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
            { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 },
            { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 },
            { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 },
            { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 },
            { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
            { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 },
            { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 },
            { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 },
            { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 },
            { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
            { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 },
            { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 },
            { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
            { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 },
            { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 },
        };

        // For each partition, a pixel index (0..15) that PartitionTable2 assigns to subset 1.
        // NOTE(review): presumably the subset-1 anchor index of the BC6H/BC7 spec - confirm against its use in Block::decode.
        static constexpr uint8_t AnchorTable2[MaxPartitions] = {
            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
            0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
            0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
            0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
            0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
            0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
            0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
        };

        // @fmt:on
257

258
        // 1.0f in half-precision floating point format
        static constexpr uint16_t halfFloat1 = 0x3C00;

        // One decoded output pixel: four 16-bit channels, addressable by name (rgba) or by index (channel).
        // Alpha is never copied from a source value; every constructor and assignment sets it to halfFloat1.
        union Color {
            struct RGBA {
                uint16_t r = 0;
                uint16_t g = 0;
                uint16_t b = 0;
                uint16_t a = halfFloat1;

                RGBA(uint16_t r, uint16_t g, uint16_t b)
                    : r(r), g(g), b(b) {
                }

                // Copies the colour channels only and resets alpha to 1.0.
                RGBA &operator=(const RGBA &other) {
                    this->r = other.r;
                    this->g = other.g;
                    this->b = other.b;
                    this->a = halfFloat1;

                    return *this;
                }
            };

            Color(uint16_t r, uint16_t g, uint16_t b)
                : rgba(r, g, b) {
            }

            // Convenience overload for integer expressions; each value is truncated to 16 bits.
            Color(int r, int g, int b)
                : rgba(static_cast<uint16_t>(r), static_cast<uint16_t>(g), static_cast<uint16_t>(b)) {
            }

            // Black with alpha 1.0. Previously every channel, including alpha, was left indeterminate.
            Color()
                : rgba(0, 0, 0) {
            }

            // Constructs rgba from the source's colour channels; alpha comes from RGBA's default initializer.
            Color(const Color &other)
                : rgba(other.rgba.r, other.rgba.g, other.rgba.b) {
            }

            Color &operator=(const Color &other) {
                this->rgba = other.rgba;

                return *this;
            }

            RGBA rgba;
            uint16_t channel[4];
        };
        static_assert(sizeof(Color) == 8, "BC6h::Color must be 8 bytes long");
305

306
        // Sign-extends the low `size` bits of val to a full int32_t.
        //
        // val must not have any bit set above bit (size - 1). A size of 0 or greater than 16 returns val
        // unchanged: the mask below is only 16 bits wide, and shifting by an out-of-range amount is undefined.
        inline int32_t extendSign(int32_t val, size_t size) {
            if (size == 0 || size > 16) {
                return val;
            }

            // Suppose we have a 2-bit integer being stored in 4 bit variable:
            //    x = 0b00AB
            //
            // In order to sign extend x, we need to turn the 0s into A's:
            //    x_extend = 0bAAAB
            //
            // We can do that by flipping A in x then subtracting 0b0010 from x.
            // Suppose A is 1:
            //    x       = 0b001B
            //    x_flip  = 0b000B
            //    x_minus = 0b111B
            // Since A is flipped to 0, subtracting the mask sets it and all the bits above it to 1.
            // And if A is 0:
            //    x       = 0b000B
            //    x_flip  = 0b001B
            //    x_minus = 0b000B
            // We unset the bit we flipped, and touch no other bit
            uint16_t mask = 1u << (size - 1);
            return (val ^ mask) - mask;
        }
327

328
        static int constexpr RGBfChannels = 3;
329
        struct RGBf {
330
            uint16_t channel[RGBfChannels];
331
            size_t size[RGBfChannels];
332
            bool isSigned;
333

334
            RGBf() {
335
                static_assert(RGBfChannels == 3, "RGBf must have exactly 3 channels");
336
                static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels");
337
                static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes");
338

339
                for (int i = 0; i < RGBfChannels; i++) {
340
                    channel[i] = 0;
341
                    size[i] = 0;
342
                }
343

344
                isSigned = false;
345
            }
346

347
            void extendSign() {
348
                for (int i = 0; i < RGBfChannels; i++) {
349
                    channel[i] = BC6H::extendSign(channel[i], size[i]);
350
                }
351
            }
352

353
            // Assuming this is the delta, take the base-endpoint and transform this into
354
            // a proper endpoint.
355
            //
356
            // The final computed endpoint is truncated to the base-endpoint's size;
357
            void resolveDelta(RGBf base) {
358
                for (int i = 0; i < RGBfChannels; i++) {
359
                    size[i] = base.size[i];
360
                    channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1);
361
                }
362

363
                // Per the spec:
364
                // "For signed formats, the results of the delta calculation must be sign
365
                // extended as well."
366
                if (isSigned) {
367
                    extendSign();
368
                }
369
            }
370

371
            void unquantize() {
372
                if (isSigned) {
373
                    unquantizeSigned();
374
                } else {
375
                    unquantizeUnsigned();
376
                }
377
            }
378

379
            void unquantizeUnsigned() {
380
                for (int i = 0; i < RGBfChannels; i++) {
381
                    if (size[i] >= 15 || channel[i] == 0) {
382
                        continue;
383
                    } else if (channel[i] == ((1u << size[i]) - 1)) {
384
                        channel[i] = 0xFFFFu;
385
                    } else {
386
                        // Need 32 bits to avoid overflow
387
                        uint32_t tmp = channel[i];
388
                        channel[i] = (uint16_t) (((tmp << 16) + 0x8000) >> size[i]);
389
                    }
390
                    size[i] = 16;
391
                }
392
            }
393

394
            void unquantizeSigned() {
395
                for (int i = 0; i < RGBfChannels; i++) {
396
                    if (size[i] >= 16 || channel[i] == 0) {
397
                        continue;
398
                    }
399

400
                    int16_t value = (int16_t)channel[i];
401
                    int32_t result = value;
402
                    bool signBit = value < 0;
403
                    if (signBit) {
404
                        value = -value;
405
                    }
406

407
                    if (value >= ((1 << (size[i] - 1)) - 1)) {
408
                        result = 0x7FFF;
409
                    } else {
410
                        // Need 32 bits to avoid overflow
411
                        int32_t tmp = value;
412
                        result = (((tmp << 15) + 0x4000) >> (size[i] - 1));
413
                    }
414

415
                    if (signBit) {
416
                        result = -result;
417
                    }
418

419
                    channel[i] = (uint16_t) result;
420
                    size[i] = 16;
421
                }
422
            }
423
        };
424

425
        // The 128 bits of a block, consumed from the least significant end.
        struct Data {
            uint64_t low64;
            uint64_t high64;

            Data() = default;

            Data(uint64_t low64, uint64_t high64)
                : low64(low64), high64(high64) {
            }

            // Consumes the lowest N bits from low64 and high64 where N is:
            //      abs(MSB - LSB) + 1
            // MSB and LSB come from the block description of the BC6h spec and specify
            // the location of the bits in the returned bitstring.
            //
            // If MSB < LSB, then the bits are reversed. Otherwise, the bitstring is read and
            // shifted without further modification.
            //
            // The remaining bits move down by N, so the next call reads the bits that followed.
            // N must be smaller than 32.
            uint32_t consumeBits(uint32_t MSB, uint32_t LSB) {
                bool reversed = MSB < LSB;
                if (reversed) {
                    std::swap(MSB, LSB);
                }

                const uint32_t numBits = MSB - LSB + 1;
                assert(numBits < sizeof(uint32_t) * 8);

                // Unsigned shift: a signed `1 << numBits` overflows for wide fields
                const uint32_t mask = (1u << numBits) - 1;
                // Read the low N bits
                uint32_t bits = static_cast<uint32_t>(low64 & mask);

                low64 >>= numBits;
                // Put the low N bits of high64 into the high 64-N bits of low64
                low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits);
                high64 >>= numBits;

                if (reversed) {
                    uint32_t tmp = 0;
                    for (uint32_t numSwaps = 0; numSwaps < numBits; numSwaps++) {
                        tmp <<= 1;
                        tmp |= (bits & 1);
                        bits >>= 1;
                    }

                    bits = tmp;
                }

                // After the swap above LSB is always the lower position, i.e. where the field starts
                return bits << LSB;
            }
        };
474

475
        // An interpolation index read from a block, together with the number of bits it was stored in.
        struct IndexInfo {
            uint64_t value;
            int numBits;
        };
479

480
// Interpolates between two endpoints, then does a final unquantization step
481
        // Blends the three channels of e0 and e1 using the 6-bit weight selected by index, then rescales
        // the result into the half-float range.
        //
        //   e0, e1    - endpoints; their channels are read as 16-bit values
        //   index     - weight selector; index.numBits must be 3 or 4 and index.value must fit in that width
        //   isSigned  - treat the channels as signed 16-bit values
        //
        // Returns the blended colour with alpha set to 1.0.
        Color interpolate(RGBf e0, RGBf e1, const IndexInfo &index, bool isSigned) {
            static constexpr uint32_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
            static constexpr uint32_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
                                                    34, 38, 43, 47, 51, 55, 60, 64};
            // Indexed by the number of index bits; only 3 and 4 have a table
            static constexpr uint32_t const *weightsN[] = {
                nullptr, nullptr, nullptr, weights3, weights4
            };
            assert(index.numBits >= 0 && index.numBits < static_cast<int>(sizeof(weightsN) / sizeof(weightsN[0])));
            auto weights = weightsN[index.numBits];
            assert(weights != nullptr);

            // Start from an initialised colour so alpha is 1.0 even when the return value is not copied
            Color color(0, 0, 0);
            uint32_t e0Weight = 64 - weights[index.value];
            uint32_t e1Weight = weights[index.value];

            for (int i = 0; i < RGBfChannels; i++) {
                int32_t e0Channel = e0.channel[i];
                int32_t e1Channel = e1.channel[i];

                if (isSigned) {
                    e0Channel = extendSign(e0Channel, 16);
                    e1Channel = extendSign(e1Channel, 16);
                }

                int32_t e0Value = e0Channel * e0Weight;
                int32_t e1Value = e1Channel * e1Weight;

                // Weighted sum with rounding; the weights add up to 64
                uint32_t tmp = ((e0Value + e1Value + 32) >> 6);

                // Need to unquantize value to limit it to the legal range of half-precision
                // floats. We do this by scaling by 31/32 or 31/64 depending on if the value
                // is signed or unsigned.
                if (isSigned) {
                    // Negative values are scaled by magnitude and get the half-float sign bit
                    tmp = ((tmp & 0x80000000) != 0) ? (((~tmp + 1) * 31) >> 5) | 0x8000 : (tmp * 31) >> 5;
                    // Don't return -0.0f, just normalize it to 0.0f.
                    if (tmp == 0x8000)
                        tmp = 0;
                } else {
                    tmp = (tmp * 31) >> 6;
                }

                color.channel[i] = static_cast<uint16_t>(tmp);
            }

            return color;
        }
525

526
        // What a bitfield of a block description holds.
        enum DataType {
            // Endpoints
            EP0 = 0,
            EP1 = 1,
            EP2 = 2,
            EP3 = 3,
            Mode,
            Partition,
            End,  // terminates a description list
        };

        // Colour channel of an endpoint that a bitfield belongs to; None for non-endpoint fields.
        enum Channel {
            R = 0,
            G = 1,
            B = 2,
            None,
        };

        // Per-channel bit widths, in R, G, B order.
        struct DeltaBits {
            size_t channel[3];

            constexpr DeltaBits()
                : channel{0, 0, 0} {
            }

            constexpr DeltaBits(size_t r, size_t g, size_t b)
                : channel{r, g, b} {
            }
        };

        // Properties of one block mode. A default-constructed ModeDesc has number -1.
        struct ModeDesc {
            int number;
            bool hasDelta;
            int partitionCount;
            int endpointBits;
            DeltaBits deltaBits;

            constexpr ModeDesc()
                : number(-1), hasDelta(false), partitionCount(0), endpointBits(0) {
            }

            constexpr ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits)
                : number(number), hasDelta(hasDelta), partitionCount(partitionCount), endpointBits(endpointBits), deltaBits(deltaBits) {
            }
        };

        // One bitfield of a block: what it holds, which channel it feeds, and the bit positions (MSB, LSB)
        // it occupies in the destination value. Only Mode entries carry a meaningful modeDesc.
        struct BlockDesc {
            DataType type;
            Channel channel;
            int MSB;
            int LSB;
            ModeDesc modeDesc;

            // Default entries are list terminators.
            constexpr BlockDesc()
                : type(End), channel(None), MSB(0), LSB(0), modeDesc() {
            }

            constexpr BlockDesc(const DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc)
                : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc(modeDesc) {
            }

            constexpr BlockDesc(DataType type, Channel channel, int MSB, int LSB)
                : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc() {
            }
        };
591

592
// Turns a legal mode into an index into the BlockDesc table.
// Illegal or reserved modes return -1.
594
        // Maps a mode number to its row in the block description table.
        // Valid modes are 0-3, 6, 7, 10, 11, 14, 15, 18, 22, 26 and 30; they map to rows 0-13 in that order.
        // Every other value returns -1.
        static int modeToIndex(uint8_t mode) {
            if (mode <= 3) {
                return mode;
            }

            // All remaining valid modes have bit 1 set
            if ((mode & 0x2) != 0) {
                if (mode <= 18) {
                    // Turns 6 into 4, 7 into 5, 10 into 6, etc.
                    return (mode / 2) + 1 + (mode & 0x1);
                }
                if (mode == 22 || mode == 26 || mode == 30) {
                    // Turns 22 into 11, 26 into 12, etc.
                    return mode / 4 + 6;
                }
            }

            return -1;
        }
609

610
// Describes the bitfields of each mode, from the LSB to the MSB, up to the
// point where the index data starts.
//
// The numbers come from the BC6h block description. Each BlockDesc has the form
//   {Type, Channel, MSB, LSB}
//   * Type describes which endpoint this is, or if this is a mode, a partition
//     number, or the end of the block description.
//   * Channel describes one of the 3 color channels within an endpoint
//   * MSB and LSB specify:
//      * The size of the bitfield being read
//      * The position of the bitfield within the variable it is being read to
//      * If the bitfield is stored in reverse bit order
//     If MSB < LSB then the bitfield is stored in reverse order. The size of
//     the bitfield is abs(MSB-LSB)+1. And the position of the bitfield within
//     the variable is min(LSB, MSB).
//
// Invalid or reserved modes have no entry in the table (modeToIndex returns -1 for them).
627
        static constexpr int NumBlocks = 14;
628
// The largest number of descriptions within a block.
629
        static constexpr int MaxBlockDescIndex = 26;
630
        static constexpr BlockDesc blockDescs[NumBlocks][MaxBlockDescIndex] = {
631
// @fmt:off
632
// Mode 0, Index 0
633
{
634
{ Mode, None, 1, 0, { 0, true, 2, 10, { 5, 5, 5 } } },
635
{ EP2, G, 4, 4 }, { EP2, B, 4, 4 }, { EP3, B, 4, 4 },
636
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
637
{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
638
{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
639
{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
640
{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
641
{ EP3, B, 3, 3 },
642
{ Partition, None, 4, 0 },
643
{ End, None, 0, 0},
644
},
645
// Mode 1, Index 1
646
{
647
{ Mode, None, 1, 0, { 1, true, 2, 7, { 6, 6, 6 } } },
648
{ EP2, G, 5, 5 }, { EP3, G, 5, 4 }, { EP0, R, 6, 0 },
649
{ EP3, B, 1, 0 }, { EP2, B, 4, 4 }, { EP0, G, 6, 0 },
650
{ EP2, B, 5, 5 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
651
{ EP0, B, 6, 0 }, { EP3, B, 3, 3 }, { EP3, B, 5, 5 },
652
{ EP3, B, 4, 4 }, { EP1, R, 5, 0 }, { EP2, G, 3, 0 },
653
{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 5, 0 },
654
{ EP2, B, 3, 0 }, { EP2, R, 5, 0 }, { EP3, R, 5, 0 },
655
{ Partition, None, 4, 0 },
656
{ End, None, 0, 0},
657
},
658
// Mode 2, Index 2
659
{
660
{ Mode, None, 4, 0, { 2, true, 2, 11, { 5, 4, 4 } } },
661
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
662
{ EP1, R, 4, 0 }, { EP0, R, 10, 10 }, { EP2, G, 3, 0 },
663
{ EP1, G, 3, 0 }, { EP0, G, 10, 10 }, { EP3, B, 0, 0 },
664
{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
665
{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
666
{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
667
{ Partition, None, 4, 0 },
668
{ End, None, 0, 0},
669
},
670
// Mode 3, Index 3
671
{
672
{ Mode, None, 4, 0, { 3, false, 1, 10, { 0, 0, 0 } } },
673
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
674
{ EP1, R, 9, 0 }, { EP1, G, 9, 0 }, { EP1, B, 9, 0 },
675
{ End, None, 0, 0},
676
},
677
// Mode 6, Index 4
678
{
679
{ Mode, None, 4, 0, { 6, true, 2, 11, { 4, 5, 4 } } }, // 1 1
680
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
681
{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP3, G, 4, 4 },
682
{ EP2, G, 3, 0 }, { EP1, G, 4, 0 }, { EP0, G, 10, 10 },
683
{ EP3, G, 3, 0 }, { EP1, B, 3, 0 }, { EP0, B, 10, 10 },
684
{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
685
{ EP3, B, 0, 0 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 }, // 18 19
686
{ EP2, G, 4, 4 }, { EP3, B, 3, 3 }, // 2 21
687
{ Partition, None, 4, 0 },
688
{ End, None, 0, 0},
689
},
690
// Mode 7, Index 5
691
{
692
{ Mode, None, 4, 0, { 7, true, 1, 11, { 9, 9, 9 } } },
693
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
694
{ EP1, R, 8, 0 }, { EP0, R, 10, 10 }, { EP1, G, 8, 0 },
695
{ EP0, G, 10, 10 }, { EP1, B, 8, 0 }, { EP0, B, 10, 10 },
696
{ End, None, 0, 0},
697
},
698
// Mode 10, Index 6
699
{
700
{ Mode, None, 4, 0, { 10, true, 2, 11, { 4, 4, 5 } } },
701
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
702
{ EP1, R, 3, 0 }, { EP0, R, 10, 10 }, { EP2, B, 4, 4 },
703
{ EP2, G, 3, 0 }, { EP1, G, 3, 0 }, { EP0, G, 10, 10 },
704
{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
705
{ EP0, B, 10, 10 }, { EP2, B, 3, 0 }, { EP2, R, 3, 0 },
706
{ EP3, B, 1, 1 }, { EP3, B, 2, 2 }, { EP3, R, 3, 0 },
707
{ EP3, B, 4, 4 }, { EP3, B, 3, 3 },
708
{ Partition, None, 4, 0 },
709
{ End, None, 0, 0},
710
},
711
// Mode 11, Index 7
712
{
713
{ Mode, None, 4, 0, { 11, true, 1, 12, { 8, 8, 8 } } },
714
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
715
{ EP1, R, 7, 0 }, { EP0, R, 10, 11 }, { EP1, G, 7, 0 },
716
{ EP0, G, 10, 11 }, { EP1, B, 7, 0 }, { EP0, B, 10, 11 },
717
{ End, None, 0, 0},
718
},
719
// Mode 14, Index 8
720
{
721
{ Mode, None, 4, 0, { 14, true, 2, 9, { 5, 5, 5 } } },
722
{ EP0, R, 8, 0 }, { EP2, B, 4, 4 }, { EP0, G, 8, 0 },
723
{ EP2, G, 4, 4 }, { EP0, B, 8, 0 }, { EP3, B, 4, 4 },
724
{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
725
{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
726
{ EP1, B, 4, 0 }, { EP3, B, 1, 1 }, { EP2, B, 3, 0 },
727
{ EP2, R, 4, 0 }, { EP3, B, 2, 2 }, { EP3, R, 4, 0 },
728
{ EP3, B, 3, 3 },
729
{ Partition, None, 4, 0 },
730
{ End, None, 0, 0},
731
},
732
// Mode 15, Index 9
733
{
734
{ Mode, None, 4, 0, { 15, true, 1, 16, { 4, 4, 4 } } },
735
{ EP0, R, 9, 0 }, { EP0, G, 9, 0 }, { EP0, B, 9, 0 },
736
{ EP1, R, 3, 0 }, { EP0, R, 10, 15 }, { EP1, G, 3, 0 },
737
{ EP0, G, 10, 15 }, { EP1, B, 3, 0 }, { EP0, B, 10, 15 },
738
{ End, None, 0, 0},
739
},
740
// Mode 18, Index 10
741
{
742
{ Mode, None, 4, 0, { 18, true, 2, 8, { 6, 5, 5 } } },
743
{ EP0, R, 7, 0 }, { EP3, G, 4, 4 }, { EP2, B, 4, 4 },
744
{ EP0, G, 7, 0 }, { EP3, B, 2, 2 }, { EP2, G, 4, 4 },
745
{ EP0, B, 7, 0 }, { EP3, B, 3, 3 }, { EP3, B, 4, 4 },
746
{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 4, 0 },
747
{ EP3, B, 0, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
748
{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 5, 0 },
749
{ EP3, R, 5, 0 },
750
{ Partition, None, 4, 0 },
751
{ End, None, 0, 0},
752
},
753
// Mode 22, Index 11
754
{
755
{ Mode, None, 4, 0, { 22, true, 2, 8, { 5, 6, 5 } } },
756
{ EP0, R, 7, 0 }, { EP3, B, 0, 0 }, { EP2, B, 4, 4 },
757
{ EP0, G, 7, 0 }, { EP2, G, 5, 5 }, { EP2, G, 4, 4 },
758
{ EP0, B, 7, 0 }, { EP3, G, 5, 5 }, { EP3, B, 4, 4 },
759
{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
760
{ EP1, G, 5, 0 }, { EP3, G, 3, 0 }, { EP1, B, 4, 0 },
761
{ EP3, B, 1, 1 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
762
{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
763
{ Partition, None, 4, 0 },
764
{ End, None, 0, 0},
765
},
766
// Mode 26, Index 12
767
{
768
{ Mode, None, 4, 0, { 26, true, 2, 8, { 5, 5, 6 } } },
769
{ EP0, R, 7, 0 }, { EP3, B, 1, 1 }, { EP2, B, 4, 4 },
770
{ EP0, G, 7, 0 }, { EP2, B, 5, 5 }, { EP2, G, 4, 4 },
771
{ EP0, B, 7, 0 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
772
{ EP1, R, 4, 0 }, { EP3, G, 4, 4 }, { EP2, G, 3, 0 },
773
{ EP1, G, 4, 0 }, { EP3, B, 0, 0 }, { EP3, G, 3, 0 },
774
{ EP1, B, 5, 0 }, { EP2, B, 3, 0 }, { EP2, R, 4, 0 },
775
{ EP3, B, 2, 2 }, { EP3, R, 4, 0 }, { EP3, B, 3, 3 },
776
{ Partition, None, 4, 0 },
777
{ End, None, 0, 0},
778
},
779
// Mode 30, Index 13
780
{
781
{ Mode, None, 4, 0, { 30, false, 2, 6, { 0, 0, 0 } } },
782
{ EP0, R, 5, 0 }, { EP3, G, 4, 4 }, { EP3, B, 0, 0 },
783
{ EP3, B, 1, 1 }, { EP2, B, 4, 4 }, { EP0, G, 5, 0 },
784
{ EP2, G, 5, 5 }, { EP2, B, 5, 5 }, { EP3, B, 2, 2 },
785
{ EP2, G, 4, 4 }, { EP0, B, 5, 0 }, { EP3, G, 5, 5 },
786
{ EP3, B, 3, 3 }, { EP3, B, 5, 5 }, { EP3, B, 4, 4 },
787
{ EP1, R, 5, 0 }, { EP2, G, 3, 0 }, { EP1, G, 5, 0 },
788
{ EP3, G, 3, 0 }, { EP1, B, 5, 0 }, { EP2, B, 3, 0 },
789
{ EP2, R, 5, 0 }, { EP3, R, 5, 0 },
790
{ Partition, None, 4, 0 },
791
{ End, None, 0, 0},
792
}
793
// @fmt:on
794
        };
795

796
        struct Block {
797
            uint64_t low64;
798
            uint64_t high64;
799

800
            void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch, size_t dstBpp, bool isSigned) const {
801
                uint8_t mode = 0;
802
                Data data(low64, high64);
803
                assert(dstBpp == sizeof(Color));
804

805
                if ((data.low64 & 0x2) == 0) {
806
                    mode = data.consumeBits(1, 0);
807
                } else {
808
                    mode = data.consumeBits(4, 0);
809
                }
810

811
                int blockIndex = modeToIndex(mode);
812
                // Handle illegal or reserved mode
813
                if (blockIndex == -1) {
814
                    for (int y = 0; y < 4 && y + dstY < dstHeight; y++) {
815
                        for (int x = 0; x < 4 && x + dstX < dstWidth; x++) {
816
                            auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
817
                            out->rgba = {0, 0, 0};
818
                        }
819
                    }
820
                    return;
821
                }
822
                const BlockDesc *blockDesc = blockDescs[blockIndex];
823

824
                RGBf e[4];
825
                e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned;
826

827
                int partition = 0;
828
                ModeDesc modeDesc;
829
                for (int index = 0; blockDesc[index].type != End; index++) {
830
                    const BlockDesc desc = blockDesc[index];
831

832
                    switch (desc.type) {
833
                        case Mode:
834
                            modeDesc = desc.modeDesc;
835
                            assert(modeDesc.number == mode);
836

837
                            e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits;
838
                            for (int i = 0; i < RGBfChannels; i++) {
839
                                if (modeDesc.hasDelta) {
840
                                    e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i];
841
                                } else {
842
                                    e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.endpointBits;
843
                                }
844
                            }
845
                            break;
846
                        case Partition:
847
                            partition |= data.consumeBits(desc.MSB, desc.LSB);
848
                            break;
849
                        case EP0:
850
                        case EP1:
851
                        case EP2:
852
                        case EP3:
853
                            e[desc.type].channel[desc.channel] |= data.consumeBits(desc.MSB, desc.LSB);
854
                            break;
855
                        default:
856
                            assert(false);
857
                            return;
858
                    }
859
                }
860

861
                // Sign extension
862
                if (isSigned) {
863
                    for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
864
                        e[ep].extendSign();
865
                    }
866
                } else if (modeDesc.hasDelta) {
867
                    // Don't sign-extend the base endpoint in an unsigned format.
868
                    for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
869
                        e[ep].extendSign();
870
                    }
871
                }
872

873
                // Turn the deltas into endpoints
874
                if (modeDesc.hasDelta) {
875
                    for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) {
876
                        e[ep].resolveDelta(e[0]);
877
                    }
878
                }
879

880
                for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) {
881
                    e[ep].unquantize();
882
                }
883

884
                // Get the indices, calculate final colors, and output
885
                for (int y = 0; y < 4; y++) {
886
                    for (int x = 0; x < 4; x++) {
887
                        int pixelNum = x + y * 4;
888
                        IndexInfo idx;
889
                        bool isAnchor = false;
890
                        int firstEndpoint = 0;
891
                        // Bc6H can have either 1 or 2 petitions depending on the mode.
892
                        // The number of petitions affects the number of indices with implicit
893
                        // leading 0 bits and the number of bits per index.
894
                        if (modeDesc.partitionCount == 1) {
895
                            idx.numBits = 4;
896
                            // There's an implicit leading 0 bit for the first idx
897
                            isAnchor = (pixelNum == 0);
898
                        } else {
899
                            idx.numBits = 3;
900
                            // There are 2 indices with implicit leading 0-bits.
901
                            isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
902
                            firstEndpoint = PartitionTable2[partition][pixelNum] * 2;
903
                        }
904

905
                        idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0);
906

907
                        // Don't exit the loop early, we need to consume these index bits regardless if
908
                        // we actually output them or not.
909
                        if ((y + dstY >= dstHeight) || (x + dstX >= dstWidth)) {
910
                            continue;
911
                        }
912

913
                        Color color = interpolate(e[firstEndpoint], e[firstEndpoint + 1], idx, isSigned);
914
                        auto out = reinterpret_cast<Color *>(dst + dstBpp * x + dstPitch * y);
915
                        *out = color;
916
                    }
917
                }
918
            }
919
        };
920

921
    }  // namespace BC6H
922

923
    namespace BC7 {
924
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt
925
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format
926

927
        /**
         * A contiguous run of bits, described by the position of its first bit and its width.
         */
        struct Bitfield {
            int offset;  // Position of the first bit of the field
            int count;   // Width of the field in bits

            /**
             * @param bits Width of the following field
             * @return The field of `bits` bits that starts directly after the end of this one
             */
            constexpr Bitfield Then(const int bits) const { return {offset + count, bits}; }

            /**
             * @return If both fields cover exactly the same bits
             */
            constexpr bool operator==(const Bitfield &rhs) const {
                return offset == rhs.offset && count == rhs.count;
            }
        };
937

938
        struct Mode {
939
            const int IDX;  // Mode index
940
            const int NS;   // Number of subsets in each partition
941
            const int PB;   // Partition bits
942
            const int RB;   // Rotation bits
943
            const int ISB;  // Index selection bits
944
            const int CB;   // Color bits
945
            const int AB;   // Alpha bits
946
            const int EPB;  // Endpoint P-bits
947
            const int SPB;  // Shared P-bits
948
            const int IB;   // Primary index bits per element
949
            const int IBC;  // Primary index bits total
950
            const int IB2;  // Secondary index bits per element
951

952
            constexpr int NumColors() const { return NS * 2; }
953

954
            constexpr Bitfield Partition() const { return {IDX + 1, PB}; }
955

956
            constexpr Bitfield Rotation() const { return Partition().Then(RB); }
957

958
            constexpr Bitfield IndexSelection() const { return Rotation().Then(ISB); }
959

960
            constexpr Bitfield Red(int idx) const {
961
                return IndexSelection().Then(CB * idx).Then(CB);
962
            }
963

964
            constexpr Bitfield Green(int idx) const {
965
                return Red(NumColors() - 1).Then(CB * idx).Then(CB);
966
            }
967

968
            constexpr Bitfield Blue(int idx) const {
969
                return Green(NumColors() - 1).Then(CB * idx).Then(CB);
970
            }
971

972
            constexpr Bitfield Alpha(int idx) const {
973
                return Blue(NumColors() - 1).Then(AB * idx).Then(AB);
974
            }
975

976
            constexpr Bitfield EndpointPBit(int idx) const {
977
                return Alpha(NumColors() - 1).Then(EPB * idx).Then(EPB);
978
            }
979

980
            constexpr Bitfield SharedPBit0() const {
981
                return EndpointPBit(NumColors() - 1).Then(SPB);
982
            }
983

984
            constexpr Bitfield SharedPBit1() const {
985
                return SharedPBit0().Then(SPB);
986
            }
987

988
            constexpr Bitfield PrimaryIndex(int offset, int count) const {
989
                return SharedPBit1().Then(offset).Then(count);
990
            }
991

992
            constexpr Bitfield SecondaryIndex(int offset, int count) const {
993
                return SharedPBit1().Then(IBC + offset).Then(count);
994
            }
995
        };
996

997
        // Layout descriptors for modes 0-7, followed by a sentinel entry (IDX == -1) that stands for an invalid mode
        static constexpr Mode Modes[] = {
            //     IDX  NS   PB   RB   ISB  CB   AB   EPB  SPB  IB   IBC, IB2
            /**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0},
            /**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0},
            /**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0},
            /**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0},
            /**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3},
            /**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2},
            /**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0},
            /**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0},
            /**/ {-1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x00, 0x0},
        };
1009

1010
        static constexpr int MaxPartitions = 64;  // Number of rows in each of the partition and anchor tables
        static constexpr int MaxSubsets = 3;      // Largest number of subsets a block can be split into
1012

1013
        // Subset (0 or 1) of each of the 16 texels of a block, for every two-subset partition.
        // Indexed as [partition][texel], with texel = y * 4 + x.
        static constexpr uint8_t PartitionTable2[MaxPartitions][16] = {
            {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
            {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
            {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1},
            {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1},
            {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1},
            {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
            {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1},
            {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0},
            {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0},
            {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
            {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
            {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1},
            {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0},
            {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
            {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0},
            {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0},
            {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0},
            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
            {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0},
            {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0},
            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1},
            {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
            {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0},
            {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0},
            {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0},
            {0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
            {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1},
            {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0},
            {0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0},
            {0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0},
            {0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
            {0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1},
            {0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1},
            {0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0},
            {0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0},
            {0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0},
            {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0},
            {0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0},
            {0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1},
            {0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1},
            {0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0},
            {0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0},
            {0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1},
            {0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1},
            {0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1},
            {0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1},
            {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
            {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
            {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0},
            {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1},
        };
1079

1080
        // Subset (0, 1 or 2) of each of the 16 texels of a block, for every three-subset partition.
        // Indexed as [partition][texel], with texel = y * 4 + x.
        static constexpr uint8_t PartitionTable3[MaxPartitions][16] = {
            {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2},
            {0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1},
            {0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1},
            {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2},
            {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2},
            {0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1},
            {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1},
            {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2},
            {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2},
            {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2},
            {0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2},
            {0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2},
            {0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2},
            {0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2},
            {0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0},
            {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2},
            {0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0},
            {0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2},
            {0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1},
            {0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2},
            {0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1},
            {0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2},
            {0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0},
            {0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0},
            {0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2},
            {0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0},
            {0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1},
            {0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2},
            {0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2},
            {0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1},
            {0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1},
            {0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2},
            {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1},
            {0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2},
            {0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0},
            {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0},
            {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
            {0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0},
            {0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1},
            {0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1},
            {0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2},
            {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1},
            {0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2},
            {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1},
            {0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1},
            {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1},
            {0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1},
            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2},
            {0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1},
            {0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2},
            {0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2},
            {0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2},
            {0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2},
            {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2},
            {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2},
            {0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2},
            {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2},
            {0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2},
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2},
            {0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1},
            {0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2},
            {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2},
            {0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0},
        };
1146

1147
        // Anchor texel index of subset 1 for every two-subset partition (the anchor of subset 0 is always texel 0)
        static constexpr uint8_t AnchorTable2[MaxPartitions] = {
            // @fmt:off
            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
            0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
            0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
            0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
            0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
            0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
            0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
            // @fmt:on
        };
1159

1160
        // Anchor texel index of subset 1 for every three-subset partition
        static constexpr uint8_t AnchorTable3a[MaxPartitions] = {
            // @fmt:off
            0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf,
            0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3,
            0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa,
            0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf,
            0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf,
            0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf,
            0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa,
            0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3,
            // @fmt:on
        };
1172

1173
        // Anchor texel index of subset 2 for every three-subset partition
        static constexpr uint8_t AnchorTable3b[MaxPartitions] = {
            // @fmt:off
            0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8,
            0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8,
            0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8,
            0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8,
            0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa,
            0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8,
            0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
            0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8,
            // @fmt:on
        };
1185

1186
        /**
         * A 4-byte texel made of three color bytes followed by one alpha byte.
         */
        struct Color {
            /**
             * Three 8-bit channels, stored in memory in the order b, g, r.
             * All operators work per channel in `int` and truncate each result to 8 bits.
             */
            struct RGB {
                RGB() = default;  // Leaves the channels uninitialized

                RGB(uint8_t r, uint8_t g, uint8_t b)
                    : b(b), g(g), r(r) {}

                // Takes the promoted results of channel arithmetic and keeps only the low 8 bits of each
                RGB(int r, int g, int b)
                    : b(static_cast<uint8_t>(b)), g(static_cast<uint8_t>(g)), r(static_cast<uint8_t>(r)) {}

                RGB operator<<(int shift) const { return {r << shift, g << shift, b << shift}; }

                RGB operator>>(int shift) const { return {r >> shift, g >> shift, b >> shift}; }

                // ORs the same bits into every channel
                RGB operator|(int bits) const { return {r | bits, g | bits, b | bits}; }

                RGB operator|(const RGB &rhs) const { return {r | rhs.r, g | rhs.g, b | rhs.b}; }

                RGB operator+(const RGB &rhs) const { return {r + rhs.r, g + rhs.g, b + rhs.b}; }

                uint8_t b;
                uint8_t g;
                uint8_t r;
            };

            RGB rgb;
            uint8_t a;
        };

        static_assert(sizeof(Color) == 4, "Color size must be 4 bytes");
1216

1217
        struct Block {
1218
            constexpr uint64_t Get(const Bitfield &bf) const {
1219
                uint64_t mask = (1ULL << bf.count) - 1;
1220
                if (bf.offset + bf.count <= 64) {
1221
                    return (low >> bf.offset) & mask;
1222
                }
1223
                if (bf.offset >= 64) {
1224
                    return (high >> (bf.offset - 64)) & mask;
1225
                }
1226
                return ((low >> bf.offset) | (high << (64 - bf.offset))) & mask;
1227
            }
1228

1229
            const Mode &mode() const {
1230
                if ((low & 0b00000001) != 0) {
1231
                    return Modes[0];
1232
                }
1233
                if ((low & 0b00000010) != 0) {
1234
                    return Modes[1];
1235
                }
1236
                if ((low & 0b00000100) != 0) {
1237
                    return Modes[2];
1238
                }
1239
                if ((low & 0b00001000) != 0) {
1240
                    return Modes[3];
1241
                }
1242
                if ((low & 0b00010000) != 0) {
1243
                    return Modes[4];
1244
                }
1245
                if ((low & 0b00100000) != 0) {
1246
                    return Modes[5];
1247
                }
1248
                if ((low & 0b01000000) != 0) {
1249
                    return Modes[6];
1250
                }
1251
                if ((low & 0b10000000) != 0) {
1252
                    return Modes[7];
1253
                }
1254
                return Modes[8];  // Invalid mode
1255
            }
1256

1257
            struct IndexInfo {
1258
                uint64_t value;
1259
                int numBits;
1260
            };
1261

1262
            uint8_t interpolate(uint8_t e0, uint8_t e1, const IndexInfo &index) const {
1263
                static constexpr uint16_t weights2[] = {0, 21, 43, 64};
1264
                static constexpr uint16_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64};
1265
                static constexpr uint16_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30,
1266
                                                        34, 38, 43, 47, 51, 55, 60, 64};
1267
                static constexpr uint16_t const *weightsN[] = {
1268
                    nullptr, nullptr, weights2, weights3, weights4
1269
                };
1270
                auto weights = weightsN[index.numBits];
1271
                assert(weights != nullptr);
1272
                return (uint8_t) (((64 - weights[index.value]) * uint16_t(e0) + weights[index.value] * uint16_t(e1) + 32) >> 6);
1273
            }
1274

1275
            void decode(uint8_t *dst, size_t dstX, size_t dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch) const {
1276
                auto const &mode = this->mode();
1277

1278
                if (mode.IDX < 0)  // Invalid mode:
1279
                {
1280
                    for (size_t y = 0; y < 4 && y + dstY < dstHeight; y++) {
1281
                        for (size_t x = 0; x < 4 && x + dstX < dstWidth; x++) {
1282
                            auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
1283
                            out->rgb = {0, 0, 0};
1284
                            out->a = 0;
1285
                        }
1286
                    }
1287
                    return;
1288
                }
1289

1290
                using Endpoint = std::array<Color, 2>;
1291
                std::array<Endpoint, MaxSubsets> subsets;
1292

1293
                for (size_t i = 0; i < mode.NS; i++) {
1294
                    auto &subset = subsets[i];
1295
                    subset[0].rgb.r = Get(mode.Red(i * 2 + 0));
1296
                    subset[0].rgb.g = Get(mode.Green(i * 2 + 0));
1297
                    subset[0].rgb.b = Get(mode.Blue(i * 2 + 0));
1298
                    subset[0].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 0)) : 255;
1299

1300
                    subset[1].rgb.r = Get(mode.Red(i * 2 + 1));
1301
                    subset[1].rgb.g = Get(mode.Green(i * 2 + 1));
1302
                    subset[1].rgb.b = Get(mode.Blue(i * 2 + 1));
1303
                    subset[1].a = (mode.AB > 0) ? Get(mode.Alpha(i * 2 + 1)) : 255;
1304
                }
1305

1306
                if (mode.SPB > 0) {
1307
                    auto pbit0 = Get(mode.SharedPBit0());
1308
                    auto pbit1 = Get(mode.SharedPBit1());
1309
                    subsets[0][0].rgb = (subsets[0][0].rgb << 1) | pbit0;
1310
                    subsets[0][1].rgb = (subsets[0][1].rgb << 1) | pbit0;
1311
                    subsets[1][0].rgb = (subsets[1][0].rgb << 1) | pbit1;
1312
                    subsets[1][1].rgb = (subsets[1][1].rgb << 1) | pbit1;
1313
                }
1314

1315
                if (mode.EPB > 0) {
1316
                    for (size_t i = 0; i < mode.NS; i++) {
1317
                        auto &subset = subsets[i];
1318
                        auto pbit0 = Get(mode.EndpointPBit(i * 2 + 0));
1319
                        auto pbit1 = Get(mode.EndpointPBit(i * 2 + 1));
1320
                        subset[0].rgb = (subset[0].rgb << 1) | pbit0;
1321
                        subset[1].rgb = (subset[1].rgb << 1) | pbit1;
1322
                        if (mode.AB > 0) {
1323
                            subset[0].a = (subset[0].a << 1) | pbit0;
1324
                            subset[1].a = (subset[1].a << 1) | pbit1;
1325
                        }
1326
                    }
1327
                }
1328

1329
                auto const colorBits = mode.CB + mode.SPB + mode.EPB;
1330
                auto const alphaBits = mode.AB + mode.SPB + mode.EPB;
1331

1332
                for (size_t i = 0; i < mode.NS; i++) {
1333
                    auto &subset = subsets[i];
1334
                    subset[0].rgb = subset[0].rgb << (8 - colorBits);
1335
                    subset[1].rgb = subset[1].rgb << (8 - colorBits);
1336
                    subset[0].rgb = subset[0].rgb | (subset[0].rgb >> colorBits);
1337
                    subset[1].rgb = subset[1].rgb | (subset[1].rgb >> colorBits);
1338

1339
                    if (mode.AB > 0) {
1340
                        subset[0].a = subset[0].a << (8 - alphaBits);
1341
                        subset[1].a = subset[1].a << (8 - alphaBits);
1342
                        subset[0].a = subset[0].a | (subset[0].a >> alphaBits);
1343
                        subset[1].a = subset[1].a | (subset[1].a >> alphaBits);
1344
                    }
1345
                }
1346

1347
                int colorIndexBitOffset = 0;
1348
                int alphaIndexBitOffset = 0;
1349
                for (int y = 0; y < 4; y++) {
1350
                    for (int x = 0; x < 4; x++) {
1351
                        auto texelIdx = y * 4 + x;
1352
                        auto partitionIdx = Get(mode.Partition());
1353
                        assert(partitionIdx < MaxPartitions);
1354
                        auto subsetIdx = subsetIndex(mode, partitionIdx, texelIdx);
1355
                        assert(subsetIdx < MaxSubsets);
1356
                        auto const &subset = subsets[subsetIdx];
1357

1358
                        auto anchorIdx = anchorIndex(mode, partitionIdx, subsetIdx);
1359
                        auto isAnchor = anchorIdx == texelIdx;
1360
                        auto colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset);
1361
                        auto alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset);
1362

1363
                        if (y + dstY >= dstHeight || x + dstX >= dstWidth) {
1364
                            // Don't be tempted to skip early at the loops:
1365
                            // The calls to colorIndex() and alphaIndex() adjust bit
1366
                            // offsets that need to be carefully tracked.
1367
                            continue;
1368
                        }
1369

1370
                        Color output;
1371
                        // Note: We flip r and b channels past this point as the texture storage is BGR while the output is RGB
1372
                        output.rgb.r = interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx);
1373
                        output.rgb.g = interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx);
1374
                        output.rgb.b = interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx);
1375
                        output.a = interpolate(subset[0].a, subset[1].a, alphaIdx);
1376

1377
                        switch (Get(mode.Rotation())) {
1378
                            default:
1379
                                break;
1380
                            case 1:
1381
                                std::swap(output.a, output.rgb.b);
1382
                                break;
1383
                            case 2:
1384
                                std::swap(output.a, output.rgb.g);
1385
                                break;
1386
                            case 3:
1387
                                std::swap(output.a, output.rgb.r);
1388
                                break;
1389
                        }
1390

1391
                        auto out = reinterpret_cast<Color *>(dst + sizeof(Color) * x + dstPitch * y);
1392
                        *out = output;
1393
                    }
1394
                }
1395
            }
1396

1397
            int subsetIndex(const Mode &mode, int partitionIdx, int texelIndex) const {
1398
                switch (mode.NS) {
1399
                    default:
1400
                        return 0;
1401
                    case 2:
1402
                        return PartitionTable2[partitionIdx][texelIndex];
1403
                    case 3:
1404
                        return PartitionTable3[partitionIdx][texelIndex];
1405
                }
1406
            }
1407

1408
            int anchorIndex(const Mode &mode, int partitionIdx, int subsetIdx) const {
1409
                // ARB_texture_compression_bptc states:
1410
                // "In partition zero, the anchor index is always index zero.
1411
                // In other partitions, the anchor index is specified by tables
1412
                // Table.A2 and Table.A3.""
1413
                // Note: This is really confusing - I believe they meant subset instead
1414
                // of partition here.
1415
                switch (subsetIdx) {
1416
                    default:
1417
                        return 0;
1418
                    case 1:
1419
                        return mode.NS == 2 ? AnchorTable2[partitionIdx] : AnchorTable3a[partitionIdx];
1420
                    case 2:
1421
                        return AnchorTable3b[partitionIdx];
1422
                }
1423
            }
1424

1425
            IndexInfo colorIndex(const Mode &mode, bool isAnchor,
1426
                                 int &indexBitOffset) const {
1427
                // ARB_texture_compression_bptc states:
1428
                // "The index value for interpolating color comes from the secondary
1429
                // index for the texel if the format has an index selection bit and its
1430
                // value is one and from the primary index otherwise.""
1431
                auto idx = Get(mode.IndexSelection());
1432
                assert(idx <= 1);
1433
                bool secondary = idx == 1;
1434
                auto numBits = secondary ? mode.IB2 : mode.IB;
1435
                auto numReadBits = numBits - (isAnchor ? 1 : 0);
1436
                auto index =
1437
                    Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits)
1438
                                  : mode.PrimaryIndex(indexBitOffset, numReadBits));
1439
                indexBitOffset += numReadBits;
1440
                return {index, numBits};
1441
            }
1442

1443
            IndexInfo alphaIndex(const Mode &mode, bool isAnchor,
1444
                                 int &indexBitOffset) const {
1445
                // ARB_texture_compression_bptc states:
1446
                // "The alpha index comes from the secondary index if the block has a
1447
                // secondary index and the block either doesn't have an index selection
1448
                // bit or that bit is zero and the primary index otherwise."
1449
                auto idx = Get(mode.IndexSelection());
1450
                assert(idx <= 1);
1451
                bool secondary = (mode.IB2 != 0) && (idx == 0);
1452
                auto numBits = secondary ? mode.IB2 : mode.IB;
1453
                auto numReadBits = numBits - (isAnchor ? 1 : 0);
1454
                auto index =
1455
                    Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits)
1456
                                  : mode.PrimaryIndex(indexBitOffset, numReadBits));
1457
                indexBitOffset += numReadBits;
1458
                return {index, numBits};
1459
            }
1460

1461
            // Assumes little-endian
1462
            uint64_t low;
1463
            uint64_t high;
1464
        };
1465

1466
    }  // namespace BC7
1467
}  // anonymous namespace
1468

1469
namespace bcn {
1470
    constexpr size_t R8Bpp{1}; //!< The amount of bytes per pixel in R8
1471
    constexpr size_t R8g8Bpp{2}; //!< The amount of bytes per pixel in R8G8
1472
    constexpr size_t R8g8b8a8Bpp{4}; //!< The amount of bytes per pixel in R8G8B8A8
1473
    constexpr size_t R16g16b16a16Bpp{8}; //!< The amount of bytes per pixel in R16G16B16
1474

1475
    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1476
        const auto *color{reinterpret_cast<const BC_color *>(src)};
1477
        size_t pitch{R8g8b8a8Bpp * width};
1478
        color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, true, false);
1479
    }
1480

1481
    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1482
        const auto *alpha{reinterpret_cast<const BC_alpha *>(src)};
1483
        const auto *color{reinterpret_cast<const BC_color *>(src + 8)};
1484
        size_t pitch{R8g8b8a8Bpp * width};
1485
        color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true);
1486
        alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp);
1487
    }
1488

1489
    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1490
        const auto *alpha{reinterpret_cast<const BC_channel *>(src)};
1491
        const auto *color{reinterpret_cast<const BC_color *>(src + 8)};
1492
        size_t pitch{R8g8b8a8Bpp * width};
1493
        color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, false, true);
1494
        alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, 3, false);
1495
    }
1496

1497
    void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1498
        const auto *red{reinterpret_cast<const BC_channel *>(src)};
1499
        size_t pitch{R8Bpp * width};
1500
        red->decode(dst, x, y, width, height, pitch, R8Bpp, 0, isSigned);
1501
    }
1502

1503
    void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1504
        const auto *red{reinterpret_cast<const BC_channel *>(src)};
1505
        const auto *green{reinterpret_cast<const BC_channel *>(src + 8)};
1506
        size_t pitch{R8g8Bpp * width};
1507
        red->decode(dst, x, y, width, height, pitch, R8g8Bpp, 0, isSigned);
1508
        green->decode(dst, x, y, width, height, pitch, R8g8Bpp, 1, isSigned);
1509
    }
1510

1511
    void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) {
1512
        const auto *block{reinterpret_cast<const BC6H::Block *>(src)};
1513
        size_t pitch{R16g16b16a16Bpp * width};
1514
        block->decode(dst, x, y, width, height, pitch, R16g16b16a16Bpp, isSigned);
1515
    }
1516

1517
    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) {
1518
        const auto *block{reinterpret_cast<const BC7::Block *>(src)};
1519
        size_t pitch{R8g8b8a8Bpp * width};
1520
        block->decode(dst, x, y, width, height, pitch);
1521
    }
1522
}
1523

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.