Pillow
908 строк · 27.6 Кб
1/*
2* The Python Imaging Library
3*
4* decoder for DXTn-compressed data
5*
6* Format documentation:
7* https://web.archive.org/web/20170802060935/http://oss.sgi.com/projects/ogl-sample/registry/EXT/texture_compression_s3tc.txt
8*
9* The contents of this file are in the public domain (CC0)
10* Full text of the CC0 license:
11* https://creativecommons.org/publicdomain/zero/1.0/
12*/
13
14#include "Imaging.h"
15
16#include "Bcn.h"
17
18typedef struct {
19UINT8 r, g, b, a;
20} rgba;
21
22typedef struct {
23UINT8 l;
24} lum;
25
26typedef struct {
27UINT16 c0, c1;
28UINT32 lut;
29} bc1_color;
30
31typedef struct {
32UINT8 a0, a1;
33UINT8 lut[6];
34} bc3_alpha;
35
36typedef struct {
37INT8 a0, a1;
38UINT8 lut[6];
39} bc5s_alpha;
40
/*
 * Little-endian loads from a byte pointer.
 *
 * Both expansions are wrapped in parentheses so they bind as a single value
 * when used inside a larger expression (e.g. `LOAD16(p) & mask`).
 *
 * LOAD32 widens every byte to UINT32 before shifting: an 8-bit value promoted
 * to int and shifted left by 24 would move a set top bit into the sign bit,
 * which is undefined behaviour for signed integers.
 */
#define LOAD16(p) ((p)[0] | ((p)[1] << 8))

#define LOAD32(p)                                                         \
    ((UINT32)(p)[0] | ((UINT32)(p)[1] << 8) | ((UINT32)(p)[2] << 16) | \
     ((UINT32)(p)[3] << 24))
44
45static void
46bc1_color_load(bc1_color *dst, const UINT8 *src) {
47dst->c0 = LOAD16(src);
48dst->c1 = LOAD16(src + 2);
49dst->lut = LOAD32(src + 4);
50}
51
52static rgba
53decode_565(UINT16 x) {
54rgba c;
55int r, g, b;
56r = (x & 0xf800) >> 8;
57r |= r >> 5;
58c.r = r;
59g = (x & 0x7e0) >> 3;
60g |= g >> 6;
61c.g = g;
62b = (x & 0x1f) << 3;
63b |= b >> 5;
64c.b = b;
65c.a = 0xff;
66return c;
67}
68
69static void
70decode_bc1_color(rgba *dst, const UINT8 *src, int separate_alpha) {
71bc1_color col;
72rgba p[4];
73int n, cw;
74UINT16 r0, g0, b0, r1, g1, b1;
75bc1_color_load(&col, src);
76
77p[0] = decode_565(col.c0);
78r0 = p[0].r;
79g0 = p[0].g;
80b0 = p[0].b;
81p[1] = decode_565(col.c1);
82r1 = p[1].r;
83g1 = p[1].g;
84b1 = p[1].b;
85
86/* NOTE: BC2 and BC3 reuse BC1 color blocks but always act like c0 > c1 */
87if (col.c0 > col.c1 || separate_alpha) {
88p[2].r = (2 * r0 + 1 * r1) / 3;
89p[2].g = (2 * g0 + 1 * g1) / 3;
90p[2].b = (2 * b0 + 1 * b1) / 3;
91p[2].a = 0xff;
92p[3].r = (1 * r0 + 2 * r1) / 3;
93p[3].g = (1 * g0 + 2 * g1) / 3;
94p[3].b = (1 * b0 + 2 * b1) / 3;
95p[3].a = 0xff;
96} else {
97p[2].r = (r0 + r1) / 2;
98p[2].g = (g0 + g1) / 2;
99p[2].b = (b0 + b1) / 2;
100p[2].a = 0xff;
101p[3].r = 0;
102p[3].g = 0;
103p[3].b = 0;
104p[3].a = 0;
105}
106for (n = 0; n < 16; n++) {
107cw = 3 & (col.lut >> (2 * n));
108dst[n] = p[cw];
109}
110}
111
112static void
113decode_bc3_alpha(char *dst, const UINT8 *src, int stride, int o, int sign) {
114UINT16 a0, a1;
115UINT8 a[8];
116int n, lut1, lut2, aw;
117if (sign == 1) {
118bc5s_alpha b;
119memcpy(&b, src, sizeof(bc5s_alpha));
120a0 = b.a0 + 128;
121a1 = b.a1 + 128;
122lut1 = b.lut[0] | (b.lut[1] << 8) | (b.lut[2] << 16);
123lut2 = b.lut[3] | (b.lut[4] << 8) | (b.lut[5] << 16);
124} else {
125bc3_alpha b;
126memcpy(&b, src, sizeof(bc3_alpha));
127a0 = b.a0;
128a1 = b.a1;
129lut1 = b.lut[0] | (b.lut[1] << 8) | (b.lut[2] << 16);
130lut2 = b.lut[3] | (b.lut[4] << 8) | (b.lut[5] << 16);
131}
132
133a[0] = (UINT8)a0;
134a[1] = (UINT8)a1;
135if (a0 > a1) {
136a[2] = (6 * a0 + 1 * a1) / 7;
137a[3] = (5 * a0 + 2 * a1) / 7;
138a[4] = (4 * a0 + 3 * a1) / 7;
139a[5] = (3 * a0 + 4 * a1) / 7;
140a[6] = (2 * a0 + 5 * a1) / 7;
141a[7] = (1 * a0 + 6 * a1) / 7;
142} else {
143a[2] = (4 * a0 + 1 * a1) / 5;
144a[3] = (3 * a0 + 2 * a1) / 5;
145a[4] = (2 * a0 + 3 * a1) / 5;
146a[5] = (1 * a0 + 4 * a1) / 5;
147a[6] = 0;
148a[7] = 0xff;
149}
150for (n = 0; n < 8; n++) {
151aw = 7 & (lut1 >> (3 * n));
152dst[stride * n + o] = a[aw];
153}
154for (n = 0; n < 8; n++) {
155aw = 7 & (lut2 >> (3 * n));
156dst[stride * (8 + n) + o] = a[aw];
157}
158}
159
160static void
161decode_bc1_block(rgba *col, const UINT8 *src) {
162decode_bc1_color(col, src, 0);
163}
164
165static void
166decode_bc2_block(rgba *col, const UINT8 *src) {
167int n, bitI, byI, av;
168decode_bc1_color(col, src + 8, 1);
169for (n = 0; n < 16; n++) {
170bitI = n * 4;
171byI = bitI >> 3;
172av = 0xf & (src[byI] >> (bitI & 7));
173av = (av << 4) | av;
174col[n].a = av;
175}
176}
177
178static void
179decode_bc3_block(rgba *col, const UINT8 *src) {
180decode_bc1_color(col, src + 8, 1);
181decode_bc3_alpha((char *)col, src, sizeof(col[0]), 3, 0);
182}
183
184static void
185decode_bc4_block(lum *col, const UINT8 *src) {
186decode_bc3_alpha((char *)col, src, sizeof(col[0]), 0, 0);
187}
188
189static void
190decode_bc5_block(rgba *col, const UINT8 *src, int sign) {
191decode_bc3_alpha((char *)col, src, sizeof(col[0]), 0, sign);
192decode_bc3_alpha((char *)col, src + 8, sizeof(col[0]), 1, sign);
193}
194
195/* BC6 and BC7 are described here:
196https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_bptc.txt
197*/
198
199static UINT8
200get_bit(const UINT8 *src, int bit) {
201int by = bit >> 3;
202bit &= 7;
203return (src[by] >> bit) & 1;
204}
205
206static UINT8
207get_bits(const UINT8 *src, int bit, int count) {
208UINT8 v;
209int x;
210int by = bit >> 3;
211bit &= 7;
212if (!count) {
213return 0;
214}
215if (bit + count <= 8) {
216v = (src[by] >> bit) & ((1 << count) - 1);
217} else {
218x = src[by] | (src[by + 1] << 8);
219v = (x >> bit) & ((1 << count) - 1);
220}
221return v;
222}
223
/* BC7 */

/* Per-mode layout parameters, as consumed by decode_bc7_block(). */
typedef struct {
    char ns;  /* number of subsets (the block carries 2 * ns endpoints) */
    char pb;  /* bits in the partition field */
    char rb;  /* bits in the rotation field */
    char isb; /* bits in the index-selection field */
    char cb;  /* bits per colour channel of each endpoint */
    char ab;  /* bits per alpha endpoint; 0 means alpha is fixed at 255 */
    char epb; /* non-zero: one extra p-bit per endpoint */
    char spb; /* non-zero: one extra p-bit shared by each endpoint pair */
    char ib;  /* bits per primary index */
    char ib2; /* bits per secondary index; 0 means there is none */
} bc7_mode_info;
237
238static const bc7_mode_info bc7_modes[] = {
239{3, 4, 0, 0, 4, 0, 1, 0, 3, 0},
240{2, 6, 0, 0, 6, 0, 0, 1, 3, 0},
241{3, 6, 0, 0, 5, 0, 0, 0, 2, 0},
242{2, 6, 0, 0, 7, 0, 1, 0, 2, 0},
243{1, 0, 2, 1, 5, 6, 0, 0, 2, 3},
244{1, 0, 2, 0, 7, 8, 0, 0, 2, 2},
245{1, 0, 0, 0, 7, 7, 1, 0, 4, 0},
246{2, 6, 0, 0, 5, 5, 1, 0, 2, 0}
247};
248
249/* Subset indices:
250Table.P2, 1 bit per index */
251static const UINT16 bc7_si2[] = {
2520xcccc, 0x8888, 0xeeee, 0xecc8, 0xc880, 0xfeec, 0xfec8, 0xec80, 0xc800, 0xffec,
2530xfe80, 0xe800, 0xffe8, 0xff00, 0xfff0, 0xf000, 0xf710, 0x008e, 0x7100, 0x08ce,
2540x008c, 0x7310, 0x3100, 0x8cce, 0x088c, 0x3110, 0x6666, 0x366c, 0x17e8, 0x0ff0,
2550x718e, 0x399c, 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, 0x3c3c, 0x55aa, 0x9696, 0xa55a,
2560x73ce, 0x13c8, 0x324c, 0x3bdc, 0x6996, 0xc33c, 0x9966, 0x0660, 0x0272, 0x04e4,
2570x4e40, 0x2720, 0xc936, 0x936c, 0x39c6, 0x639c, 0x9336, 0x9cc6, 0x817e, 0xe718,
2580xccf0, 0x0fcc, 0x7744, 0xee22
259};
260
261/* Table.P3, 2 bits per index */
262static const UINT32 bc7_si3[] = {
2630xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, 0xa5a50000, 0xa0a05050, 0x5555a0a0,
2640x5a5a5050, 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, 0x94949494, 0xa4a4a4a4,
2650xa9a59450, 0x2a0a4250, 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, 0xa8a85454,
2660x6a6a4040, 0xa4a45000, 0x1a1a0500, 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
2670xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, 0xa9a58000, 0x5090a0a8, 0xa8a09050,
2680x24242424, 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, 0x500aa550, 0xaaaa4444,
2690x66660000, 0xa5a0a5a0, 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, 0xaa444444,
2700x54a854a8, 0x95809580, 0x96969600, 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
2710xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, 0x40804080, 0xa9a8a9a8, 0xaaaaaa44,
2720x2a4a5254
273};
274
/* Anchor indices (Table.A2): for each partition, the texel whose index is
 * stored with one bit fewer when the block has two subsets. */
static const char bc7_ai0[64] = {
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 2,  8,  2,  2,  8,  8,  15, 2,  8,  2,  2,  8,  8,  2,  2,
    15, 15, 6,  8,  2,  8,  15, 15, 2,  8,  2,  2,  2,  15, 15, 6,
    6,  2,  6,  8,  15, 15, 2,  2,  15, 15, 15, 15, 15, 2,  2,  15
};
282
/* Anchor indices (Table.A3a): for each partition, the first extra anchor
 * texel checked when the block has three subsets. */
static const char bc7_ai1[64] = {
    3,  3,  15, 15, 8,  3,  15, 15, 8,  8,  6,  6,  6,  5,  3,  3,
    3,  3,  8,  15, 3,  3,  6,  10, 5,  8,  8,  6,  8,  5,  15, 15,
    8,  15, 3,  5,  6,  10, 8,  15, 15, 3,  15, 5,  15, 15, 15, 15,
    3,  15, 5,  5,  5,  8,  5,  10, 5,  10, 8,  13, 15, 12, 3,  3
};
289
/* Anchor indices (Table.A3b): for each partition, the second extra anchor
 * texel checked when the block has three subsets. */
static const char bc7_ai2[64] = {
    15, 8,  8,  3,  15, 15, 3,  8,  15, 15, 15, 15, 15, 15, 15, 8,
    15, 8,  15, 3,  15, 8,  15, 8,  3,  15, 6,  10, 15, 15, 10, 8,
    15, 3,  15, 10, 10, 8,  9,  10, 6,  15, 8,  15, 3,  6,  6,  8,
    15, 3,  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 3,  15, 15, 8
};
296
/* Interpolation weights (out of 64) for 2-, 3- and 4-bit indices. */
static const char bc7_weights2[4] = {0, 21, 43, 64};
static const char bc7_weights3[8] = {0, 9, 18, 27, 37, 46, 55, 64};
static const char bc7_weights4[16] = {
    0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64
};

/*
 * Return the weight table for an index width of n bits.
 * Any n other than 2 or 3 selects the 4-bit table.
 */
static const char *
bc7_get_weights(int n) {
    switch (n) {
        case 2:
            return bc7_weights2;
        case 3:
            return bc7_weights3;
        default:
            return bc7_weights4;
    }
}
314
315static int
316bc7_get_subset(int ns, int partition, int n) {
317if (ns == 2) {
318return 1 & (bc7_si2[partition] >> n);
319}
320if (ns == 3) {
321return 3 & (bc7_si3[partition] >> (2 * n));
322}
323return 0;
324}
325
326static UINT8
327expand_quantized(UINT8 v, int bits) {
328v = v << (8 - bits);
329return v | (v >> bits);
330}
331
332static void
333bc7_lerp(rgba *dst, const rgba *e, int s0, int s1) {
334int t0 = 64 - s0;
335int t1 = 64 - s1;
336dst->r = (UINT8)((t0 * e[0].r + s0 * e[1].r + 32) >> 6);
337dst->g = (UINT8)((t0 * e[0].g + s0 * e[1].g + 32) >> 6);
338dst->b = (UINT8)((t0 * e[0].b + s0 * e[1].b + 32) >> 6);
339dst->a = (UINT8)((t1 * e[0].a + s1 * e[1].a + 32) >> 6);
340}
341
342static void
343decode_bc7_block(rgba *col, const UINT8 *src) {
344rgba endpoints[6];
345int bit = 0, cibit, aibit;
346int mode = src[0];
347int i, j;
348int numep, cb, ab, ib, ib2, i0, i1, s;
349UINT8 index_sel, partition, rotation, val;
350const char *cw, *aw;
351const bc7_mode_info *info;
352
353/* mode is the number of unset bits before the first set bit: */
354if (!mode) {
355/* degenerate case when no bits set */
356for (i = 0; i < 16; i++) {
357col[i].r = col[i].g = col[i].b = 0;
358col[i].a = 255;
359}
360return;
361}
362while (!(mode & (1 << bit++)));
363mode = bit - 1;
364info = &bc7_modes[mode];
365/* color selection bits: {subset}{endpoint} */
366cb = info->cb;
367ab = info->ab;
368cw = bc7_get_weights(info->ib);
369aw = bc7_get_weights((ab && info->ib2) ? info->ib2 : info->ib);
370
371#define LOAD(DST, N) \
372DST = get_bits(src, bit, N); \
373bit += N;
374LOAD(partition, info->pb);
375LOAD(rotation, info->rb);
376LOAD(index_sel, info->isb);
377numep = info->ns << 1;
378
379/* red */
380for (i = 0; i < numep; i++) {
381LOAD(val, cb);
382endpoints[i].r = val;
383}
384
385/* green */
386for (i = 0; i < numep; i++) {
387LOAD(val, cb);
388endpoints[i].g = val;
389}
390
391/* blue */
392for (i = 0; i < numep; i++) {
393LOAD(val, cb);
394endpoints[i].b = val;
395}
396
397/* alpha */
398for (i = 0; i < numep; i++) {
399if (ab) {
400LOAD(val, ab);
401} else {
402val = 255;
403}
404endpoints[i].a = val;
405}
406
407/* p-bits */
408#define ASSIGN_P(x) x = (x << 1) | val
409if (info->epb) {
410/* per endpoint */
411cb++;
412if (ab) {
413ab++;
414}
415for (i = 0; i < numep; i++) {
416LOAD(val, 1);
417ASSIGN_P(endpoints[i].r);
418ASSIGN_P(endpoints[i].g);
419ASSIGN_P(endpoints[i].b);
420if (ab) {
421ASSIGN_P(endpoints[i].a);
422}
423}
424}
425if (info->spb) {
426/* per subset */
427cb++;
428if (ab) {
429ab++;
430}
431for (i = 0; i < numep; i += 2) {
432LOAD(val, 1);
433for (j = 0; j < 2; j++) {
434ASSIGN_P(endpoints[i + j].r);
435ASSIGN_P(endpoints[i + j].g);
436ASSIGN_P(endpoints[i + j].b);
437if (ab) {
438ASSIGN_P(endpoints[i + j].a);
439}
440}
441}
442}
443#undef ASSIGN_P
444#define EXPAND(x, b) x = expand_quantized(x, b)
445for (i = 0; i < numep; i++) {
446EXPAND(endpoints[i].r, cb);
447EXPAND(endpoints[i].g, cb);
448EXPAND(endpoints[i].b, cb);
449if (ab) {
450EXPAND(endpoints[i].a, ab);
451}
452}
453#undef EXPAND
454#undef LOAD
455cibit = bit;
456aibit = cibit + 16 * info->ib - info->ns;
457for (i = 0; i < 16; i++) {
458s = bc7_get_subset(info->ns, partition, i) << 1;
459ib = info->ib;
460if (i == 0) {
461ib--;
462} else if (info->ns == 2) {
463if (i == bc7_ai0[partition]) {
464ib--;
465}
466} else if (info->ns == 3) {
467if (i == bc7_ai1[partition]) {
468ib--;
469} else if (i == bc7_ai2[partition]) {
470ib--;
471}
472}
473i0 = get_bits(src, cibit, ib);
474cibit += ib;
475
476if (ab && info->ib2) {
477ib2 = info->ib2;
478if (ib2 && i == 0) {
479ib2--;
480}
481i1 = get_bits(src, aibit, ib2);
482aibit += ib2;
483if (index_sel) {
484bc7_lerp(&col[i], &endpoints[s], aw[i1], cw[i0]);
485} else {
486bc7_lerp(&col[i], &endpoints[s], cw[i0], aw[i1]);
487}
488} else {
489bc7_lerp(&col[i], &endpoints[s], cw[i0], cw[i0]);
490}
491#define ROTATE(x, y) \
492val = x; \
493x = y; \
494y = val
495if (rotation == 1) {
496ROTATE(col[i].r, col[i].a);
497} else if (rotation == 2) {
498ROTATE(col[i].g, col[i].a);
499} else if (rotation == 3) {
500ROTATE(col[i].b, col[i].a);
501}
502#undef ROTATE
503}
504}
505
/* BC6 */

/* Per-mode layout parameters, as consumed by decode_bc6_block(). */
typedef struct {
    char ns;  /* subset (region) count */
    char tr;  /* non-zero when later endpoints are stored as deltas */
    char pb;  /* bits in the partition field */
    char epb; /* bits of the first endpoint's channels */
    char rb;  /* bits of the red channel of the remaining (delta) endpoints */
    char gb;  /* bits of the green channel of the remaining endpoints */
    char bb;  /* bits of the blue channel of the remaining endpoints */
} bc6_mode_info;
516
517static const bc6_mode_info bc6_modes[] = {
518// 00
519{2, 1, 5, 10, 5, 5, 5},
520// 01
521{2, 1, 5, 7, 6, 6, 6},
522// 10
523{2, 1, 5, 11, 5, 4, 4},
524{2, 1, 5, 11, 4, 5, 4},
525{2, 1, 5, 11, 4, 4, 5},
526{2, 1, 5, 9, 5, 5, 5},
527{2, 1, 5, 8, 6, 5, 5},
528{2, 1, 5, 8, 5, 6, 5},
529{2, 1, 5, 8, 5, 5, 6},
530{2, 0, 5, 6, 6, 6, 6},
531// 11
532{1, 0, 0, 10, 10, 10, 10},
533{1, 1, 0, 11, 9, 9, 9},
534{1, 1, 0, 12, 8, 8, 8},
535{1, 1, 0, 16, 4, 4, 4}
536};
537
538/* Table.F, encoded as a sequence of bit indices */
539static const UINT8 bc6_bit_packings[][75] = {
540{116, 132, 180, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17,
54118, 19, 20, 21, 22, 23, 24, 25, 32, 33, 34, 35, 36, 37, 38,
54239, 40, 41, 48, 49, 50, 51, 52, 164, 112, 113, 114, 115, 64, 65,
54366, 67, 68, 176, 160, 161, 162, 163, 80, 81, 82, 83, 84, 177, 128,
544129, 130, 131, 96, 97, 98, 99, 100, 178, 144, 145, 146, 147, 148, 179},
545{117, 164, 165, 0, 1, 2, 3, 4, 5, 6, 176, 177, 132, 16, 17,
54618, 19, 20, 21, 22, 133, 178, 116, 32, 33, 34, 35, 36, 37, 38,
547179, 181, 180, 48, 49, 50, 51, 52, 53, 112, 113, 114, 115, 64, 65,
54866, 67, 68, 69, 160, 161, 162, 163, 80, 81, 82, 83, 84, 85, 128,
549129, 130, 131, 96, 97, 98, 99, 100, 101, 144, 145, 146, 147, 148, 149},
550{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20,
55121, 22, 23, 24, 25, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
55248, 49, 50, 51, 52, 10, 112, 113, 114, 115, 64, 65, 66, 67, 26,
553176, 160, 161, 162, 163, 80, 81, 82, 83, 42, 177, 128, 129, 130, 131,
55496, 97, 98, 99, 100, 178, 144, 145, 146, 147, 148, 179},
555{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20,
55621, 22, 23, 24, 25, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
55748, 49, 50, 51, 10, 164, 112, 113, 114, 115, 64, 65, 66, 67, 68,
55826, 160, 161, 162, 163, 80, 81, 82, 83, 42, 177, 128, 129, 130, 131,
55996, 97, 98, 99, 176, 178, 144, 145, 146, 147, 116, 179},
560{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20,
56121, 22, 23, 24, 25, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
56248, 49, 50, 51, 10, 132, 112, 113, 114, 115, 64, 65, 66, 67, 26,
563176, 160, 161, 162, 163, 80, 81, 82, 83, 84, 42, 128, 129, 130, 131,
56496, 97, 98, 99, 177, 178, 144, 145, 146, 147, 180, 179},
565{0, 1, 2, 3, 4, 5, 6, 7, 8, 132, 16, 17, 18, 19, 20,
56621, 22, 23, 24, 116, 32, 33, 34, 35, 36, 37, 38, 39, 40, 180,
56748, 49, 50, 51, 52, 164, 112, 113, 114, 115, 64, 65, 66, 67, 68,
568176, 160, 161, 162, 163, 80, 81, 82, 83, 84, 177, 128, 129, 130, 131,
56996, 97, 98, 99, 100, 178, 144, 145, 146, 147, 148, 179},
570{0, 1, 2, 3, 4, 5, 6, 7, 164, 132, 16, 17, 18, 19, 20,
57121, 22, 23, 178, 116, 32, 33, 34, 35, 36, 37, 38, 39, 179, 180,
57248, 49, 50, 51, 52, 53, 112, 113, 114, 115, 64, 65, 66, 67, 68,
573176, 160, 161, 162, 163, 80, 81, 82, 83, 84, 177, 128, 129, 130, 131,
57496, 97, 98, 99, 100, 101, 144, 145, 146, 147, 148, 149},
575{0, 1, 2, 3, 4, 5, 6, 7, 176, 132, 16, 17, 18, 19, 20,
57621, 22, 23, 117, 116, 32, 33, 34, 35, 36, 37, 38, 39, 165, 180,
57748, 49, 50, 51, 52, 164, 112, 113, 114, 115, 64, 65, 66, 67, 68,
57869, 160, 161, 162, 163, 80, 81, 82, 83, 84, 177, 128, 129, 130, 131,
57996, 97, 98, 99, 100, 178, 144, 145, 146, 147, 148, 179},
580{0, 1, 2, 3, 4, 5, 6, 7, 177, 132, 16, 17, 18, 19, 20,
58121, 22, 23, 133, 116, 32, 33, 34, 35, 36, 37, 38, 39, 181, 180,
58248, 49, 50, 51, 52, 164, 112, 113, 114, 115, 64, 65, 66, 67, 68,
583176, 160, 161, 162, 163, 80, 81, 82, 83, 84, 85, 128, 129, 130, 131,
58496, 97, 98, 99, 100, 178, 144, 145, 146, 147, 148, 179},
585{0, 1, 2, 3, 4, 5, 164, 176, 177, 132, 16, 17, 18, 19, 20,
58621, 117, 133, 178, 116, 32, 33, 34, 35, 36, 37, 165, 179, 181, 180,
58748, 49, 50, 51, 52, 53, 112, 113, 114, 115, 64, 65, 66, 67, 68,
58869, 160, 161, 162, 163, 80, 81, 82, 83, 84, 85, 128, 129, 130, 131,
58996, 97, 98, 99, 100, 101, 144, 145, 146, 147, 148, 149},
590{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
59132, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
59264, 65, 66, 67, 68, 69, 70, 71, 72, 73, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89},
593{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
59432, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 49, 50, 51, 52, 53, 54, 55, 56, 10,
59564, 65, 66, 67, 68, 69, 70, 71, 72, 26, 80, 81, 82, 83, 84, 85, 86, 87, 88, 42},
596{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
59732, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 49, 50, 51, 52, 53, 54, 55, 11, 10,
59864, 65, 66, 67, 68, 69, 70, 71, 27, 26, 80, 81, 82, 83, 84, 85, 86, 87, 43, 42},
599{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
60032, 33, 34, 35, 36, 37, 38, 39, 40, 41, 48, 49, 50, 51, 15, 14, 13, 12, 11, 10,
60164, 65, 66, 67, 31, 30, 29, 28, 27, 26, 80, 81, 82, 83, 47, 46, 45, 44, 43, 42}
602};
603
604static void
605bc6_sign_extend(UINT16 *v, int prec) {
606int x = *v;
607if (x & (1 << (prec - 1))) {
608x |= -1 << prec;
609}
610*v = (UINT16)x;
611}
612
613static int
614bc6_unquantize(UINT16 v, int prec, int sign) {
615int s = 0;
616int x;
617if (!sign) {
618x = v;
619if (prec >= 15) {
620return x;
621}
622if (x == 0) {
623return 0;
624}
625if (x == ((1 << prec) - 1)) {
626return 0xffff;
627}
628return ((x << 15) + 0x4000) >> (prec - 1);
629} else {
630x = (INT16)v;
631if (prec >= 16) {
632return x;
633}
634if (x < 0) {
635s = 1;
636x = -x;
637}
638
639if (x != 0) {
640if (x >= ((1 << (prec - 1)) - 1)) {
641x = 0x7fff;
642} else {
643x = ((x << 15) + 0x4000) >> (prec - 1);
644}
645}
646
647if (s) {
648return -x;
649}
650return x;
651}
652}
653
654static float
655half_to_float(UINT16 h) {
656/* https://gist.github.com/rygorous/2144712 */
657union {
658UINT32 u;
659float f;
660} o, m;
661m.u = 0x77800000;
662o.u = (h & 0x7fff) << 13;
663o.f *= m.f;
664m.u = 0x47800000;
665if (o.f >= m.f) {
666o.u |= 255 << 23;
667}
668o.u |= (h & 0x8000) << 16;
669return o.f;
670}
671
672static float
673bc6_finalize(int v, int sign) {
674if (sign) {
675if (v < 0) {
676v = ((-v) * 31) / 32;
677return half_to_float((UINT16)(0x8000 | v));
678} else {
679return half_to_float((UINT16)((v * 31) / 32));
680}
681} else {
682return half_to_float((UINT16)((v * 31) / 64));
683}
684}
685
686static UINT8
687bc6_clamp(float value) {
688if (value < 0.0f) {
689return 0;
690} else if (value > 1.0f) {
691return 255;
692} else {
693return (UINT8)(value * 255.0f);
694}
695}
696
697static void
698bc6_lerp(rgba *col, int *e0, int *e1, int s, int sign) {
699int r, g, b;
700int t = 64 - s;
701r = (e0[0] * t + e1[0] * s) >> 6;
702g = (e0[1] * t + e1[1] * s) >> 6;
703b = (e0[2] * t + e1[2] * s) >> 6;
704col->r = bc6_clamp(bc6_finalize(r, sign));
705col->g = bc6_clamp(bc6_finalize(g, sign));
706col->b = bc6_clamp(bc6_finalize(b, sign));
707}
708
709static void
710decode_bc6_block(rgba *col, const UINT8 *src, int sign) {
711UINT16 endpoints[12]; /* storage for r0, g0, b0, r1, ... */
712int ueps[12];
713int i, i0, ib2, di, dw, mask, numep, s;
714UINT8 partition;
715const bc6_mode_info *info;
716const char *cw;
717int bit = 5;
718int epbits = 75;
719int ib = 3;
720int mode = src[0] & 0x1f;
721if ((mode & 3) == 0 || (mode & 3) == 1) {
722mode &= 3;
723bit = 2;
724} else if ((mode & 3) == 2) {
725mode = 2 + (mode >> 2);
726epbits = 72;
727} else {
728mode = 10 + (mode >> 2);
729epbits = 60;
730ib = 4;
731}
732if (mode >= 14) {
733/* invalid block */
734memset(col, 0, 16 * sizeof(col[0]));
735return;
736}
737info = &bc6_modes[mode];
738cw = bc7_get_weights(ib);
739numep = info->ns == 2 ? 12 : 6;
740for (i = 0; i < 12; i++) {
741endpoints[i] = 0;
742}
743for (i = 0; i < epbits; i++) {
744di = bc6_bit_packings[mode][i];
745dw = di >> 4;
746di &= 15;
747endpoints[dw] |= (UINT16)get_bit(src, bit + i) << di;
748}
749bit += epbits;
750partition = get_bits(src, bit, info->pb);
751bit += info->pb;
752mask = (1 << info->epb) - 1;
753if (sign) { /* sign-extend e0 if signed */
754bc6_sign_extend(&endpoints[0], info->epb);
755bc6_sign_extend(&endpoints[1], info->epb);
756bc6_sign_extend(&endpoints[2], info->epb);
757}
758if (sign || info->tr) { /* sign-extend e1,2,3 if signed or deltas */
759for (i = 3; i < numep; i += 3) {
760bc6_sign_extend(&endpoints[i], info->rb);
761bc6_sign_extend(&endpoints[i + 1], info->gb);
762bc6_sign_extend(&endpoints[i + 2], info->bb);
763}
764}
765if (info->tr) { /* apply deltas */
766for (i = 3; i < numep; i += 3) {
767endpoints[i] = (endpoints[i] + endpoints[0]) & mask;
768endpoints[i + 1] = (endpoints[i + 1] + endpoints[1]) & mask;
769endpoints[i + 2] = (endpoints[i + 2] + endpoints[2]) & mask;
770}
771}
772for (i = 0; i < numep; i++) {
773ueps[i] = bc6_unquantize(endpoints[i], info->epb, sign);
774}
775for (i = 0; i < 16; i++) {
776s = bc7_get_subset(info->ns, partition, i) * 6;
777ib2 = ib;
778if (i == 0) {
779ib2--;
780} else if (info->ns == 2) {
781if (i == bc7_ai0[partition]) {
782ib2--;
783}
784}
785i0 = get_bits(src, bit, ib2);
786bit += ib2;
787
788bc6_lerp(&col[i], &ueps[s], &ueps[s + 3], cw[i0], sign);
789}
790}
791
792static void
793put_block(Imaging im, ImagingCodecState state, const char *col, int sz, int C) {
794int width = state->xsize;
795int height = state->ysize;
796int xmax = width + state->xoff;
797int ymax = height + state->yoff;
798int j, i, y, x;
799char *dst;
800for (j = 0; j < 4; j++) {
801y = state->y + j;
802if (C) {
803if (y >= height) {
804continue;
805}
806if (state->ystep < 0) {
807y = state->yoff + ymax - y - 1;
808}
809dst = im->image[y];
810for (i = 0; i < 4; i++) {
811x = state->x + i;
812if (x >= width) {
813continue;
814}
815memcpy(dst + sz * x, col + sz * (j * 4 + i), sz);
816}
817} else {
818if (state->ystep < 0) {
819y = state->yoff + ymax - y - 1;
820}
821x = state->x;
822dst = im->image[y] + sz * x;
823memcpy(dst, col + sz * (j * 4), 4 * sz);
824}
825}
826state->x += 4;
827if (state->x >= xmax) {
828state->y += 4;
829state->x = state->xoff;
830}
831}
832
833static int
834decode_bcn(
835Imaging im,
836ImagingCodecState state,
837const UINT8 *src,
838int bytes,
839int N,
840int C,
841char *pixel_format
842) {
843int ymax = state->ysize + state->yoff;
844const UINT8 *ptr = src;
845switch (N) {
846#define DECODE_LOOP(NN, SZ, TY, ...) \
847case NN: \
848while (bytes >= SZ) { \
849TY col[16]; \
850memset(col, 0, 16 * sizeof(col[0])); \
851decode_bc##NN##_block(col, ptr); \
852put_block(im, state, (const char *)col, sizeof(col[0]), C); \
853ptr += SZ; \
854bytes -= SZ; \
855if (state->y >= ymax) { \
856return -1; \
857} \
858} \
859break
860
861DECODE_LOOP(1, 8, rgba);
862DECODE_LOOP(2, 16, rgba);
863DECODE_LOOP(3, 16, rgba);
864DECODE_LOOP(4, 8, lum);
865case 5: {
866int sign = strcmp(pixel_format, "BC5S") == 0 ? 1 : 0;
867while (bytes >= 16) {
868rgba col[16];
869memset(col, sign ? 128 : 0, 16 * sizeof(col[0]));
870decode_bc5_block(col, ptr, sign);
871put_block(im, state, (const char *)col, sizeof(col[0]), C);
872ptr += 16;
873bytes -= 16;
874if (state->y >= ymax) {
875return -1;
876}
877}
878break;
879}
880case 6: {
881int sign = strcmp(pixel_format, "BC6HS") == 0 ? 1 : 0;
882while (bytes >= 16) {
883rgba col[16];
884decode_bc6_block(col, ptr, sign);
885put_block(im, state, (const char *)col, sizeof(col[0]), C);
886ptr += 16;
887bytes -= 16;
888if (state->y >= ymax) {
889return -1;
890}
891}
892break;
893}
894DECODE_LOOP(7, 16, rgba);
895#undef DECODE_LOOP
896}
897return (int)(ptr - src);
898}
899
900int
901ImagingBcnDecode(Imaging im, ImagingCodecState state, UINT8 *buf, Py_ssize_t bytes) {
902int N = state->state & 0xf;
903int width = state->xsize;
904int height = state->ysize;
905int C = (width & 3) | (height & 3) ? 1 : 0;
906char *pixel_format = ((BCNSTATE *)state->context)->pixel_format;
907return decode_bcn(im, state, buf, bytes, N, C, pixel_format);
908}
909