opencv
1/* trees.c -- output deflated data using Huffman coding
2* Copyright (C) 1995-2021 Jean-loup Gailly
3* detect_data_type() function provided freely by Cosmin Truta, 2006
4* For conditions of distribution and use, see copyright notice in zlib.h
5*/
6
7/*
8* ALGORITHM
9*
10* The "deflation" process uses several Huffman trees. The more
11* common source values are represented by shorter bit sequences.
12*
13* Each code tree is stored in a compressed form which is itself
14* a Huffman encoding of the lengths of all the code strings (in
15* ascending order by source values). The actual code strings are
16* reconstructed from the lengths in the inflate process, as described
17* in the deflate specification.
18*
19* REFERENCES
20*
21* Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
22* Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
23*
24* Storer, James A.
25* Data Compression: Methods and Theory, pp. 49-50.
26* Computer Science Press, 1988. ISBN 0-7167-8156-5.
27*
28* Sedgewick, R.
29* Algorithms, p290.
30* Addison-Wesley, 1983. ISBN 0-201-06672-6.
31*/
32
33#include "zbuild.h"34#include "deflate.h"35#include "trees.h"36#include "trees_emit.h"37#include "trees_tbl.h"38
39/* The lengths of the bit length codes are sent in order of decreasing
40* probability, to avoid transmitting the lengths for unused bit length codes.
41*/
42
43/* ===========================================================================
44* Local data. These are initialized only once.
45*/
46
47struct static_tree_desc_s {48const ct_data *static_tree; /* static tree or NULL */49const int *extra_bits; /* extra bits for each code or NULL */50int extra_base; /* base index for extra_bits */51int elems; /* max number of elements in the tree */52unsigned int max_length; /* max bit length for the codes */53};54
55static const static_tree_desc static_l_desc =56{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};57
58static const static_tree_desc static_d_desc =59{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};60
61static const static_tree_desc static_bl_desc =62{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};63
64/* ===========================================================================
65* Local (static) routines in this file.
66*/
67
68static void init_block (deflate_state *s);69static void pqdownheap (deflate_state *s, ct_data *tree, int k);70static void gen_bitlen (deflate_state *s, tree_desc *desc);71static void build_tree (deflate_state *s, tree_desc *desc);72static void scan_tree (deflate_state *s, ct_data *tree, int max_code);73static void send_tree (deflate_state *s, ct_data *tree, int max_code);74static int build_bl_tree (deflate_state *s);75static void send_all_trees (deflate_state *s, int lcodes, int dcodes, int blcodes);76static void compress_block (deflate_state *s, const ct_data *ltree, const ct_data *dtree);77static int detect_data_type (deflate_state *s);78static void bi_flush (deflate_state *s);79
80/* ===========================================================================
81* Initialize the tree data structures for a new zlib stream.
82*/
83void Z_INTERNAL zng_tr_init(deflate_state *s) {84s->l_desc.dyn_tree = s->dyn_ltree;85s->l_desc.stat_desc = &static_l_desc;86
87s->d_desc.dyn_tree = s->dyn_dtree;88s->d_desc.stat_desc = &static_d_desc;89
90s->bl_desc.dyn_tree = s->bl_tree;91s->bl_desc.stat_desc = &static_bl_desc;92
93s->bi_buf = 0;94s->bi_valid = 0;95#ifdef ZLIB_DEBUG96s->compressed_len = 0L;97s->bits_sent = 0L;98#endif99
100/* Initialize the first block of the first file: */101init_block(s);102}
103
104/* ===========================================================================
105* Initialize a new block.
106*/
107static void init_block(deflate_state *s) {108int n; /* iterates over tree elements */109
110/* Initialize the trees. */111for (n = 0; n < L_CODES; n++)112s->dyn_ltree[n].Freq = 0;113for (n = 0; n < D_CODES; n++)114s->dyn_dtree[n].Freq = 0;115for (n = 0; n < BL_CODES; n++)116s->bl_tree[n].Freq = 0;117
118s->dyn_ltree[END_BLOCK].Freq = 1;119s->opt_len = s->static_len = 0L;120s->sym_next = s->matches = 0;121}
122
123#define SMALLEST 1124/* Index within the heap array of least frequent node in the Huffman tree */
125
126
127/* ===========================================================================
128* Remove the smallest element from the heap and recreate the heap with
129* one less element. Updates heap and heap_len.
130*/
131#define pqremove(s, tree, top) \132{\133top = s->heap[SMALLEST]; \134s->heap[SMALLEST] = s->heap[s->heap_len--]; \135pqdownheap(s, tree, SMALLEST); \136}
137
138/* ===========================================================================
139* Compares to subtrees, using the tree depth as tie breaker when
140* the subtrees have equal frequency. This minimizes the worst case length.
141*/
142#define smaller(tree, n, m, depth) \143(tree[n].Freq < tree[m].Freq || \144(tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))145
146/* ===========================================================================
147* Restore the heap property by moving down the tree starting at node k,
148* exchanging a node with the smallest of its two sons if necessary, stopping
149* when the heap property is re-established (each father smaller than its
150* two sons).
151*/
152static void pqdownheap(deflate_state *s, ct_data *tree, int k) {153/* tree: the tree to restore */154/* k: node to move down */155int v = s->heap[k];156int j = k << 1; /* left son of k */157while (j <= s->heap_len) {158/* Set j to the smallest of the two sons: */159if (j < s->heap_len && smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {160j++;161}162/* Exit if v is smaller than both sons */163if (smaller(tree, v, s->heap[j], s->depth))164break;165
166/* Exchange v with the smallest son */167s->heap[k] = s->heap[j];168k = j;169
170/* And continue down the tree, setting j to the left son of k */171j <<= 1;172}173s->heap[k] = v;174}
175
176/* ===========================================================================
177* Compute the optimal bit lengths for a tree and update the total bit length
178* for the current block.
179* IN assertion: the fields freq and dad are set, heap[heap_max] and
180* above are the tree nodes sorted by increasing frequency.
181* OUT assertions: the field len is set to the optimal bit length, the
182* array bl_count contains the frequencies for each bit length.
183* The length opt_len is updated; static_len is also updated if stree is
184* not null.
185*/
186static void gen_bitlen(deflate_state *s, tree_desc *desc) {187/* desc: the tree descriptor */188ct_data *tree = desc->dyn_tree;189int max_code = desc->max_code;190const ct_data *stree = desc->stat_desc->static_tree;191const int *extra = desc->stat_desc->extra_bits;192int base = desc->stat_desc->extra_base;193unsigned int max_length = desc->stat_desc->max_length;194int h; /* heap index */195int n, m; /* iterate over the tree elements */196unsigned int bits; /* bit length */197int xbits; /* extra bits */198uint16_t f; /* frequency */199int overflow = 0; /* number of elements with bit length too large */200
201for (bits = 0; bits <= MAX_BITS; bits++)202s->bl_count[bits] = 0;203
204/* In a first pass, compute the optimal bit lengths (which may205* overflow in the case of the bit length tree).
206*/
207tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */208
209for (h = s->heap_max + 1; h < HEAP_SIZE; h++) {210n = s->heap[h];211bits = tree[tree[n].Dad].Len + 1u;212if (bits > max_length){213bits = max_length;214overflow++;215}216tree[n].Len = (uint16_t)bits;217/* We overwrite tree[n].Dad which is no longer needed */218
219if (n > max_code) /* not a leaf node */220continue;221
222s->bl_count[bits]++;223xbits = 0;224if (n >= base)225xbits = extra[n-base];226f = tree[n].Freq;227s->opt_len += (unsigned long)f * (unsigned int)(bits + xbits);228if (stree)229s->static_len += (unsigned long)f * (unsigned int)(stree[n].Len + xbits);230}231if (overflow == 0)232return;233
234Tracev((stderr, "\nbit length overflow\n"));235/* This happens for example on obj2 and pic of the Calgary corpus */236
237/* Find the first bit length which could increase: */238do {239bits = max_length - 1;240while (s->bl_count[bits] == 0)241bits--;242s->bl_count[bits]--; /* move one leaf down the tree */243s->bl_count[bits+1] += 2u; /* move one overflow item as its brother */244s->bl_count[max_length]--;245/* The brother of the overflow item also moves one step up,246* but this does not affect bl_count[max_length]
247*/
248overflow -= 2;249} while (overflow > 0);250
251/* Now recompute all bit lengths, scanning in increasing frequency.252* h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
253* lengths instead of fixing only the wrong ones. This idea is taken
254* from 'ar' written by Haruhiko Okumura.)
255*/
256for (bits = max_length; bits != 0; bits--) {257n = s->bl_count[bits];258while (n != 0) {259m = s->heap[--h];260if (m > max_code)261continue;262if (tree[m].Len != bits) {263Tracev((stderr, "code %d bits %d->%u\n", m, tree[m].Len, bits));264s->opt_len += (unsigned long)(bits * tree[m].Freq);265s->opt_len -= (unsigned long)(tree[m].Len * tree[m].Freq);266tree[m].Len = (uint16_t)bits;267}268n--;269}270}271}
272
273/* ===========================================================================
274* Generate the codes for a given tree and bit counts (which need not be
275* optimal).
276* IN assertion: the array bl_count contains the bit length statistics for
277* the given tree and the field len is set for all tree elements.
278* OUT assertion: the field code is set for all tree elements of non
279* zero code length.
280*/
281Z_INTERNAL void gen_codes(ct_data *tree, int max_code, uint16_t *bl_count) {282/* tree: the tree to decorate */283/* max_code: largest code with non zero frequency */284/* bl_count: number of codes at each bit length */285uint16_t next_code[MAX_BITS+1]; /* next code value for each bit length */286unsigned int code = 0; /* running code value */287int bits; /* bit index */288int n; /* code index */289
290/* The distribution counts are first used to generate the code values291* without bit reversal.
292*/
293for (bits = 1; bits <= MAX_BITS; bits++) {294code = (code + bl_count[bits-1]) << 1;295next_code[bits] = (uint16_t)code;296}297/* Check that the bit counts in bl_count are consistent. The last code298* must be all ones.
299*/
300Assert(code + bl_count[MAX_BITS]-1 == (1 << MAX_BITS)-1, "inconsistent bit counts");301Tracev((stderr, "\ngen_codes: max_code %d ", max_code));302
303for (n = 0; n <= max_code; n++) {304int len = tree[n].Len;305if (len == 0)306continue;307/* Now reverse the bits */308tree[n].Code = PREFIX(bi_reverse)(next_code[len]++, len);309
310Tracecv(tree != static_ltree, (stderr, "\nn %3d %c l %2d c %4x (%x) ",311n, (isgraph(n & 0xff) ? n : ' '), len, tree[n].Code, next_code[len]-1));312}313}
314
315/* ===========================================================================
316* Construct one Huffman tree and assigns the code bit strings and lengths.
317* Update the total bit length for the current block.
318* IN assertion: the field freq is set for all tree elements.
319* OUT assertions: the fields len and code are set to the optimal bit length
320* and corresponding code. The length opt_len is updated; static_len is
321* also updated if stree is not null. The field max_code is set.
322*/
323static void build_tree(deflate_state *s, tree_desc *desc) {324/* desc: the tree descriptor */325ct_data *tree = desc->dyn_tree;326const ct_data *stree = desc->stat_desc->static_tree;327int elems = desc->stat_desc->elems;328int n, m; /* iterate over heap elements */329int max_code = -1; /* largest code with non zero frequency */330int node; /* new node being created */331
332/* Construct the initial heap, with least frequent element in333* heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
334* heap[0] is not used.
335*/
336s->heap_len = 0;337s->heap_max = HEAP_SIZE;338
339for (n = 0; n < elems; n++) {340if (tree[n].Freq != 0) {341s->heap[++(s->heap_len)] = max_code = n;342s->depth[n] = 0;343} else {344tree[n].Len = 0;345}346}347
348/* The pkzip format requires that at least one distance code exists,349* and that at least one bit should be sent even if there is only one
350* possible code. So to avoid special checks later on we force at least
351* two codes of non zero frequency.
352*/
353while (s->heap_len < 2) {354node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);355tree[node].Freq = 1;356s->depth[node] = 0;357s->opt_len--;358if (stree)359s->static_len -= stree[node].Len;360/* node is 0 or 1 so it does not have extra bits */361}362desc->max_code = max_code;363
364/* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,365* establish sub-heaps of increasing lengths:
366*/
367for (n = s->heap_len/2; n >= 1; n--)368pqdownheap(s, tree, n);369
370/* Construct the Huffman tree by repeatedly combining the least two371* frequent nodes.
372*/
373node = elems; /* next internal node of the tree */374do {375pqremove(s, tree, n); /* n = node of least frequency */376m = s->heap[SMALLEST]; /* m = node of next least frequency */377
378s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */379s->heap[--(s->heap_max)] = m;380
381/* Create a new node father of n and m */382tree[node].Freq = tree[n].Freq + tree[m].Freq;383s->depth[node] = (unsigned char)((s->depth[n] >= s->depth[m] ?384s->depth[n] : s->depth[m]) + 1);385tree[n].Dad = tree[m].Dad = (uint16_t)node;386#ifdef DUMP_BL_TREE387if (tree == s->bl_tree) {388fprintf(stderr, "\nnode %d(%d), sons %d(%d) %d(%d)",389node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);390}391#endif392/* and insert the new node in the heap */393s->heap[SMALLEST] = node++;394pqdownheap(s, tree, SMALLEST);395} while (s->heap_len >= 2);396
397s->heap[--(s->heap_max)] = s->heap[SMALLEST];398
399/* At this point, the fields freq and dad are set. We can now400* generate the bit lengths.
401*/
402gen_bitlen(s, (tree_desc *)desc);403
404/* The field len is now set, we can generate the bit codes */405gen_codes((ct_data *)tree, max_code, s->bl_count);406}
407
408/* ===========================================================================
409* Scan a literal or distance tree to determine the frequencies of the codes
410* in the bit length tree.
411*/
412static void scan_tree(deflate_state *s, ct_data *tree, int max_code) {413/* tree: the tree to be scanned */414/* max_code: and its largest code of non zero frequency */415int n; /* iterates over all tree elements */416int prevlen = -1; /* last emitted length */417int curlen; /* length of current code */418int nextlen = tree[0].Len; /* length of next code */419uint16_t count = 0; /* repeat count of the current code */420uint16_t max_count = 7; /* max repeat count */421uint16_t min_count = 4; /* min repeat count */422
423if (nextlen == 0)424max_count = 138, min_count = 3;425
426tree[max_code+1].Len = (uint16_t)0xffff; /* guard */427
428for (n = 0; n <= max_code; n++) {429curlen = nextlen;430nextlen = tree[n+1].Len;431if (++count < max_count && curlen == nextlen) {432continue;433} else if (count < min_count) {434s->bl_tree[curlen].Freq += count;435} else if (curlen != 0) {436if (curlen != prevlen)437s->bl_tree[curlen].Freq++;438s->bl_tree[REP_3_6].Freq++;439} else if (count <= 10) {440s->bl_tree[REPZ_3_10].Freq++;441} else {442s->bl_tree[REPZ_11_138].Freq++;443}444count = 0;445prevlen = curlen;446if (nextlen == 0) {447max_count = 138, min_count = 3;448} else if (curlen == nextlen) {449max_count = 6, min_count = 3;450} else {451max_count = 7, min_count = 4;452}453}454}
455
456/* ===========================================================================
457* Send a literal or distance tree in compressed form, using the codes in
458* bl_tree.
459*/
460static void send_tree(deflate_state *s, ct_data *tree, int max_code) {461/* tree: the tree to be scanned */462/* max_code and its largest code of non zero frequency */463int n; /* iterates over all tree elements */464int prevlen = -1; /* last emitted length */465int curlen; /* length of current code */466int nextlen = tree[0].Len; /* length of next code */467int count = 0; /* repeat count of the current code */468int max_count = 7; /* max repeat count */469int min_count = 4; /* min repeat count */470
471/* tree[max_code+1].Len = -1; */ /* guard already set */472if (nextlen == 0)473max_count = 138, min_count = 3;474
475// Temp local variables476uint32_t bi_valid = s->bi_valid;477uint64_t bi_buf = s->bi_buf;478
479for (n = 0; n <= max_code; n++) {480curlen = nextlen;481nextlen = tree[n+1].Len;482if (++count < max_count && curlen == nextlen) {483continue;484} else if (count < min_count) {485do {486send_code(s, curlen, s->bl_tree, bi_buf, bi_valid);487} while (--count != 0);488
489} else if (curlen != 0) {490if (curlen != prevlen) {491send_code(s, curlen, s->bl_tree, bi_buf, bi_valid);492count--;493}494Assert(count >= 3 && count <= 6, " 3_6?");495send_code(s, REP_3_6, s->bl_tree, bi_buf, bi_valid);496send_bits(s, count-3, 2, bi_buf, bi_valid);497
498} else if (count <= 10) {499send_code(s, REPZ_3_10, s->bl_tree, bi_buf, bi_valid);500send_bits(s, count-3, 3, bi_buf, bi_valid);501
502} else {503send_code(s, REPZ_11_138, s->bl_tree, bi_buf, bi_valid);504send_bits(s, count-11, 7, bi_buf, bi_valid);505}506count = 0;507prevlen = curlen;508if (nextlen == 0) {509max_count = 138, min_count = 3;510} else if (curlen == nextlen) {511max_count = 6, min_count = 3;512} else {513max_count = 7, min_count = 4;514}515}516
517// Store back temp variables518s->bi_buf = bi_buf;519s->bi_valid = bi_valid;520}
521
522/* ===========================================================================
523* Construct the Huffman tree for the bit lengths and return the index in
524* bl_order of the last bit length code to send.
525*/
526static int build_bl_tree(deflate_state *s) {527int max_blindex; /* index of last bit length code of non zero freq */528
529/* Determine the bit length frequencies for literal and distance trees */530scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);531scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);532
533/* Build the bit length tree: */534build_tree(s, (tree_desc *)(&(s->bl_desc)));535/* opt_len now includes the length of the tree representations, except536* the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
537*/
538
539/* Determine the number of bit length codes to send. The pkzip format540* requires that at least 4 bit length codes be sent. (appnote.txt says
541* 3 but the actual value used is 4.)
542*/
543for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {544if (s->bl_tree[bl_order[max_blindex]].Len != 0)545break;546}547/* Update opt_len to include the bit length tree and counts */548s->opt_len += 3*((unsigned long)max_blindex+1) + 5+5+4;549Tracev((stderr, "\ndyn trees: dyn %lu, stat %lu", s->opt_len, s->static_len));550
551return max_blindex;552}
553
554/* ===========================================================================
555* Send the header for a block using dynamic Huffman trees: the counts, the
556* lengths of the bit length codes, the literal tree and the distance tree.
557* IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
558*/
559static void send_all_trees(deflate_state *s, int lcodes, int dcodes, int blcodes) {560int rank; /* index in bl_order */561
562Assert(lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");563Assert(lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, "too many codes");564
565// Temp local variables566uint32_t bi_valid = s->bi_valid;567uint64_t bi_buf = s->bi_buf;568
569Tracev((stderr, "\nbl counts: "));570send_bits(s, lcodes-257, 5, bi_buf, bi_valid); /* not +255 as stated in appnote.txt */571send_bits(s, dcodes-1, 5, bi_buf, bi_valid);572send_bits(s, blcodes-4, 4, bi_buf, bi_valid); /* not -3 as stated in appnote.txt */573for (rank = 0; rank < blcodes; rank++) {574Tracev((stderr, "\nbl code %2u ", bl_order[rank]));575send_bits(s, s->bl_tree[bl_order[rank]].Len, 3, bi_buf, bi_valid);576}577Tracev((stderr, "\nbl tree: sent %lu", s->bits_sent));578
579// Store back temp variables580s->bi_buf = bi_buf;581s->bi_valid = bi_valid;582
583send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */584Tracev((stderr, "\nlit tree: sent %lu", s->bits_sent));585
586send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */587Tracev((stderr, "\ndist tree: sent %lu", s->bits_sent));588}
589
590/* ===========================================================================
591* Send a stored block
592*/
593void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored_len, int last) {594/* buf: input block */595/* stored_len: length of input block */596/* last: one if this is the last block for a file */597zng_tr_emit_tree(s, STORED_BLOCK, last); /* send block type */598zng_tr_emit_align(s); /* align on byte boundary */599cmpr_bits_align(s);600put_short(s, (uint16_t)stored_len);601put_short(s, (uint16_t)~stored_len);602cmpr_bits_add(s, 32);603sent_bits_add(s, 32);604if (stored_len) {605memcpy(s->pending_buf + s->pending, (unsigned char *)buf, stored_len);606s->pending += stored_len;607cmpr_bits_add(s, stored_len << 3);608sent_bits_add(s, stored_len << 3);609}610}
611
612/* ===========================================================================
613* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
614*/
615void Z_INTERNAL zng_tr_flush_bits(deflate_state *s) {616bi_flush(s);617}
618
619/* ===========================================================================
620* Send one empty static block to give enough lookahead for inflate.
621* This takes 10 bits, of which 7 may remain in the bit buffer.
622*/
623void Z_INTERNAL zng_tr_align(deflate_state *s) {624zng_tr_emit_tree(s, STATIC_TREES, 0);625zng_tr_emit_end_block(s, static_ltree, 0);626bi_flush(s);627}
628
629/* ===========================================================================
630* Determine the best encoding for the current block: dynamic trees, static
631* trees or store, and write out the encoded block.
632*/
633void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_len, int last) {634/* buf: input block, or NULL if too old */635/* stored_len: length of input block */636/* last: one if this is the last block for a file */637unsigned long opt_lenb, static_lenb; /* opt_len and static_len in bytes */638int max_blindex = 0; /* index of last bit length code of non zero freq */639
640/* Build the Huffman trees unless a stored block is forced */641if (UNLIKELY(s->sym_next == 0)) {642/* Emit an empty static tree block with no codes */643opt_lenb = static_lenb = 0;644s->static_len = 7;645} else if (s->level > 0) {646/* Check if the file is binary or text */647if (s->strm->data_type == Z_UNKNOWN)648s->strm->data_type = detect_data_type(s);649
650/* Construct the literal and distance trees */651build_tree(s, (tree_desc *)(&(s->l_desc)));652Tracev((stderr, "\nlit data: dyn %lu, stat %lu", s->opt_len, s->static_len));653
654build_tree(s, (tree_desc *)(&(s->d_desc)));655Tracev((stderr, "\ndist data: dyn %lu, stat %lu", s->opt_len, s->static_len));656/* At this point, opt_len and static_len are the total bit lengths of657* the compressed block data, excluding the tree representations.
658*/
659
660/* Build the bit length tree for the above two trees, and get the index661* in bl_order of the last bit length code to send.
662*/
663max_blindex = build_bl_tree(s);664
665/* Determine the best encoding. Compute the block lengths in bytes. */666opt_lenb = (s->opt_len+3+7) >> 3;667static_lenb = (s->static_len+3+7) >> 3;668
669Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %u lit %u ",670opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,671s->sym_next / 3));672
673if (static_lenb <= opt_lenb || s->strategy == Z_FIXED)674opt_lenb = static_lenb;675
676} else {677Assert(buf != NULL, "lost buf");678opt_lenb = static_lenb = stored_len + 5; /* force a stored block */679}680
681if (stored_len+4 <= opt_lenb && buf != NULL) {682/* 4: two words for the lengths683* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
684* Otherwise we can't have processed more than WSIZE input bytes since
685* the last block flush, because compression would have been
686* successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
687* transform a block into a stored block.
688*/
689zng_tr_stored_block(s, buf, stored_len, last);690
691} else if (static_lenb == opt_lenb) {692zng_tr_emit_tree(s, STATIC_TREES, last);693compress_block(s, (const ct_data *)static_ltree, (const ct_data *)static_dtree);694cmpr_bits_add(s, s->static_len);695} else {696zng_tr_emit_tree(s, DYN_TREES, last);697send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, max_blindex+1);698compress_block(s, (const ct_data *)s->dyn_ltree, (const ct_data *)s->dyn_dtree);699cmpr_bits_add(s, s->opt_len);700}701Assert(s->compressed_len == s->bits_sent, "bad compressed size");702/* The above check is made mod 2^32, for files larger than 512 MB703* and unsigned long implemented on 32 bits.
704*/
705init_block(s);706
707if (last) {708zng_tr_emit_align(s);709}710Tracev((stderr, "\ncomprlen %lu(%lu) ", s->compressed_len>>3, s->compressed_len-7*last));711}
712
713/* ===========================================================================
714* Send the block data compressed using the given Huffman trees
715*/
716static void compress_block(deflate_state *s, const ct_data *ltree, const ct_data *dtree) {717/* ltree: literal tree */718/* dtree: distance tree */719unsigned dist; /* distance of matched string */720int lc; /* match length or unmatched char (if dist == 0) */721unsigned sx = 0; /* running index in sym_buf */722
723if (s->sym_next != 0) {724do {725dist = s->sym_buf[sx++] & 0xff;726dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;727lc = s->sym_buf[sx++];728if (dist == 0) {729zng_emit_lit(s, ltree, lc);730} else {731zng_emit_dist(s, ltree, dtree, lc, dist);732} /* literal or match pair ? */733
734/* Check that the overlay between pending_buf and sym_buf is ok: */735Assert(s->pending < s->lit_bufsize + sx, "pending_buf overflow");736} while (sx < s->sym_next);737}738
739zng_emit_end_block(s, ltree, 0);740}
741
742/* ===========================================================================
743* Check if the data type is TEXT or BINARY, using the following algorithm:
744* - TEXT if the two conditions below are satisfied:
745* a) There are no non-portable control characters belonging to the
746* "black list" (0..6, 14..25, 28..31).
747* b) There is at least one printable character belonging to the
748* "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
749* - BINARY otherwise.
750* - The following partially-portable control characters form a
751* "gray list" that is ignored in this detection algorithm:
752* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
753* IN assertion: the fields Freq of dyn_ltree are set.
754*/
755static int detect_data_type(deflate_state *s) {756/* black_mask is the bit mask of black-listed bytes757* set bits 0..6, 14..25, and 28..31
758* 0xf3ffc07f = binary 11110011111111111100000001111111
759*/
760unsigned long black_mask = 0xf3ffc07fUL;761int n;762
763/* Check for non-textual ("black-listed") bytes. */764for (n = 0; n <= 31; n++, black_mask >>= 1)765if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))766return Z_BINARY;767
768/* Check for textual ("white-listed") bytes. */769if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 || s->dyn_ltree[13].Freq != 0)770return Z_TEXT;771for (n = 32; n < LITERALS; n++)772if (s->dyn_ltree[n].Freq != 0)773return Z_TEXT;774
775/* There are no "black-listed" or "white-listed" bytes:776* this stream either is empty or has tolerated ("gray-listed") bytes only.
777*/
778return Z_BINARY;779}
780
781/* ===========================================================================
782* Flush the bit buffer, keeping at most 7 bits in it.
783*/
784static void bi_flush(deflate_state *s) {785if (s->bi_valid == 64) {786put_uint64(s, s->bi_buf);787s->bi_buf = 0;788s->bi_valid = 0;789} else {790if (s->bi_valid >= 32) {791put_uint32(s, (uint32_t)s->bi_buf);792s->bi_buf >>= 32;793s->bi_valid -= 32;794}795if (s->bi_valid >= 16) {796put_short(s, (uint16_t)s->bi_buf);797s->bi_buf >>= 16;798s->bi_valid -= 16;799}800if (s->bi_valid >= 8) {801put_byte(s, s->bi_buf);802s->bi_buf >>= 8;803s->bi_valid -= 8;804}805}806}
807
808/* ===========================================================================
809* Reverse the first len bits of a code using bit manipulation
810*/
811Z_INTERNAL uint16_t PREFIX(bi_reverse)(unsigned code, int len) {812/* code: the value to invert */813/* len: its bit length */814Assert(len >= 1 && len <= 15, "code length must be 1-15");815#define bitrev8(b) \816(uint8_t)((((uint8_t)(b) * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32)817return (bitrev8(code >> 8) | (uint16_t)bitrev8(code) << 8) >> (16 - len);818}
819