1
// Copyright 2019+ Klaus Post. All rights reserved.
2
// License information can be found in the LICENSE file.
3
// Based on work by Yann Collet, released under BSD License.
13
"github.com/klauspost/compress/huff0"
22
dictLitEnc *huff0.Scratch
27
recentOffsets [3]uint32
28
prevRecentOffsets [3]uint32
34
// init should be used once the block has been created.
35
// If called more than once, the effect is the same as calling reset.
36
func (b *blockEnc) init() {
39
if cap(b.literals) < 1<<10 {
40
b.literals = make([]byte, 0, 1<<10)
43
if cap(b.sequences) < defSeqs {
44
b.sequences = make([]seq, 0, defSeqs)
47
if cap(b.output) < 1<<10 {
48
b.output = make([]byte, 0, 1<<10)
51
if cap(b.literals) < maxCompressedBlockSize {
52
b.literals = make([]byte, 0, maxCompressedBlockSize)
55
if cap(b.sequences) < defSeqs {
56
b.sequences = make([]seq, 0, defSeqs)
58
if cap(b.output) < maxCompressedBlockSize {
59
b.output = make([]byte, 0, maxCompressedBlockSize)
63
if b.coders.mlEnc == nil {
64
b.coders.mlEnc = &fseEncoder{}
65
b.coders.mlPrev = &fseEncoder{}
66
b.coders.ofEnc = &fseEncoder{}
67
b.coders.ofPrev = &fseEncoder{}
68
b.coders.llEnc = &fseEncoder{}
69
b.coders.llPrev = &fseEncoder{}
71
b.litEnc = &huff0.Scratch{WantLogLess: 4}
75
// initNewEncode can be used to reset offsets and encoders to the initial state.
76
func (b *blockEnc) initNewEncode() {
77
b.recentOffsets = [3]uint32{1, 4, 8}
78
b.litEnc.Reuse = huff0.ReusePolicyNone
79
b.coders.setPrev(nil, nil, nil)
82
// reset will reset the block for a new encode, but in the same stream,
83
// meaning that state will be carried over, but the block content is reset.
84
// If a previous block is provided, the recent offsets are carried over.
85
func (b *blockEnc) reset(prev *blockEnc) {
87
b.literals = b.literals[:0]
89
b.sequences = b.sequences[:0]
90
b.output = b.output[:0]
93
b.recentOffsets = prev.prevRecentOffsets
98
// reset will reset the block for a new encode, but in the same stream,
99
// meaning that state will be carried over, but the block content is reset.
100
// If a previous block is provided, the recent offsets are carried over.
101
func (b *blockEnc) swapEncoders(prev *blockEnc) {
102
b.coders.swap(&prev.coders)
103
b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
106
// blockHeader contains the information for a block header.
107
type blockHeader uint32
109
// setLast sets the 'last' indicator on a block.
110
func (h *blockHeader) setLast(b bool) {
114
const mask = (1 << 24) - 2
119
// setSize will store the compressed size of a block.
120
func (h *blockHeader) setSize(v uint32) {
122
*h = (*h)&mask | blockHeader(v<<3)
125
// setType sets the block type.
126
func (h *blockHeader) setType(t blockType) {
127
const mask = 1 | (((1 << 24) - 1) ^ 7)
128
*h = (*h & mask) | blockHeader(t<<1)
131
// appendTo will append the block header to a slice.
132
func (h blockHeader) appendTo(b []byte) []byte {
133
return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
136
// String returns a string representation of the block.
137
func (h blockHeader) String() string {
138
return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
141
// literalsHeader contains literals header information.
142
type literalsHeader uint64
144
// setType can be used to set the type of literal block.
145
func (h *literalsHeader) setType(t literalsBlockType) {
146
const mask = math.MaxUint64 - 3
147
*h = (*h & mask) | literalsHeader(t)
150
// setSize can be used to set a single size, for uncompressed and RLE content.
151
func (h *literalsHeader) setSize(regenLen int) {
152
inBits := bits.Len32(uint32(regenLen))
153
// Only retain 2 bits
155
lh := uint64(*h & mask)
158
lh |= (uint64(regenLen) << 3) | (1 << 60)
160
got := int(lh>>3) & 0xff
162
panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
166
lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
168
lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
170
panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
172
*h = literalsHeader(lh)
175
// setSizes will set the size of a compressed literals section and the input length.
176
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
177
compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
178
// Only retain 2 bits
180
lh := uint64(*h & mask)
182
case compBits <= 10 && inBits <= 10:
186
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
188
const mmask = (1 << 24) - 1
189
n := (lh >> 4) & mmask
190
if int(n&1023) != inLen {
191
panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
193
if int(n>>10) != compLen {
194
panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
197
case compBits <= 14 && inBits <= 14:
198
lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
200
panic("single stream used with more than 10 bits length.")
202
case compBits <= 18 && inBits <= 18:
203
lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
205
panic("single stream used with more than 10 bits length.")
208
panic("internal error: block too big")
210
*h = literalsHeader(lh)
213
// appendTo will append the literals header to a byte slice.
214
func (h literalsHeader) appendTo(b []byte) []byte {
215
size := uint8(h >> 60)
218
b = append(b, uint8(h))
220
b = append(b, uint8(h), uint8(h>>8))
222
b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
224
b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
226
b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
228
panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
233
// size returns the output size with currently set values.
234
func (h literalsHeader) size() int {
238
func (h literalsHeader) String() string {
239
return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
242
// pushOffsets will push the recent offsets to the backup store.
243
func (b *blockEnc) pushOffsets() {
244
b.prevRecentOffsets = b.recentOffsets
247
// pushOffsets will push the recent offsets to the backup store.
248
func (b *blockEnc) popOffsets() {
249
b.recentOffsets = b.prevRecentOffsets
252
// matchOffset will adjust recent offsets and return the adjusted one,
253
// if it matches a previous offset.
254
func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
255
// Check if offset is one of the recent offsets.
256
// Adjusts the output offset accordingly.
257
// Gives a tiny bit of compression, typically around 1%.
261
case b.recentOffsets[0]:
263
case b.recentOffsets[1]:
264
b.recentOffsets[1] = b.recentOffsets[0]
265
b.recentOffsets[0] = offset
267
case b.recentOffsets[2]:
268
b.recentOffsets[2] = b.recentOffsets[1]
269
b.recentOffsets[1] = b.recentOffsets[0]
270
b.recentOffsets[0] = offset
273
b.recentOffsets[2] = b.recentOffsets[1]
274
b.recentOffsets[1] = b.recentOffsets[0]
275
b.recentOffsets[0] = offset
280
case b.recentOffsets[1]:
281
b.recentOffsets[1] = b.recentOffsets[0]
282
b.recentOffsets[0] = offset
284
case b.recentOffsets[2]:
285
b.recentOffsets[2] = b.recentOffsets[1]
286
b.recentOffsets[1] = b.recentOffsets[0]
287
b.recentOffsets[0] = offset
289
case b.recentOffsets[0] - 1:
290
b.recentOffsets[2] = b.recentOffsets[1]
291
b.recentOffsets[1] = b.recentOffsets[0]
292
b.recentOffsets[0] = offset
295
b.recentOffsets[2] = b.recentOffsets[1]
296
b.recentOffsets[1] = b.recentOffsets[0]
297
b.recentOffsets[0] = offset
307
// encodeRaw can be used to set the output to a raw representation of supplied bytes.
308
func (b *blockEnc) encodeRaw(a []byte) {
311
bh.setSize(uint32(len(a)))
312
bh.setType(blockTypeRaw)
313
b.output = bh.appendTo(b.output[:0])
314
b.output = append(b.output, a...)
316
println("Adding RAW block, length", len(a), "last:", b.last)
320
// encodeRaw can be used to set the output to a raw representation of supplied bytes.
321
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
324
bh.setSize(uint32(len(src)))
325
bh.setType(blockTypeRaw)
326
dst = bh.appendTo(dst)
327
dst = append(dst, src...)
329
println("Adding RAW block, length", len(src), "last:", b.last)
334
// encodeLits can be used if the block is only litLen.
335
func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
338
bh.setSize(uint32(len(lits)))
340
// Don't compress extremely small blocks
341
if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
343
println("Adding RAW block, length", len(lits), "last:", b.last)
345
bh.setType(blockTypeRaw)
346
b.output = bh.appendTo(b.output)
347
b.output = append(b.output, lits...)
356
if b.dictLitEnc != nil {
357
b.litEnc.TransferCTable(b.dictLitEnc)
358
b.litEnc.Reuse = huff0.ReusePolicyAllow
361
if len(lits) >= 1024 {
363
out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
364
} else if len(lits) > 32 {
367
out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
369
err = huff0.ErrIncompressible
373
case huff0.ErrIncompressible:
375
println("Adding RAW block, length", len(lits), "last:", b.last)
377
bh.setType(blockTypeRaw)
378
b.output = bh.appendTo(b.output)
379
b.output = append(b.output, lits...)
381
case huff0.ErrUseRLE:
383
println("Adding RLE block, length", len(lits))
385
bh.setType(blockTypeRLE)
386
b.output = bh.appendTo(b.output)
387
b.output = append(b.output, lits[0])
395
b.litEnc.Reuse = huff0.ReusePolicyAllow
396
bh.setType(blockTypeCompressed)
397
var lh literalsHeader
400
println("Reused tree, compressed to", len(out))
402
lh.setType(literalsBlockTreeless)
405
println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
407
lh.setType(literalsBlockCompressed)
410
lh.setSizes(len(out), len(lits), single)
411
bh.setSize(uint32(len(out) + lh.size() + 1))
413
// Write block headers.
414
b.output = bh.appendTo(b.output)
415
b.output = lh.appendTo(b.output)
416
// Add compressed data.
417
b.output = append(b.output, out...)
419
b.output = append(b.output, 0)
423
// fuzzFseEncoder can be used to fuzz the FSE encoder.
424
func fuzzFseEncoder(data []byte) int {
425
if len(data) > maxSequences || len(data) < 2 {
429
hist := enc.Histogram()
431
for i, v := range data {
443
maxCount := func(a []uint32) int {
445
for _, v := range a {
452
cnt := maxCount(hist[:maxSym])
453
if cnt == len(data) {
457
enc.HistogramFinished(maxSym, cnt)
458
err := enc.normalizeCount(len(data))
462
_, err = enc.writeCount(nil)
469
// encode will encode the block and append the output in b.output.
470
// Previous offset codes must be pushed if more blocks are expected.
471
func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
472
if len(b.sequences) == 0 {
473
return b.encodeLits(b.literals, rawAllLits)
475
// We want some difference to at least account for the headers.
476
saved := b.size - len(b.literals) - (b.size >> 5)
479
return errIncompressible
482
return b.encodeLits(org, rawAllLits)
486
var lh literalsHeader
488
bh.setType(blockTypeCompressed)
489
// Store offset of the block header. Needed when we know the size.
490
bhOffset := len(b.output)
491
b.output = bh.appendTo(b.output)
498
if b.dictLitEnc != nil {
499
b.litEnc.TransferCTable(b.dictLitEnc)
500
b.litEnc.Reuse = huff0.ReusePolicyAllow
503
if len(b.literals) >= 1024 && !raw {
505
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
506
} else if len(b.literals) > 32 && !raw {
509
out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
511
err = huff0.ErrIncompressible
515
case huff0.ErrIncompressible:
516
lh.setType(literalsBlockRaw)
517
lh.setSize(len(b.literals))
518
b.output = lh.appendTo(b.output)
519
b.output = append(b.output, b.literals...)
521
println("Adding literals RAW, length", len(b.literals))
523
case huff0.ErrUseRLE:
524
lh.setType(literalsBlockRLE)
525
lh.setSize(len(b.literals))
526
b.output = lh.appendTo(b.output)
527
b.output = append(b.output, b.literals[0])
529
println("Adding literals RLE")
532
// Compressed litLen...
535
println("reused tree")
537
lh.setType(literalsBlockTreeless)
540
println("new tree, size:", len(b.litEnc.OutTable))
542
lh.setType(literalsBlockCompressed)
544
_, _, err := huff0.ReadTable(out, nil)
550
lh.setSizes(len(out), len(b.literals), single)
552
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
553
println("Adding literal header:", lh)
555
b.output = lh.appendTo(b.output)
556
b.output = append(b.output, out...)
557
b.litEnc.Reuse = huff0.ReusePolicyAllow
559
println("Adding literals compressed")
563
println("Adding literals ERROR:", err)
567
// Sequence compression
569
// Write the number of sequences
571
case len(b.sequences) < 128:
572
b.output = append(b.output, uint8(len(b.sequences)))
573
case len(b.sequences) < 0x7f00: // TODO: this could be wrong
574
n := len(b.sequences)
575
b.output = append(b.output, 128+uint8(n>>8), uint8(n))
577
n := len(b.sequences) - 0x7f00
578
b.output = append(b.output, 255, uint8(n), uint8(n>>8))
581
println("Encoding", len(b.sequences), "sequences")
584
llEnc := b.coders.llEnc
585
ofEnc := b.coders.ofEnc
586
mlEnc := b.coders.mlEnc
587
err = llEnc.normalizeCount(len(b.sequences))
591
err = ofEnc.normalizeCount(len(b.sequences))
595
err = mlEnc.normalizeCount(len(b.sequences))
600
// Choose the best compression mode for each type.
601
// Will evaluate the new vs predefined and previous.
602
chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
603
// See if predefined/previous is better
604
hist := cur.count[:cur.symbolLen]
605
nSize := cur.approxSize(hist) + cur.maxHeaderSize()
606
predefSize := preDef.approxSize(hist)
607
prevSize := prev.approxSize(hist)
609
// Add a small penalty for new encoders.
610
// Don't bother with extremely small (<2 byte gains).
611
nSize = nSize + (nSize+2*8*16)>>4
613
case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
615
println("Using predefined", predefSize>>3, "<=", nSize>>3)
617
return preDef, compModePredefined
618
case prevSize <= nSize:
620
println("Using previous", prevSize>>3, "<=", nSize>>3)
622
return prev, compModeRepeat
625
println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
626
println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
628
return cur, compModeFSE
632
// Write compression mode
635
mode |= uint8(compModeRLE) << 6
636
llEnc.setRLE(b.sequences[0].llCode)
638
println("llEnc.useRLE")
642
llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
643
mode |= uint8(m) << 6
646
mode |= uint8(compModeRLE) << 4
647
ofEnc.setRLE(b.sequences[0].ofCode)
649
println("ofEnc.useRLE")
653
ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
654
mode |= uint8(m) << 4
658
mode |= uint8(compModeRLE) << 2
659
mlEnc.setRLE(b.sequences[0].mlCode)
661
println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
665
mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
666
mode |= uint8(m) << 2
668
b.output = append(b.output, mode)
670
printf("Compression modes: 0b%b", mode)
672
b.output, err = llEnc.writeCount(b.output)
676
start := len(b.output)
677
b.output, err = ofEnc.writeCount(b.output)
682
println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
683
fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
684
for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
685
fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
688
b.output, err = mlEnc.writeCount(b.output)
697
var ll, of, ml cState
700
seq := len(b.sequences) - 1
701
s := b.sequences[seq]
702
llEnc.setBits(llBitsTable[:])
703
mlEnc.setBits(mlBitsTable[:])
706
llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
708
// We have 3 bounds checks here (and in the loop).
709
// Since we are iterating backwards it is kinda hard to avoid.
710
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
711
ll.init(wr, &llEnc.ct, llB)
712
of.init(wr, &ofEnc.ct, ofB)
714
ml.init(wr, &mlEnc.ct, mlB)
716
// Each of these lookups also generates a bounds check.
717
wr.addBits32NC(s.litLen, llB.outBits)
718
wr.addBits32NC(s.matchLen, mlB.outBits)
720
wr.addBits32NC(s.offset, ofB.outBits)
722
println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
725
// Store sequences in reverse...
729
ofB := ofTT[s.ofCode]
730
wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
732
nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
733
dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
734
wr.addBits16NC(of.state, uint8(nbBitsOut))
735
of.state = of.stateTable[dstState]
737
// Accumulate extra bits.
738
outBits := ofB.outBits & 31
739
extraBits := uint64(s.offset & bitMask32[outBits])
740
extraBitsN := outBits
742
mlB := mlTT[s.mlCode]
744
nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
745
dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
746
wr.addBits16NC(ml.state, uint8(nbBitsOut))
747
ml.state = ml.stateTable[dstState]
749
outBits = mlB.outBits & 31
750
extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
751
extraBitsN += outBits
753
llB := llTT[s.llCode]
755
nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
756
dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
757
wr.addBits16NC(ll.state, uint8(nbBitsOut))
758
ll.state = ll.stateTable[dstState]
760
outBits = llB.outBits & 31
761
extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
762
extraBitsN += outBits
765
wr.addBits64NC(extraBits, extraBitsN)
768
println("Encoded seq", seq, s)
773
ml.flush(mlEnc.actualTableLog)
774
of.flush(ofEnc.actualTableLog)
775
ll.flush(llEnc.actualTableLog)
782
if len(b.output)-3-bhOffset >= b.size {
783
// Maybe even add a bigger margin.
784
b.litEnc.Reuse = huff0.ReusePolicyNone
785
return errIncompressible
788
// Size is output minus block header.
789
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
791
println("Rewriting block header", bh)
793
_ = bh.appendTo(b.output[bhOffset:bhOffset])
794
b.coders.setPrev(llEnc, mlEnc, ofEnc)
798
var errIncompressible = errors.New("incompressible")
800
func (b *blockEnc) genCodes() {
801
if len(b.sequences) == 0 {
805
if len(b.sequences) > math.MaxUint16 {
806
panic("can only encode up to 64K sequences")
808
// No bounds checks after here:
809
llH := b.coders.llEnc.Histogram()
810
ofH := b.coders.ofEnc.Histogram()
811
mlH := b.coders.mlEnc.Histogram()
822
var llMax, ofMax, mlMax uint8
823
for i := range b.sequences {
824
seq := &b.sequences[i]
825
v := llCode(seq.litLen)
832
v = ofCode(seq.offset)
839
v = mlCode(seq.matchLen)
844
if debugAsserts && mlMax > maxMatchLengthSymbol {
845
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
849
maxCount := func(a []uint32) int {
851
for _, v := range a {
858
if debugAsserts && mlMax > maxMatchLengthSymbol {
859
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
861
if debugAsserts && ofMax > maxOffsetBits {
862
panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
864
if debugAsserts && llMax > maxLiteralLengthSymbol {
865
panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
868
b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
869
b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
870
b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))