// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
13
"github.com/klauspost/compress/huff0"
14
"github.com/klauspost/compress/zstd/internal/xxhash"
19
//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
22
blockTypeRaw blockType = iota
28
type literalsBlockType uint8
31
literalsBlockRaw literalsBlockType = iota
33
literalsBlockCompressed
38
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
39
maxCompressedBlockSize = 128 << 10
41
// Maximum possible block size (all Raw+Uncompressed).
42
maxBlockSize = (1 << 21) - 1
44
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
45
maxCompressedLiteralSize = 1 << 18
46
maxRLELiteralSize = 1 << 20
48
maxSequences = 0x7f00 + 0xffff
50
// We support slightly less than the reference decoder to be able to
51
// use ints on 32 bit archs.
56
huffDecoderPool = sync.Pool{New: func() interface{} {
57
return &huff0.Scratch{}
60
fseDecoderPool = sync.Pool{New: func() interface{} {
66
// Raw source data of the block.
70
// Destination of the decoded data.
73
// Buffer for literals data.
76
// Window size of the block.
81
// Check against this crc
84
// Frame to use for singlethreaded decoding.
85
// Should not be used by the decoder itself since parent may be another frame.
94
seqSize int // Size of uncompressed sequences
98
// Block is RLE, this is the size.
104
// Is this the last block of a frame?
111
func (b *blockDec) String() string {
115
return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
118
func newBlockDec(lowMem bool) *blockDec {
125
// reset will reset the block.
126
// Input must be a start of a block and will be at the end of the block when returned.
127
func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
128
b.WindowSize = windowSize
129
tmp, err := br.readSmall(3)
131
println("Reading block header:", err)
134
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
136
b.Type = blockType((bh >> 1) & 3)
138
cSize := int(bh >> 3)
139
maxSize := maxBlockSize
141
case blockTypeReserved:
142
return ErrReservedBlockType
144
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
146
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
148
return ErrWindowSizeExceeded
150
b.RLESize = uint32(cSize)
155
case blockTypeCompressed:
157
println("Data size on stream:", cSize)
160
maxSize = maxCompressedBlockSize
161
if windowSize < maxCompressedBlockSize && b.lowMem {
162
maxSize = int(windowSize)
164
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
166
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
168
return ErrCompressedSizeTooBig
171
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
173
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
175
return ErrWindowSizeExceeded
179
// We do not need a destination for raw blocks.
182
panic("Invalid block type")
186
if cap(b.dataStorage) < cSize {
187
if b.lowMem || cSize > maxCompressedBlockSize {
188
b.dataStorage = make([]byte, 0, cSize)
190
b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
193
if cap(b.dst) <= maxSize {
194
b.dst = make([]byte, 0, maxSize+1)
196
b.data, err = br.readBig(cSize, b.dataStorage)
199
println("Reading block:", err, "(", cSize, ")", len(b.data))
207
// sendEOF will make the decoder send EOF on this frame.
208
func (b *blockDec) sendErr(err error) {
210
b.Type = blockTypeReserved
214
// Close will release resources.
215
// Closed blockDec cannot be reset.
216
func (b *blockDec) Close() {
220
func (b *blockDec) decodeBuf(hist *history) error {
223
if cap(b.dst) < int(b.RLESize) {
225
b.dst = make([]byte, b.RLESize)
227
b.dst = make([]byte, maxBlockSize)
230
b.dst = b.dst[:b.RLESize]
232
for i := range b.dst {
235
hist.appendKeep(b.dst)
238
hist.appendKeep(b.data)
240
case blockTypeCompressed:
242
// Append directly to history
243
if hist.ignoreBuffer == 0 {
249
err := b.decodeCompressed(hist)
251
println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
253
if hist.ignoreBuffer == 0 {
257
hist.appendKeep(b.dst)
260
case blockTypeReserved:
261
// Used for returning errors.
264
panic("Invalid block type")
268
func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
269
// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
271
return in, ErrBlockTooSmall
274
litType := literalsBlockType(in[0] & 3)
277
sizeFormat := (in[0] >> 2) & 3
281
case literalsBlockRaw, literalsBlockRLE:
284
// Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
285
litRegenSize = int(in[0] >> 3)
288
// Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
289
litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
292
// Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
294
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
295
return in, ErrBlockTooSmall
297
litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
300
case literalsBlockCompressed, literalsBlockTreeless:
303
// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
305
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
306
return in, ErrBlockTooSmall
308
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
309
litRegenSize = int(n & 1023)
310
litCompSize = int(n >> 10)
311
fourStreams = sizeFormat == 1
316
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
317
return in, ErrBlockTooSmall
319
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
320
litRegenSize = int(n & 16383)
321
litCompSize = int(n >> 14)
326
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
327
return in, ErrBlockTooSmall
329
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
330
litRegenSize = int(n & 262143)
331
litCompSize = int(n >> 18)
336
println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
338
if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
339
return in, ErrWindowSizeExceeded
343
case literalsBlockRaw:
344
if len(in) < litRegenSize {
345
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
346
return in, ErrBlockTooSmall
348
literals = in[:litRegenSize]
349
in = in[litRegenSize:]
350
//printf("Found %d uncompressed literals\n", litRegenSize)
351
case literalsBlockRLE:
353
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
354
return in, ErrBlockTooSmall
356
if cap(b.literalBuf) < litRegenSize {
358
b.literalBuf = make([]byte, litRegenSize)
360
if litRegenSize > maxCompressedLiteralSize {
362
b.literalBuf = make([]byte, litRegenSize)
364
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
368
literals = b.literalBuf[:litRegenSize]
370
for i := range literals {
375
printf("Found %d RLE compressed literals\n", litRegenSize)
377
case literalsBlockTreeless:
378
if len(in) < litCompSize {
379
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
380
return in, ErrBlockTooSmall
382
// Store compressed literals, so we defer decoding until we get history.
383
literals = in[:litCompSize]
384
in = in[litCompSize:]
386
printf("Found %d compressed literals\n", litCompSize)
388
huff := hist.huffTree
390
return in, errors.New("literal block was treeless, but no history was defined")
392
// Ensure we have space to store it.
393
if cap(b.literalBuf) < litRegenSize {
395
b.literalBuf = make([]byte, 0, litRegenSize)
397
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
401
// Use our out buffer.
402
huff.MaxDecodedSize = maxCompressedBlockSize
404
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
406
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
408
// Make sure we don't leak our literals buffer
410
println("decompressing literals:", err)
413
if len(literals) != litRegenSize {
414
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
417
case literalsBlockCompressed:
418
if len(in) < litCompSize {
419
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
420
return in, ErrBlockTooSmall
422
literals = in[:litCompSize]
423
in = in[litCompSize:]
424
// Ensure we have space to store it.
425
if cap(b.literalBuf) < litRegenSize {
427
b.literalBuf = make([]byte, 0, litRegenSize)
429
b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
432
huff := hist.huffTree
433
if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
434
huff = huffDecoderPool.Get().(*huff0.Scratch)
436
huff = &huff0.Scratch{}
440
huff, literals, err = huff0.ReadTable(literals, huff)
442
println("reading huffman table:", err)
446
huff.MaxDecodedSize = maxCompressedBlockSize
447
// Use our out buffer.
449
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
451
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
454
println("decoding compressed literals:", err)
457
// Make sure we don't leak our literals buffer
458
if len(literals) != litRegenSize {
459
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
462
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
465
hist.decoders.literals = literals
469
// decodeCompressed will start decompressing a block.
470
func (b *blockDec) decodeCompressed(hist *history) error {
472
in, err := b.decodeLiterals(in, hist)
476
err = b.prepareSequences(in, hist)
480
if hist.decoders.nSeqs == 0 {
481
b.dst = append(b.dst, hist.decoders.literals...)
484
err = hist.decoders.decodeSync(hist)
488
b.dst = hist.decoders.out
489
hist.recentOffsets = hist.decoders.prevOffset
493
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
495
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
497
return ErrBlockTooSmall
504
case seqHeader < 128:
505
nSeqs = int(seqHeader)
507
case seqHeader < 255:
509
return ErrBlockTooSmall
511
nSeqs = int(seqHeader-128)<<8 | int(in[1])
513
case seqHeader == 255:
515
return ErrBlockTooSmall
517
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
521
var seqs = &hist.decoders
525
return ErrBlockTooSmall
527
br := byteReader{b: in, off: 0}
528
compMode := br.Uint8()
531
printf("Compression modes: 0b%b", compMode)
533
for i := uint(0); i < 3; i++ {
534
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
536
println("Table", tableIndex(i), "is", mode)
539
switch tableIndex(i) {
540
case tableLiteralLengths:
541
seq = &seqs.litLengths
544
case tableMatchLengths:
545
seq = &seqs.matchLengths
547
panic("unknown table")
550
case compModePredefined:
551
if seq.fse != nil && !seq.fse.preDefined {
552
fseDecoderPool.Put(seq.fse)
554
seq.fse = &fsePredef[i]
557
return ErrBlockTooSmall
561
if seq.fse == nil || seq.fse.preDefined {
562
seq.fse = fseDecoderPool.Get().(*fseDecoder)
564
symb, err := decSymbolValue(v, symbolTableX[i])
566
printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
571
printf("RLE set to %+v, code: %v", symb, v)
574
println("Reading table for", tableIndex(i))
575
if seq.fse == nil || seq.fse.preDefined {
576
seq.fse = fseDecoderPool.Get().(*fseDecoder)
578
err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
580
println("Read table error:", err)
583
err = seq.fse.transform(symbolTableX[i])
585
println("Transform table error:", err)
589
println("Read table ok", "symbolLen:", seq.fse.symbolLen)
595
return io.ErrUnexpectedEOF
601
println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
605
if len(b.sequence) > 0 {
606
b.sequence = b.sequence[:0]
614
if err := br.init(in); err != nil {
618
if err := seqs.initialize(br, hist, b.dst); err != nil {
619
println("initializing sequences:", err)
625
func (b *blockDec) decodeSequences(hist *history) error {
626
if cap(b.sequence) < hist.decoders.nSeqs {
628
b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
630
b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
633
b.sequence = b.sequence[:hist.decoders.nSeqs]
634
if hist.decoders.nSeqs == 0 {
635
hist.decoders.seqSize = len(hist.decoders.literals)
638
hist.decoders.prevOffset = hist.recentOffsets
639
err := hist.decoders.decode(b.sequence)
640
hist.recentOffsets = hist.decoders.prevOffset
644
func (b *blockDec) executeSequences(hist *history) error {
646
if len(hbytes) > hist.windowSize {
647
hbytes = hbytes[len(hbytes)-hist.windowSize:]
648
// We do not need history anymore.
649
if hist.dict != nil {
650
hist.dict.content = nil
653
hist.decoders.windowSize = hist.windowSize
654
hist.decoders.out = b.dst[:0]
655
err := hist.decoders.execute(b.sequence, hbytes)
659
return b.updateHistory(hist)
662
func (b *blockDec) updateHistory(hist *history) error {
663
if len(b.data) > maxCompressedBlockSize {
664
return fmt.Errorf("compressed block size too large (%d)", len(b.data))
666
// Set output and release references.
667
b.dst = hist.decoders.out
668
hist.recentOffsets = hist.decoders.prevOffset
671
// if last block we don't care about history.
672
println("Last block, no history returned")
678
println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
681
hist.decoders.out, hist.decoders.literals = nil, nil