1
// Copyright 2019+ Klaus Post. All rights reserved.
2
// License information can be found in the LICENSE file.
3
// Based on work by Yann Collet, released under BSD License.
18
// Codes are stored here for the encoder
19
// so they only have to be looked up once.
20
llCode, mlCode, ofCode uint8
27
func (s seq) String() string {
30
return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)")
32
return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)")
34
return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)")
40
compModePredefined seqCompMode = iota
46
type sequenceDec struct {
47
// decoder keeps track of the current state and updates it from the bitstream.
53
// init the state of the decoder with input from stream.
54
func (s *sequenceDec) init(br *bitReader) error {
56
return errors.New("sequence decoder not defined")
58
s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1<<s.fse.actualTableLog])
62
// sequenceDecs contains all 3 sequence decoders and their state.
63
type sequenceDecs struct {
64
litLengths sequenceDec
66
matchLengths sequenceDec
78
// initialize all 3 decoders from the stream input.
79
func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error {
80
if err := s.litLengths.init(br); err != nil {
81
return errors.New("litLengths:" + err.Error())
83
if err := s.offsets.init(br); err != nil {
84
return errors.New("offsets:" + err.Error())
86
if err := s.matchLengths.init(br); err != nil {
87
return errors.New("matchLengths:" + err.Error())
90
s.prevOffset = hist.recentOffsets
91
s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
92
s.windowSize = hist.windowSize
96
s.dict = hist.dict.content
101
// decode sequences from the stream with the provided history.
102
func (s *sequenceDecs) decode(seqs []seqVals) error {
105
// Grab full sizes tables, to avoid bounds checks.
106
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
107
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
109
litRemain := len(s.literals)
111
for i := range seqs {
113
if br.off > 4+((maxOffsetBits+16+16)>>3) {
115
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
117
// Final will not read from stream.
118
var llB, mlB, moB uint8
119
ll, llB = llState.final()
120
ml, mlB = mlState.final()
121
mo, moB = ofState.final()
123
// extra bits are stored in reverse order.
125
mo += br.getBits(moB)
129
ml += br.getBits(mlB)
130
ll += br.getBits(llB)
133
s.prevOffset[2] = s.prevOffset[1]
134
s.prevOffset[1] = s.prevOffset[0]
137
// mo = s.adjustOffset(mo, ll, moB)
138
// Inlined for rather big speedup
140
// There is an exception though, when current sequence's literals_length = 0.
141
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
142
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
151
temp = s.prevOffset[0] - 1
153
temp = s.prevOffset[mo]
157
// 0 is not valid; input is corrupted; force offset to 1
158
println("WARNING: temp was 0")
163
s.prevOffset[2] = s.prevOffset[1]
165
s.prevOffset[1] = s.prevOffset[0]
166
s.prevOffset[0] = temp
174
printf("reading sequence %d, exceeded available data\n", i)
176
return io.ErrUnexpectedEOF
178
ll, mo, ml = s.next(br, llState, mlState, ofState)
183
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
186
// We might be doing this async, so do it early.
187
if mo == 0 && ml > 0 {
188
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
190
if ml > maxMatchLen {
191
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
194
if s.seqSize > maxBlockSize {
195
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
199
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
206
if i == len(seqs)-1 {
207
// This is the last sequence, so we shouldn't update state.
211
// Manually inlined, ~ 5-20% faster
212
// Update all 3 states at once. Approx 20% faster.
213
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
215
llState = llTable[llState.newState()&maxTableMask]
216
mlState = mlTable[mlState.newState()&maxTableMask]
217
ofState = ofTable[ofState.newState()&maxTableMask]
219
bits := br.get32BitsFast(nBits)
220
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
221
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
223
lowBits = uint16(bits >> (ofState.nbBits() & 31))
224
lowBits &= bitMask[mlState.nbBits()&15]
225
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
227
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
228
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
231
s.seqSize += litRemain
232
if s.seqSize > maxBlockSize {
233
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
237
printf("Closing sequences: %v, %+v\n", err, *br)
242
// execute will execute the decoded sequence with the provided history.
243
// The sequence must be evaluated before being sent.
244
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
245
// Ensure we have enough output size...
246
if len(s.out)+s.seqSize > cap(s.out) {
247
addBytes := s.seqSize + len(s.out)
248
s.out = append(s.out, make([]byte, addBytes)...)
249
s.out = s.out[:len(s.out)-addBytes]
253
printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
257
out := s.out[:t+s.seqSize]
259
for _, seq := range seqs {
261
copy(out[t:], s.literals[:seq.ll])
263
s.literals = s.literals[seq.ll:]
265
// Copy from dictionary...
266
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
267
if len(s.dict) == 0 {
268
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
271
// we may be in dictionary.
272
dictO := len(s.dict) - (seq.mo - (t + len(hist)))
273
if dictO < 0 || dictO >= len(s.dict) {
274
return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
276
end := dictO + seq.ml
277
if end > len(s.dict) {
278
n := len(s.dict) - dictO
279
copy(out[t:], s.dict[dictO:])
283
copy(out[t:], s.dict[dictO:end])
289
// Copy from history.
290
if v := seq.mo - t; v > 0 {
291
// v is the start position in history from end.
292
start := len(hist) - v
294
// Some goes into current block.
295
// Copy remainder of history
296
copy(out[t:], hist[start:])
300
copy(out[t:], hist[start:start+seq.ml])
305
// We must be in current buffer now
308
if seq.ml <= t-start {
310
copy(out[t:], out[start:start+seq.ml])
315
// Extend destination slice and copy one byte at the time.
316
src := out[start : start+seq.ml]
320
// Destination is the space we just added.
327
// Add final literals
328
copy(out[t:], s.literals)
332
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
340
// decode sequences from the stream with the provided history.
341
func (s *sequenceDecs) decodeSync(history *history) error {
344
startSize := len(s.out)
345
// Grab full sizes tables, to avoid bounds checks.
346
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
347
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
348
hist := history.b[history.ignoreBuffer:]
351
for i := seqs - 1; i >= 0; i-- {
353
printf("reading sequence %d, exceeded available data\n", seqs-i)
354
return io.ErrUnexpectedEOF
357
if br.off > 4+((maxOffsetBits+16+16)>>3) {
359
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
361
// Final will not read from stream.
362
var llB, mlB, moB uint8
363
ll, llB = llState.final()
364
ml, mlB = mlState.final()
365
mo, moB = ofState.final()
367
// extra bits are stored in reverse order.
369
mo += br.getBits(moB)
373
ml += br.getBits(mlB)
374
ll += br.getBits(llB)
377
s.prevOffset[2] = s.prevOffset[1]
378
s.prevOffset[1] = s.prevOffset[0]
381
// mo = s.adjustOffset(mo, ll, moB)
382
// Inlined for rather big speedup
384
// There is an exception though, when current sequence's literals_length = 0.
385
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
386
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
395
temp = s.prevOffset[0] - 1
397
temp = s.prevOffset[mo]
401
// 0 is not valid; input is corrupted; force offset to 1
402
println("WARNING: temp was 0")
407
s.prevOffset[2] = s.prevOffset[1]
409
s.prevOffset[1] = s.prevOffset[0]
410
s.prevOffset[0] = temp
416
ll, mo, ml = s.next(br, llState, mlState, ofState)
421
println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
424
if ll > len(s.literals) {
425
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
427
size := ll + ml + len(out)
428
if size-startSize > maxBlockSize {
429
return fmt.Errorf("output (%d) bigger than max block size", size)
432
// Not enough size, which can happen under high volume block streaming conditions
433
// but could be if destination slice is too small for sync operations.
434
// over-allocating here can create a large amount of GC pressure so we try to keep
435
// it as contained as possible
436
used := len(out) - startSize
437
addBytes := 256 + ll + ml + used>>2
438
// Clamp to max block size.
439
if used+addBytes > maxBlockSize {
440
addBytes = maxBlockSize - used
442
out = append(out, make([]byte, addBytes)...)
443
out = out[:len(out)-addBytes]
445
if ml > maxMatchLen {
446
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
450
out = append(out, s.literals[:ll]...)
451
s.literals = s.literals[ll:]
453
if mo == 0 && ml > 0 {
454
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
457
if mo > len(out)+len(hist) || mo > s.windowSize {
458
if len(s.dict) == 0 {
459
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
462
// we may be in dictionary.
463
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
464
if dictO < 0 || dictO >= len(s.dict) {
465
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
468
if end > len(s.dict) {
469
out = append(out, s.dict[dictO:]...)
470
ml -= len(s.dict) - dictO
472
out = append(out, s.dict[dictO:end]...)
478
// Copy from history.
479
// TODO: Blocks without history could be made to ignore this completely.
480
if v := mo - len(out); v > 0 {
481
// v is the start position in history from end.
482
start := len(hist) - v
484
// Some goes into current block.
485
// Copy remainder of history
486
out = append(out, hist[start:]...)
489
out = append(out, hist[start:start+ml]...)
493
// We must be in current buffer now
495
start := len(out) - mo
496
if ml <= len(out)-start {
498
out = append(out, out[start:start+ml]...)
501
// Extend destination slice and copy one byte at the time.
502
out = out[:len(out)+ml]
503
src := out[start : start+ml]
504
// Destination is the space we just added.
505
dst := out[len(out)-ml:]
513
// This is the last sequence, so we shouldn't update state.
517
// Manually inlined, ~ 5-20% faster
518
// Update all 3 states at once. Approx 20% faster.
519
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
521
llState = llTable[llState.newState()&maxTableMask]
522
mlState = mlTable[mlState.newState()&maxTableMask]
523
ofState = ofTable[ofState.newState()&maxTableMask]
525
bits := br.get32BitsFast(nBits)
526
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
527
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
529
lowBits = uint16(bits >> (ofState.nbBits() & 31))
530
lowBits &= bitMask[mlState.nbBits()&15]
531
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
533
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
534
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
538
// Add final literals
539
s.out = append(out, s.literals...)
543
// update states, at least 27 bits must be available.
544
func (s *sequenceDecs) update(br *bitReader) {
546
s.litLengths.state.next(br)
548
s.matchLengths.state.next(br)
550
s.offsets.state.next(br)
553
var bitMask [16]uint16
556
for i := range bitMask[:] {
557
bitMask[i] = uint16((1 << uint(i)) - 1)
561
// update states, at least 27 bits must be available.
562
func (s *sequenceDecs) updateAlt(br *bitReader) {
563
// Update all 3 states at once. Approx 20% faster.
564
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
566
nBits := a.nbBits() + b.nbBits() + c.nbBits()
568
s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
569
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
570
s.offsets.state.state = s.offsets.state.dt[c.newState()]
573
bits := br.get32BitsFast(nBits)
574
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
575
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
577
lowBits = uint16(bits >> (c.nbBits() & 31))
578
lowBits &= bitMask[b.nbBits()&15]
579
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
581
lowBits = uint16(bits) & bitMask[c.nbBits()&15]
582
s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
585
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
586
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
587
// Final will not read from stream.
588
ll, llB := llState.final()
589
ml, mlB := mlState.final()
590
mo, moB := ofState.final()
592
// extra bits are stored in reverse order.
594
mo += br.getBits(moB)
598
ml += br.getBits(mlB)
599
ll += br.getBits(llB)
602
s.prevOffset[2] = s.prevOffset[1]
603
s.prevOffset[1] = s.prevOffset[0]
607
// mo = s.adjustOffset(mo, ll, moB)
608
// Inlined for rather big speedup
610
// There is an exception though, when current sequence's literals_length = 0.
611
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
612
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
622
temp = s.prevOffset[0] - 1
624
temp = s.prevOffset[mo]
628
// 0 is not valid; input is corrupted; force offset to 1
629
println("temp was 0")
634
s.prevOffset[2] = s.prevOffset[1]
636
s.prevOffset[1] = s.prevOffset[0]
637
s.prevOffset[0] = temp
642
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
643
// Final will not read from stream.
644
ll, llB := llState.final()
645
ml, mlB := mlState.final()
646
mo, moB := ofState.final()
648
// extra bits are stored in reverse order.
651
mo += br.getBits(moB)
652
ml += br.getBits(mlB)
653
ll += br.getBits(llB)
655
mo += br.getBits(moB)
657
// matchlength+literal length, max 32 bits
658
ml += br.getBits(mlB)
659
ll += br.getBits(llB)
662
mo = s.adjustOffset(mo, ll, moB)
666
func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
668
s.prevOffset[2] = s.prevOffset[1]
669
s.prevOffset[1] = s.prevOffset[0]
670
s.prevOffset[0] = offset
675
// There is an exception though, when current sequence's literals_length = 0.
676
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
677
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
682
return s.prevOffset[0]
686
temp = s.prevOffset[0] - 1
688
temp = s.prevOffset[offset]
692
// 0 is not valid; input is corrupted; force offset to 1
693
println("temp was 0")
698
s.prevOffset[2] = s.prevOffset[1]
700
s.prevOffset[1] = s.prevOffset[0]
701
s.prevOffset[0] = temp