11
"github.com/klauspost/compress"
15
bestLongTableBits = 22
16
bestLongTableSize = 1 << bestLongTableBits
23
bestShortTableBits = 18
24
bestShortTableSize = 1 << bestShortTableBits
37
const highScore = 25000
40
func (m *match) estBits(bitsPerByte int32) {
41
mlc := mlCode(uint32(m.length - zstdMinMatch))
44
ofc = ofCode(uint32(m.s-m.offset) + 3)
46
ofc = ofCode(uint32(m.rep))
49
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
52
m.est = int32(ofTT.outBits + mlTT.outBits)
53
m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
55
m.est -= (m.length * bitsPerByte) >> 10
69
type bestFastEncoder struct {
71
table [bestShortTableSize]prevEntry
72
longTable [bestLongTableSize]prevEntry
74
dictLongTable []prevEntry
78
func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
83
minNonLiteralBlockSize = 16
87
for e.cur >= bufferReset {
89
for i := range e.table[:] {
90
e.table[i] = prevEntry{}
92
for i := range e.longTable[:] {
93
e.longTable[i] = prevEntry{}
99
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
100
for i := range e.table[:] {
101
v := e.table[i].offset
102
v2 := e.table[i].prev
107
v = v - e.cur + e.maxMatchOff
111
v2 = v2 - e.cur + e.maxMatchOff
114
e.table[i] = prevEntry{
119
for i := range e.longTable[:] {
120
v := e.longTable[i].offset
121
v2 := e.longTable[i].prev
126
v = v - e.cur + e.maxMatchOff
130
v2 = v2 - e.cur + e.maxMatchOff
133
e.longTable[i] = prevEntry{
138
e.cur = e.maxMatchOff
144
if len(src) < minNonLiteralBlockSize {
145
blk.extraLits = len(src)
146
blk.literals = blk.literals[:len(src)]
147
copy(blk.literals, src)
153
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
155
if bitsPerByte < 1024 {
161
sLimit := int32(len(src)) - inputMargin
162
const kSearchStrength = 10
166
cv := load6432(src, s)
169
offset1 := int32(blk.recentOffsets[0])
170
offset2 := int32(blk.recentOffsets[1])
171
offset3 := int32(blk.recentOffsets[2])
173
addLiterals := func(s *seq, until int32) {
174
if until == nextEmit {
177
blk.literals = append(blk.literals, src[nextEmit:until]...)
178
s.litLen = uint32(until - nextEmit)
183
println("recent offsets:", blk.recentOffsets)
189
canRepeat := len(blk.sequences) > 2
191
if debugAsserts && canRepeat && offset1 == 0 {
192
panic("offset0 was 0")
195
bestOf := func(a, b match) match {
196
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
201
const goodEnough = 100
203
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
204
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
205
candidateL := e.longTable[nextHashL]
206
candidateS := e.table[nextHashS]
208
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
209
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
210
return match{s: s, est: highScore}
213
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
214
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
217
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
218
m.estBits(bitsPerByte)
222
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
223
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
224
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
226
if canRepeat && best.length < goodEnough {
227
cv32 := uint32(cv >> 8)
229
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
230
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
231
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
233
cv32 = uint32(cv >> 24)
235
best = bestOf(best, matchAt(spp-offset1, spp, cv32, 1))
236
best = bestOf(best, matchAt(spp-offset2, spp, cv32, 2))
237
best = bestOf(best, matchAt(spp-offset3, spp, cv32, 3))
241
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
242
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
245
if best.length < goodEnough {
248
s += 1 + (s-nextEmit)>>(kSearchStrength-1)
252
cv = load6432(src, s)
257
candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
258
cv = load6432(src, s)
259
cv2 := load6432(src, s+1)
260
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
261
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
264
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
266
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
267
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
268
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
269
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
273
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
277
if sAt := best.s + best.length; sAt < sLimit {
278
nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
279
candidateEnd := e.longTable[nextHashL]
280
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
281
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
282
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
283
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
291
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
292
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
300
seq.matchLen = uint32(best.length - zstdMinMatch)
307
startLimit := nextEmit + 1
309
tMin := s - e.maxMatchOff
313
repIndex := best.offset
314
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
319
addLiterals(&seq, start)
322
seq.offset = uint32(best.rep)
324
println("repeat sequence", seq, "next s:", s)
326
blk.sequences = append(blk.sequences, seq)
330
s = best.s + best.length
335
println("repeat ended", s, best.length)
341
off := index0 + e.cur
343
cv0 := load6432(src, index0)
344
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
345
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
346
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
347
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
353
offset1, offset2 = offset2, offset1
355
offset1, offset2, offset3 = offset3, offset1, offset2
357
cv = load6432(src, s)
365
offset1, offset2, offset3 = s-t, offset1, offset2
367
if debugAsserts && s <= t {
368
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
371
if debugAsserts && int(offset1) > len(src) {
372
panic("invalid offset")
379
tMin := s - e.maxMatchOff
383
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
391
seq.litLen = uint32(s - nextEmit)
392
seq.matchLen = uint32(l - zstdMinMatch)
394
blk.literals = append(blk.literals, src[nextEmit:s]...)
396
seq.offset = uint32(s-t) + 3
399
println("sequence", seq, "next s:", s)
401
blk.sequences = append(blk.sequences, seq)
411
cv0 := load6432(src, index0)
412
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
413
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
414
off := index0 + e.cur
415
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
416
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
420
cv = load6432(src, s)
428
if load3232(src, o2) != uint32(cv) {
434
nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
435
nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
439
l := 4 + e.matchlen(s+4, o2+4, src)
441
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
442
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
443
seq.matchLen = uint32(l) - zstdMinMatch
451
println("sequence", seq, "next s:", s)
453
blk.sequences = append(blk.sequences, seq)
456
offset1, offset2 = offset2, offset1
461
cv = load6432(src, s)
465
if int(nextEmit) < len(src) {
466
blk.literals = append(blk.literals, src[nextEmit:]...)
467
blk.extraLits = len(src) - int(nextEmit)
469
blk.recentOffsets[0] = uint32(offset1)
470
blk.recentOffsets[1] = uint32(offset2)
471
blk.recentOffsets[2] = uint32(offset3)
473
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
480
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
481
e.ensureHist(len(src))
486
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
487
e.resetBase(d, singleBlock)
492
if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
493
if len(e.dictTable) != len(e.table) {
494
e.dictTable = make([]prevEntry, len(e.table))
496
end := int32(len(d.content)) - 8 + e.maxMatchOff
497
for i := e.maxMatchOff; i < end; i += 4 {
498
const hashLog = bestShortTableBits
500
cv := load6432(d.content, i-e.maxMatchOff)
501
nextHash := hashLen(cv, hashLog, bestShortLen)
502
nextHash1 := hashLen(cv>>8, hashLog, bestShortLen)
503
nextHash2 := hashLen(cv>>16, hashLog, bestShortLen)
504
nextHash3 := hashLen(cv>>24, hashLog, bestShortLen)
505
e.dictTable[nextHash] = prevEntry{
506
prev: e.dictTable[nextHash].offset,
509
e.dictTable[nextHash1] = prevEntry{
510
prev: e.dictTable[nextHash1].offset,
513
e.dictTable[nextHash2] = prevEntry{
514
prev: e.dictTable[nextHash2].offset,
517
e.dictTable[nextHash3] = prevEntry{
518
prev: e.dictTable[nextHash3].offset,
526
if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
527
if len(e.dictLongTable) != len(e.longTable) {
528
e.dictLongTable = make([]prevEntry, len(e.longTable))
530
if len(d.content) >= 8 {
531
cv := load6432(d.content, 0)
532
h := hashLen(cv, bestLongTableBits, bestLongLen)
533
e.dictLongTable[h] = prevEntry{
534
offset: e.maxMatchOff,
535
prev: e.dictLongTable[h].offset,
538
end := int32(len(d.content)) - 8 + e.maxMatchOff
540
for i := e.maxMatchOff + 1; i < end; i++ {
541
cv = cv>>8 | (uint64(d.content[off]) << 56)
542
h := hashLen(cv, bestLongTableBits, bestLongLen)
543
e.dictLongTable[h] = prevEntry{
545
prev: e.dictLongTable[h].offset,
553
copy(e.longTable[:], e.dictLongTable)
555
e.cur = e.maxMatchOff
557
copy(e.table[:], e.dictTable)