cubefs

blockenc.go
871 строка · 22.6 Кб
Перенос по словам
1
// Copyright 2019+ Klaus Post. All rights reserved.
2
// License information can be found in the LICENSE file.
3
// Based on work by Yann Collet, released under BSD License.
4

5
package zstd
6

7
import (
8
	"errors"
9
	"fmt"
10
	"math"
11
	"math/bits"
12

13
	"github.com/klauspost/compress/huff0"
14
)
15

16
type blockEnc struct {
17
	size       int
18
	literals   []byte
19
	sequences  []seq
20
	coders     seqCoders
21
	litEnc     *huff0.Scratch
22
	dictLitEnc *huff0.Scratch
23
	wr         bitWriter
24

25
	extraLits         int
26
	output            []byte
27
	recentOffsets     [3]uint32
28
	prevRecentOffsets [3]uint32
29

30
	last   bool
31
	lowMem bool
32
}
33

34
// init should be used once the block has been created.
35
// If called more than once, the effect is the same as calling reset.
36
func (b *blockEnc) init() {
37
	if b.lowMem {
38
		// 1K literals
39
		if cap(b.literals) < 1<<10 {
40
			b.literals = make([]byte, 0, 1<<10)
41
		}
42
		const defSeqs = 20
43
		if cap(b.sequences) < defSeqs {
44
			b.sequences = make([]seq, 0, defSeqs)
45
		}
46
		// 1K
47
		if cap(b.output) < 1<<10 {
48
			b.output = make([]byte, 0, 1<<10)
49
		}
50
	} else {
51
		if cap(b.literals) < maxCompressedBlockSize {
52
			b.literals = make([]byte, 0, maxCompressedBlockSize)
53
		}
54
		const defSeqs = 2000
55
		if cap(b.sequences) < defSeqs {
56
			b.sequences = make([]seq, 0, defSeqs)
57
		}
58
		if cap(b.output) < maxCompressedBlockSize {
59
			b.output = make([]byte, 0, maxCompressedBlockSize)
60
		}
61
	}
62

63
	if b.coders.mlEnc == nil {
64
		b.coders.mlEnc = &fseEncoder{}
65
		b.coders.mlPrev = &fseEncoder{}
66
		b.coders.ofEnc = &fseEncoder{}
67
		b.coders.ofPrev = &fseEncoder{}
68
		b.coders.llEnc = &fseEncoder{}
69
		b.coders.llPrev = &fseEncoder{}
70
	}
71
	b.litEnc = &huff0.Scratch{WantLogLess: 4}
72
	b.reset(nil)
73
}
74

75
// initNewEncode can be used to reset offsets and encoders to the initial state.
76
func (b *blockEnc) initNewEncode() {
77
	b.recentOffsets = [3]uint32{1, 4, 8}
78
	b.litEnc.Reuse = huff0.ReusePolicyNone
79
	b.coders.setPrev(nil, nil, nil)
80
}
81

82
// reset will reset the block for a new encode, but in the same stream,
83
// meaning that state will be carried over, but the block content is reset.
84
// If a previous block is provided, the recent offsets are carried over.
85
func (b *blockEnc) reset(prev *blockEnc) {
86
	b.extraLits = 0
87
	b.literals = b.literals[:0]
88
	b.size = 0
89
	b.sequences = b.sequences[:0]
90
	b.output = b.output[:0]
91
	b.last = false
92
	if prev != nil {
93
		b.recentOffsets = prev.prevRecentOffsets
94
	}
95
	b.dictLitEnc = nil
96
}
97

98
// reset will reset the block for a new encode, but in the same stream,
99
// meaning that state will be carried over, but the block content is reset.
100
// If a previous block is provided, the recent offsets are carried over.
101
func (b *blockEnc) swapEncoders(prev *blockEnc) {
102
	b.coders.swap(&prev.coders)
103
	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
104
}
105

106
// blockHeader contains the information for a block header.
107
type blockHeader uint32
108

109
// setLast sets the 'last' indicator on a block.
110
func (h *blockHeader) setLast(b bool) {
111
	if b {
112
		*h = *h | 1
113
	} else {
114
		const mask = (1 << 24) - 2
115
		*h = *h & mask
116
	}
117
}
118

119
// setSize will store the compressed size of a block.
120
func (h *blockHeader) setSize(v uint32) {
121
	const mask = 7
122
	*h = (*h)&mask | blockHeader(v<<3)
123
}
124

125
// setType sets the block type.
126
func (h *blockHeader) setType(t blockType) {
127
	const mask = 1 | (((1 << 24) - 1) ^ 7)
128
	*h = (*h & mask) | blockHeader(t<<1)
129
}
130

131
// appendTo will append the block header to a slice.
132
func (h blockHeader) appendTo(b []byte) []byte {
133
	return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
134
}
135

136
// String returns a string representation of the block.
137
func (h blockHeader) String() string {
138
	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
139
}
140

141
// literalsHeader contains literals header information.
142
type literalsHeader uint64
143

144
// setType can be used to set the type of literal block.
145
func (h *literalsHeader) setType(t literalsBlockType) {
146
	const mask = math.MaxUint64 - 3
147
	*h = (*h & mask) | literalsHeader(t)
148
}
149

150
// setSize can be used to set a single size, for uncompressed and RLE content.
151
func (h *literalsHeader) setSize(regenLen int) {
152
	inBits := bits.Len32(uint32(regenLen))
153
	// Only retain 2 bits
154
	const mask = 3
155
	lh := uint64(*h & mask)
156
	switch {
157
	case inBits < 5:
158
		lh |= (uint64(regenLen) << 3) | (1 << 60)
159
		if debugEncoder {
160
			got := int(lh>>3) & 0xff
161
			if got != regenLen {
162
				panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
163
			}
164
		}
165
	case inBits < 12:
166
		lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
167
	case inBits < 20:
168
		lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
169
	default:
170
		panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
171
	}
172
	*h = literalsHeader(lh)
173
}
174

175
// setSizes will set the size of a compressed literals section and the input length.
176
func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
177
	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
178
	// Only retain 2 bits
179
	const mask = 3
180
	lh := uint64(*h & mask)
181
	switch {
182
	case compBits <= 10 && inBits <= 10:
183
		if !single {
184
			lh |= 1 << 2
185
		}
186
		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
187
		if debugEncoder {
188
			const mmask = (1 << 24) - 1
189
			n := (lh >> 4) & mmask
190
			if int(n&1023) != inLen {
191
				panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
192
			}
193
			if int(n>>10) != compLen {
194
				panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
195
			}
196
		}
197
	case compBits <= 14 && inBits <= 14:
198
		lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
199
		if single {
200
			panic("single stream used with more than 10 bits length.")
201
		}
202
	case compBits <= 18 && inBits <= 18:
203
		lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
204
		if single {
205
			panic("single stream used with more than 10 bits length.")
206
		}
207
	default:
208
		panic("internal error: block too big")
209
	}
210
	*h = literalsHeader(lh)
211
}
212

213
// appendTo will append the literals header to a byte slice.
214
func (h literalsHeader) appendTo(b []byte) []byte {
215
	size := uint8(h >> 60)
216
	switch size {
217
	case 1:
218
		b = append(b, uint8(h))
219
	case 2:
220
		b = append(b, uint8(h), uint8(h>>8))
221
	case 3:
222
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
223
	case 4:
224
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
225
	case 5:
226
		b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
227
	default:
228
		panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
229
	}
230
	return b
231
}
232

233
// size returns the output size with currently set values.
234
func (h literalsHeader) size() int {
235
	return int(h >> 60)
236
}
237

238
func (h literalsHeader) String() string {
239
	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
240
}
241

242
// pushOffsets will push the recent offsets to the backup store.
243
func (b *blockEnc) pushOffsets() {
244
	b.prevRecentOffsets = b.recentOffsets
245
}
246

247
// pushOffsets will push the recent offsets to the backup store.
248
func (b *blockEnc) popOffsets() {
249
	b.recentOffsets = b.prevRecentOffsets
250
}
251

252
// matchOffset will adjust recent offsets and return the adjusted one,
253
// if it matches a previous offset.
254
func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
255
	// Check if offset is one of the recent offsets.
256
	// Adjusts the output offset accordingly.
257
	// Gives a tiny bit of compression, typically around 1%.
258
	if true {
259
		if lits > 0 {
260
			switch offset {
261
			case b.recentOffsets[0]:
262
				offset = 1
263
			case b.recentOffsets[1]:
264
				b.recentOffsets[1] = b.recentOffsets[0]
265
				b.recentOffsets[0] = offset
266
				offset = 2
267
			case b.recentOffsets[2]:
268
				b.recentOffsets[2] = b.recentOffsets[1]
269
				b.recentOffsets[1] = b.recentOffsets[0]
270
				b.recentOffsets[0] = offset
271
				offset = 3
272
			default:
273
				b.recentOffsets[2] = b.recentOffsets[1]
274
				b.recentOffsets[1] = b.recentOffsets[0]
275
				b.recentOffsets[0] = offset
276
				offset += 3
277
			}
278
		} else {
279
			switch offset {
280
			case b.recentOffsets[1]:
281
				b.recentOffsets[1] = b.recentOffsets[0]
282
				b.recentOffsets[0] = offset
283
				offset = 1
284
			case b.recentOffsets[2]:
285
				b.recentOffsets[2] = b.recentOffsets[1]
286
				b.recentOffsets[1] = b.recentOffsets[0]
287
				b.recentOffsets[0] = offset
288
				offset = 2
289
			case b.recentOffsets[0] - 1:
290
				b.recentOffsets[2] = b.recentOffsets[1]
291
				b.recentOffsets[1] = b.recentOffsets[0]
292
				b.recentOffsets[0] = offset
293
				offset = 3
294
			default:
295
				b.recentOffsets[2] = b.recentOffsets[1]
296
				b.recentOffsets[1] = b.recentOffsets[0]
297
				b.recentOffsets[0] = offset
298
				offset += 3
299
			}
300
		}
301
	} else {
302
		offset += 3
303
	}
304
	return offset
305
}
306

307
// encodeRaw can be used to set the output to a raw representation of supplied bytes.
308
func (b *blockEnc) encodeRaw(a []byte) {
309
	var bh blockHeader
310
	bh.setLast(b.last)
311
	bh.setSize(uint32(len(a)))
312
	bh.setType(blockTypeRaw)
313
	b.output = bh.appendTo(b.output[:0])
314
	b.output = append(b.output, a...)
315
	if debugEncoder {
316
		println("Adding RAW block, length", len(a), "last:", b.last)
317
	}
318
}
319

320
// encodeRaw can be used to set the output to a raw representation of supplied bytes.
321
func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
322
	var bh blockHeader
323
	bh.setLast(b.last)
324
	bh.setSize(uint32(len(src)))
325
	bh.setType(blockTypeRaw)
326
	dst = bh.appendTo(dst)
327
	dst = append(dst, src...)
328
	if debugEncoder {
329
		println("Adding RAW block, length", len(src), "last:", b.last)
330
	}
331
	return dst
332
}
333

334
// encodeLits can be used if the block is only litLen.
335
func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
336
	var bh blockHeader
337
	bh.setLast(b.last)
338
	bh.setSize(uint32(len(lits)))
339

340
	// Don't compress extremely small blocks
341
	if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
342
		if debugEncoder {
343
			println("Adding RAW block, length", len(lits), "last:", b.last)
344
		}
345
		bh.setType(blockTypeRaw)
346
		b.output = bh.appendTo(b.output)
347
		b.output = append(b.output, lits...)
348
		return nil
349
	}
350

351
	var (
352
		out            []byte
353
		reUsed, single bool
354
		err            error
355
	)
356
	if b.dictLitEnc != nil {
357
		b.litEnc.TransferCTable(b.dictLitEnc)
358
		b.litEnc.Reuse = huff0.ReusePolicyAllow
359
		b.dictLitEnc = nil
360
	}
361
	if len(lits) >= 1024 {
362
		// Use 4 Streams.
363
		out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
364
	} else if len(lits) > 32 {
365
		// Use 1 stream
366
		single = true
367
		out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
368
	} else {
369
		err = huff0.ErrIncompressible
370
	}
371

372
	switch err {
373
	case huff0.ErrIncompressible:
374
		if debugEncoder {
375
			println("Adding RAW block, length", len(lits), "last:", b.last)
376
		}
377
		bh.setType(blockTypeRaw)
378
		b.output = bh.appendTo(b.output)
379
		b.output = append(b.output, lits...)
380
		return nil
381
	case huff0.ErrUseRLE:
382
		if debugEncoder {
383
			println("Adding RLE block, length", len(lits))
384
		}
385
		bh.setType(blockTypeRLE)
386
		b.output = bh.appendTo(b.output)
387
		b.output = append(b.output, lits[0])
388
		return nil
389
	case nil:
390
	default:
391
		return err
392
	}
393
	// Compressed...
394
	// Now, allow reuse
395
	b.litEnc.Reuse = huff0.ReusePolicyAllow
396
	bh.setType(blockTypeCompressed)
397
	var lh literalsHeader
398
	if reUsed {
399
		if debugEncoder {
400
			println("Reused tree, compressed to", len(out))
401
		}
402
		lh.setType(literalsBlockTreeless)
403
	} else {
404
		if debugEncoder {
405
			println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
406
		}
407
		lh.setType(literalsBlockCompressed)
408
	}
409
	// Set sizes
410
	lh.setSizes(len(out), len(lits), single)
411
	bh.setSize(uint32(len(out) + lh.size() + 1))
412

413
	// Write block headers.
414
	b.output = bh.appendTo(b.output)
415
	b.output = lh.appendTo(b.output)
416
	// Add compressed data.
417
	b.output = append(b.output, out...)
418
	// No sequences.
419
	b.output = append(b.output, 0)
420
	return nil
421
}
422

423
// fuzzFseEncoder can be used to fuzz the FSE encoder.
424
func fuzzFseEncoder(data []byte) int {
425
	if len(data) > maxSequences || len(data) < 2 {
426
		return 0
427
	}
428
	enc := fseEncoder{}
429
	hist := enc.Histogram()
430
	maxSym := uint8(0)
431
	for i, v := range data {
432
		v = v & 63
433
		data[i] = v
434
		hist[v]++
435
		if v > maxSym {
436
			maxSym = v
437
		}
438
	}
439
	if maxSym == 0 {
440
		// All 0
441
		return 0
442
	}
443
	maxCount := func(a []uint32) int {
444
		var max uint32
445
		for _, v := range a {
446
			if v > max {
447
				max = v
448
			}
449
		}
450
		return int(max)
451
	}
452
	cnt := maxCount(hist[:maxSym])
453
	if cnt == len(data) {
454
		// RLE
455
		return 0
456
	}
457
	enc.HistogramFinished(maxSym, cnt)
458
	err := enc.normalizeCount(len(data))
459
	if err != nil {
460
		return 0
461
	}
462
	_, err = enc.writeCount(nil)
463
	if err != nil {
464
		panic(err)
465
	}
466
	return 1
467
}
468

469
// encode will encode the block and append the output in b.output.
470
// Previous offset codes must be pushed if more blocks are expected.
471
func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
472
	if len(b.sequences) == 0 {
473
		return b.encodeLits(b.literals, rawAllLits)
474
	}
475
	// We want some difference to at least account for the headers.
476
	saved := b.size - len(b.literals) - (b.size >> 5)
477
	if saved < 16 {
478
		if org == nil {
479
			return errIncompressible
480
		}
481
		b.popOffsets()
482
		return b.encodeLits(org, rawAllLits)
483
	}
484

485
	var bh blockHeader
486
	var lh literalsHeader
487
	bh.setLast(b.last)
488
	bh.setType(blockTypeCompressed)
489
	// Store offset of the block header. Needed when we know the size.
490
	bhOffset := len(b.output)
491
	b.output = bh.appendTo(b.output)
492

493
	var (
494
		out            []byte
495
		reUsed, single bool
496
		err            error
497
	)
498
	if b.dictLitEnc != nil {
499
		b.litEnc.TransferCTable(b.dictLitEnc)
500
		b.litEnc.Reuse = huff0.ReusePolicyAllow
501
		b.dictLitEnc = nil
502
	}
503
	if len(b.literals) >= 1024 && !raw {
504
		// Use 4 Streams.
505
		out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
506
	} else if len(b.literals) > 32 && !raw {
507
		// Use 1 stream
508
		single = true
509
		out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
510
	} else {
511
		err = huff0.ErrIncompressible
512
	}
513

514
	switch err {
515
	case huff0.ErrIncompressible:
516
		lh.setType(literalsBlockRaw)
517
		lh.setSize(len(b.literals))
518
		b.output = lh.appendTo(b.output)
519
		b.output = append(b.output, b.literals...)
520
		if debugEncoder {
521
			println("Adding literals RAW, length", len(b.literals))
522
		}
523
	case huff0.ErrUseRLE:
524
		lh.setType(literalsBlockRLE)
525
		lh.setSize(len(b.literals))
526
		b.output = lh.appendTo(b.output)
527
		b.output = append(b.output, b.literals[0])
528
		if debugEncoder {
529
			println("Adding literals RLE")
530
		}
531
	case nil:
532
		// Compressed litLen...
533
		if reUsed {
534
			if debugEncoder {
535
				println("reused tree")
536
			}
537
			lh.setType(literalsBlockTreeless)
538
		} else {
539
			if debugEncoder {
540
				println("new tree, size:", len(b.litEnc.OutTable))
541
			}
542
			lh.setType(literalsBlockCompressed)
543
			if debugEncoder {
544
				_, _, err := huff0.ReadTable(out, nil)
545
				if err != nil {
546
					panic(err)
547
				}
548
			}
549
		}
550
		lh.setSizes(len(out), len(b.literals), single)
551
		if debugEncoder {
552
			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
553
			println("Adding literal header:", lh)
554
		}
555
		b.output = lh.appendTo(b.output)
556
		b.output = append(b.output, out...)
557
		b.litEnc.Reuse = huff0.ReusePolicyAllow
558
		if debugEncoder {
559
			println("Adding literals compressed")
560
		}
561
	default:
562
		if debugEncoder {
563
			println("Adding literals ERROR:", err)
564
		}
565
		return err
566
	}
567
	// Sequence compression
568

569
	// Write the number of sequences
570
	switch {
571
	case len(b.sequences) < 128:
572
		b.output = append(b.output, uint8(len(b.sequences)))
573
	case len(b.sequences) < 0x7f00: // TODO: this could be wrong
574
		n := len(b.sequences)
575
		b.output = append(b.output, 128+uint8(n>>8), uint8(n))
576
	default:
577
		n := len(b.sequences) - 0x7f00
578
		b.output = append(b.output, 255, uint8(n), uint8(n>>8))
579
	}
580
	if debugEncoder {
581
		println("Encoding", len(b.sequences), "sequences")
582
	}
583
	b.genCodes()
584
	llEnc := b.coders.llEnc
585
	ofEnc := b.coders.ofEnc
586
	mlEnc := b.coders.mlEnc
587
	err = llEnc.normalizeCount(len(b.sequences))
588
	if err != nil {
589
		return err
590
	}
591
	err = ofEnc.normalizeCount(len(b.sequences))
592
	if err != nil {
593
		return err
594
	}
595
	err = mlEnc.normalizeCount(len(b.sequences))
596
	if err != nil {
597
		return err
598
	}
599

600
	// Choose the best compression mode for each type.
601
	// Will evaluate the new vs predefined and previous.
602
	chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
603
		// See if predefined/previous is better
604
		hist := cur.count[:cur.symbolLen]
605
		nSize := cur.approxSize(hist) + cur.maxHeaderSize()
606
		predefSize := preDef.approxSize(hist)
607
		prevSize := prev.approxSize(hist)
608

609
		// Add a small penalty for new encoders.
610
		// Don't bother with extremely small (<2 byte gains).
611
		nSize = nSize + (nSize+2*8*16)>>4
612
		switch {
613
		case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
614
			if debugEncoder {
615
				println("Using predefined", predefSize>>3, "<=", nSize>>3)
616
			}
617
			return preDef, compModePredefined
618
		case prevSize <= nSize:
619
			if debugEncoder {
620
				println("Using previous", prevSize>>3, "<=", nSize>>3)
621
			}
622
			return prev, compModeRepeat
623
		default:
624
			if debugEncoder {
625
				println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
626
				println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
627
			}
628
			return cur, compModeFSE
629
		}
630
	}
631

632
	// Write compression mode
633
	var mode uint8
634
	if llEnc.useRLE {
635
		mode |= uint8(compModeRLE) << 6
636
		llEnc.setRLE(b.sequences[0].llCode)
637
		if debugEncoder {
638
			println("llEnc.useRLE")
639
		}
640
	} else {
641
		var m seqCompMode
642
		llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
643
		mode |= uint8(m) << 6
644
	}
645
	if ofEnc.useRLE {
646
		mode |= uint8(compModeRLE) << 4
647
		ofEnc.setRLE(b.sequences[0].ofCode)
648
		if debugEncoder {
649
			println("ofEnc.useRLE")
650
		}
651
	} else {
652
		var m seqCompMode
653
		ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
654
		mode |= uint8(m) << 4
655
	}
656

657
	if mlEnc.useRLE {
658
		mode |= uint8(compModeRLE) << 2
659
		mlEnc.setRLE(b.sequences[0].mlCode)
660
		if debugEncoder {
661
			println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
662
		}
663
	} else {
664
		var m seqCompMode
665
		mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
666
		mode |= uint8(m) << 2
667
	}
668
	b.output = append(b.output, mode)
669
	if debugEncoder {
670
		printf("Compression modes: 0b%b", mode)
671
	}
672
	b.output, err = llEnc.writeCount(b.output)
673
	if err != nil {
674
		return err
675
	}
676
	start := len(b.output)
677
	b.output, err = ofEnc.writeCount(b.output)
678
	if err != nil {
679
		return err
680
	}
681
	if false {
682
		println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
683
		fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
684
		for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
685
			fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
686
		}
687
	}
688
	b.output, err = mlEnc.writeCount(b.output)
689
	if err != nil {
690
		return err
691
	}
692

693
	// Maybe in block?
694
	wr := &b.wr
695
	wr.reset(b.output)
696

697
	var ll, of, ml cState
698

699
	// Current sequence
700
	seq := len(b.sequences) - 1
701
	s := b.sequences[seq]
702
	llEnc.setBits(llBitsTable[:])
703
	mlEnc.setBits(mlBitsTable[:])
704
	ofEnc.setBits(nil)
705

706
	llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
707

708
	// We have 3 bounds checks here (and in the loop).
709
	// Since we are iterating backwards it is kinda hard to avoid.
710
	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
711
	ll.init(wr, &llEnc.ct, llB)
712
	of.init(wr, &ofEnc.ct, ofB)
713
	wr.flush32()
714
	ml.init(wr, &mlEnc.ct, mlB)
715

716
	// Each of these lookups also generates a bounds check.
717
	wr.addBits32NC(s.litLen, llB.outBits)
718
	wr.addBits32NC(s.matchLen, mlB.outBits)
719
	wr.flush32()
720
	wr.addBits32NC(s.offset, ofB.outBits)
721
	if debugSequences {
722
		println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
723
	}
724
	seq--
725
	// Store sequences in reverse...
726
	for seq >= 0 {
727
		s = b.sequences[seq]
728

729
		ofB := ofTT[s.ofCode]
730
		wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
731
		//of.encode(ofB)
732
		nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
733
		dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
734
		wr.addBits16NC(of.state, uint8(nbBitsOut))
735
		of.state = of.stateTable[dstState]
736

737
		// Accumulate extra bits.
738
		outBits := ofB.outBits & 31
739
		extraBits := uint64(s.offset & bitMask32[outBits])
740
		extraBitsN := outBits
741

742
		mlB := mlTT[s.mlCode]
743
		//ml.encode(mlB)
744
		nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
745
		dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
746
		wr.addBits16NC(ml.state, uint8(nbBitsOut))
747
		ml.state = ml.stateTable[dstState]
748

749
		outBits = mlB.outBits & 31
750
		extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
751
		extraBitsN += outBits
752

753
		llB := llTT[s.llCode]
754
		//ll.encode(llB)
755
		nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
756
		dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
757
		wr.addBits16NC(ll.state, uint8(nbBitsOut))
758
		ll.state = ll.stateTable[dstState]
759

760
		outBits = llB.outBits & 31
761
		extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
762
		extraBitsN += outBits
763

764
		wr.flush32()
765
		wr.addBits64NC(extraBits, extraBitsN)
766

767
		if debugSequences {
768
			println("Encoded seq", seq, s)
769
		}
770

771
		seq--
772
	}
773
	ml.flush(mlEnc.actualTableLog)
774
	of.flush(ofEnc.actualTableLog)
775
	ll.flush(llEnc.actualTableLog)
776
	err = wr.close()
777
	if err != nil {
778
		return err
779
	}
780
	b.output = wr.out
781

782
	if len(b.output)-3-bhOffset >= b.size {
783
		// Maybe even add a bigger margin.
784
		b.litEnc.Reuse = huff0.ReusePolicyNone
785
		return errIncompressible
786
	}
787

788
	// Size is output minus block header.
789
	bh.setSize(uint32(len(b.output)-bhOffset) - 3)
790
	if debugEncoder {
791
		println("Rewriting block header", bh)
792
	}
793
	_ = bh.appendTo(b.output[bhOffset:bhOffset])
794
	b.coders.setPrev(llEnc, mlEnc, ofEnc)
795
	return nil
796
}
797

798
var errIncompressible = errors.New("incompressible")
799

800
func (b *blockEnc) genCodes() {
801
	if len(b.sequences) == 0 {
802
		// nothing to do
803
		return
804
	}
805
	if len(b.sequences) > math.MaxUint16 {
806
		panic("can only encode up to 64K sequences")
807
	}
808
	// No bounds checks after here:
809
	llH := b.coders.llEnc.Histogram()
810
	ofH := b.coders.ofEnc.Histogram()
811
	mlH := b.coders.mlEnc.Histogram()
812
	for i := range llH {
813
		llH[i] = 0
814
	}
815
	for i := range ofH {
816
		ofH[i] = 0
817
	}
818
	for i := range mlH {
819
		mlH[i] = 0
820
	}
821

822
	var llMax, ofMax, mlMax uint8
823
	for i := range b.sequences {
824
		seq := &b.sequences[i]
825
		v := llCode(seq.litLen)
826
		seq.llCode = v
827
		llH[v]++
828
		if v > llMax {
829
			llMax = v
830
		}
831

832
		v = ofCode(seq.offset)
833
		seq.ofCode = v
834
		ofH[v]++
835
		if v > ofMax {
836
			ofMax = v
837
		}
838

839
		v = mlCode(seq.matchLen)
840
		seq.mlCode = v
841
		mlH[v]++
842
		if v > mlMax {
843
			mlMax = v
844
			if debugAsserts && mlMax > maxMatchLengthSymbol {
845
				panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
846
			}
847
		}
848
	}
849
	maxCount := func(a []uint32) int {
850
		var max uint32
851
		for _, v := range a {
852
			if v > max {
853
				max = v
854
			}
855
		}
856
		return int(max)
857
	}
858
	if debugAsserts && mlMax > maxMatchLengthSymbol {
859
		panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
860
	}
861
	if debugAsserts && ofMax > maxOffsetBits {
862
		panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
863
	}
864
	if debugAsserts && llMax > maxLiteralLengthSymbol {
865
		panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
866
	}
867

868
	b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
869
	b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
870
	b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
871
}
872
cubefs

Использование cookies