cubefs

Форк
0
144 строки · 4.6 Кб
1
// Copyright 2018 Klaus Post. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5

6
// Package fse provides Finite State Entropy encoding and decoding.
7
//
8
// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9
// for byte blocks as implemented in zstd.
10
//
11
// See https://github.com/klauspost/compress/tree/master/fse for more information.
12
package fse
13

14
import (
15
	"errors"
16
	"fmt"
17
	"math/bits"
18
)
19

20
const (
21
	/*!MEMORY_USAGE :
22
	 *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23
	 *  Increasing memory usage improves compression ratio
24
	 *  Reduced memory usage can improve speed, due to cache effect
25
	 *  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26
	maxMemoryUsage     = 14
27
	defaultMemoryUsage = 13
28

29
	maxTableLog     = maxMemoryUsage - 2
30
	maxTablesize    = 1 << maxTableLog
31
	defaultTablelog = defaultMemoryUsage - 2
32
	minTablelog     = 5
33
	maxSymbolValue  = 255
34
)
35

36
var (
37
	// ErrIncompressible is returned when input is judged to be too hard to compress.
38
	ErrIncompressible = errors.New("input is not compressible")
39

40
	// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
41
	ErrUseRLE = errors.New("input is single value repeated")
42
)
43

44
// Scratch provides temporary storage for compression and decompression.
45
type Scratch struct {
46
	// Private
47
	count    [maxSymbolValue + 1]uint32
48
	norm     [maxSymbolValue + 1]int16
49
	br       byteReader
50
	bits     bitReader
51
	bw       bitWriter
52
	ct       cTable      // Compression tables.
53
	decTable []decSymbol // Decompression table.
54
	maxCount int         // count of the most probable symbol
55

56
	// Per block parameters.
57
	// These can be used to override compression parameters of the block.
58
	// Do not touch, unless you know what you are doing.
59

60
	// Out is output buffer.
61
	// If the scratch is re-used before the caller is done processing the output,
62
	// set this field to nil.
63
	// Otherwise the output buffer will be re-used for next Compression/Decompression step
64
	// and allocation will be avoided.
65
	Out []byte
66

67
	// DecompressLimit limits the maximum decoded size acceptable.
68
	// If > 0 decompression will stop when approximately this many bytes
69
	// has been decoded.
70
	// If 0, maximum size will be 2GB.
71
	DecompressLimit int
72

73
	symbolLen      uint16 // Length of active part of the symbol table.
74
	actualTableLog uint8  // Selected tablelog.
75
	zeroBits       bool   // no bits has prob > 50%.
76
	clearCount     bool   // clear count
77

78
	// MaxSymbolValue will override the maximum symbol value of the next block.
79
	MaxSymbolValue uint8
80

81
	// TableLog will attempt to override the tablelog for the next block.
82
	TableLog uint8
83
}
84

85
// Histogram allows to populate the histogram and skip that step in the compression,
86
// It otherwise allows to inspect the histogram when compression is done.
87
// To indicate that you have populated the histogram call HistogramFinished
88
// with the value of the highest populated symbol, as well as the number of entries
89
// in the most populated entry. These are accepted at face value.
90
// The returned slice will always be length 256.
91
func (s *Scratch) Histogram() []uint32 {
92
	return s.count[:]
93
}
94

95
// HistogramFinished can be called to indicate that the histogram has been populated.
96
// maxSymbol is the index of the highest set symbol of the next data segment.
97
// maxCount is the number of entries in the most populated entry.
98
// These are accepted at face value.
99
func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
100
	s.maxCount = maxCount
101
	s.symbolLen = uint16(maxSymbol) + 1
102
	s.clearCount = maxCount != 0
103
}
104

105
// prepare will prepare and allocate scratch tables used for both compression and decompression.
106
func (s *Scratch) prepare(in []byte) (*Scratch, error) {
107
	if s == nil {
108
		s = &Scratch{}
109
	}
110
	if s.MaxSymbolValue == 0 {
111
		s.MaxSymbolValue = 255
112
	}
113
	if s.TableLog == 0 {
114
		s.TableLog = defaultTablelog
115
	}
116
	if s.TableLog > maxTableLog {
117
		return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
118
	}
119
	if cap(s.Out) == 0 {
120
		s.Out = make([]byte, 0, len(in))
121
	}
122
	if s.clearCount && s.maxCount == 0 {
123
		for i := range s.count {
124
			s.count[i] = 0
125
		}
126
		s.clearCount = false
127
	}
128
	s.br.init(in)
129
	if s.DecompressLimit == 0 {
130
		// Max size 2GB.
131
		s.DecompressLimit = (2 << 30) - 1
132
	}
133

134
	return s, nil
135
}
136

137
// tableStep returns the next table index.
138
func tableStep(tableSize uint32) uint32 {
139
	return (tableSize >> 1) + (tableSize >> 3) + 3
140
}
141

142
func highBits(val uint32) (n uint32) {
143
	return uint32(bits.Len32(val) - 1)
144
}
145

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.