cubefs

Форк
0
317 строк · 10.1 Кб
1
package zstd
2

3
import (
4
	"errors"
5
	"fmt"
6
	"runtime"
7
	"strings"
8
)
9

10
// EOption is an option for creating a encoder.
11
type EOption func(*encoderOptions) error
12

13
// options retains accumulated state of multiple options.
14
type encoderOptions struct {
15
	concurrent      int
16
	level           EncoderLevel
17
	single          *bool
18
	pad             int
19
	blockSize       int
20
	windowSize      int
21
	crc             bool
22
	fullZero        bool
23
	noEntropy       bool
24
	allLitEntropy   bool
25
	customWindow    bool
26
	customALEntropy bool
27
	customBlockSize bool
28
	lowMem          bool
29
	dict            *dict
30
}
31

32
func (o *encoderOptions) setDefault() {
33
	*o = encoderOptions{
34
		concurrent:    runtime.GOMAXPROCS(0),
35
		crc:           true,
36
		single:        nil,
37
		blockSize:     maxCompressedBlockSize,
38
		windowSize:    8 << 20,
39
		level:         SpeedDefault,
40
		allLitEntropy: true,
41
		lowMem:        false,
42
	}
43
}
44

45
// encoder returns an encoder with the selected options.
46
func (o encoderOptions) encoder() encoder {
47
	switch o.level {
48
	case SpeedFastest:
49
		if o.dict != nil {
50
			return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
51
		}
52
		return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
53

54
	case SpeedDefault:
55
		if o.dict != nil {
56
			return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
57
		}
58
		return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
59
	case SpeedBetterCompression:
60
		if o.dict != nil {
61
			return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
62
		}
63
		return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
64
	case SpeedBestCompression:
65
		return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
66
	}
67
	panic("unknown compression level")
68
}
69

70
// WithEncoderCRC will add CRC value to output.
71
// Output will be 4 bytes larger.
72
func WithEncoderCRC(b bool) EOption {
73
	return func(o *encoderOptions) error { o.crc = b; return nil }
74
}
75

76
// WithEncoderConcurrency will set the concurrency,
77
// meaning the maximum number of encoders to run concurrently.
78
// The value supplied must be at least 1.
79
// For streams, setting a value of 1 will disable async compression.
80
// By default this will be set to GOMAXPROCS.
81
func WithEncoderConcurrency(n int) EOption {
82
	return func(o *encoderOptions) error {
83
		if n <= 0 {
84
			return fmt.Errorf("concurrency must be at least 1")
85
		}
86
		o.concurrent = n
87
		return nil
88
	}
89
}
90

91
// WithWindowSize will set the maximum allowed back-reference distance.
92
// The value must be a power of two between MinWindowSize and MaxWindowSize.
93
// A larger value will enable better compression but allocate more memory and,
94
// for above-default values, take considerably longer.
95
// The default value is determined by the compression level.
96
func WithWindowSize(n int) EOption {
97
	return func(o *encoderOptions) error {
98
		switch {
99
		case n < MinWindowSize:
100
			return fmt.Errorf("window size must be at least %d", MinWindowSize)
101
		case n > MaxWindowSize:
102
			return fmt.Errorf("window size must be at most %d", MaxWindowSize)
103
		case (n & (n - 1)) != 0:
104
			return errors.New("window size must be a power of 2")
105
		}
106

107
		o.windowSize = n
108
		o.customWindow = true
109
		if o.blockSize > o.windowSize {
110
			o.blockSize = o.windowSize
111
			o.customBlockSize = true
112
		}
113
		return nil
114
	}
115
}
116

117
// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
118
// This can be used to obfuscate the exact output size or make blocks of a certain size.
119
// The contents will be a skippable frame, so it will be invisible by the decoder.
120
// n must be > 0 and <= 1GB, 1<<30 bytes.
121
// The padded area will be filled with data from crypto/rand.Reader.
122
// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
123
func WithEncoderPadding(n int) EOption {
124
	return func(o *encoderOptions) error {
125
		if n <= 0 {
126
			return fmt.Errorf("padding must be at least 1")
127
		}
128
		// No need to waste our time.
129
		if n == 1 {
130
			o.pad = 0
131
		}
132
		if n > 1<<30 {
133
			return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
134
		}
135
		o.pad = n
136
		return nil
137
	}
138
}
139

140
// EncoderLevel predefines encoder compression levels.
// Only use the exported constants: the actual mapping of these values is very
// likely to change, and your compression could change unpredictably when
// upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value and marks a level that was never chosen.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// Note that CPU usage for this level may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the last actual compression option.
	// This is not for external usage, but is used to keep track of the
	// valid options.
	speedLast
)
170

171
// EncoderLevelFromString will convert a string representation of an encoding level back
172
// to a compression level. The compare is not case sensitive.
173
// If the string wasn't recognized, (false, SpeedDefault) will be returned.
174
func EncoderLevelFromString(s string) (bool, EncoderLevel) {
175
	for l := speedNotSet + 1; l < speedLast; l++ {
176
		if strings.EqualFold(s, l.String()) {
177
			return true, l
178
		}
179
	}
180
	return false, SpeedDefault
181
}
182

183
// EncoderLevelFromZstd will return an encoder level that closest matches the compression
184
// ratio of a specific zstd compression level.
185
// Many input values will provide the same compression level.
186
func EncoderLevelFromZstd(level int) EncoderLevel {
187
	switch {
188
	case level < 3:
189
		return SpeedFastest
190
	case level >= 3 && level < 6:
191
		return SpeedDefault
192
	case level >= 6 && level < 10:
193
		return SpeedBetterCompression
194
	default:
195
		return SpeedBestCompression
196
	}
197
}
198

199
// String provides a string representation of the compression level.
200
func (e EncoderLevel) String() string {
201
	switch e {
202
	case SpeedFastest:
203
		return "fastest"
204
	case SpeedDefault:
205
		return "default"
206
	case SpeedBetterCompression:
207
		return "better"
208
	case SpeedBestCompression:
209
		return "best"
210
	default:
211
		return "invalid"
212
	}
213
}
214

215
// WithEncoderLevel specifies a predefined compression level.
216
func WithEncoderLevel(l EncoderLevel) EOption {
217
	return func(o *encoderOptions) error {
218
		switch {
219
		case l <= speedNotSet || l >= speedLast:
220
			return fmt.Errorf("unknown encoder level")
221
		}
222
		o.level = l
223
		if !o.customWindow {
224
			switch o.level {
225
			case SpeedFastest:
226
				o.windowSize = 4 << 20
227
				if !o.customBlockSize {
228
					o.blockSize = 1 << 16
229
				}
230
			case SpeedDefault:
231
				o.windowSize = 8 << 20
232
			case SpeedBetterCompression:
233
				o.windowSize = 16 << 20
234
			case SpeedBestCompression:
235
				o.windowSize = 32 << 20
236
			}
237
		}
238
		if !o.customALEntropy {
239
			o.allLitEntropy = l > SpeedFastest
240
		}
241

242
		return nil
243
	}
244
}
245

246
// WithZeroFrames will encode 0 length input as full frames.
247
// This can be needed for compatibility with zstandard usage,
248
// but is not needed for this package.
249
func WithZeroFrames(b bool) EOption {
250
	return func(o *encoderOptions) error {
251
		o.fullZero = b
252
		return nil
253
	}
254
}
255

256
// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
257
// Disabling this will skip incompressible data faster, but in cases with no matches but
258
// skewed character distribution compression is lost.
259
// Default value depends on the compression level selected.
260
func WithAllLitEntropyCompression(b bool) EOption {
261
	return func(o *encoderOptions) error {
262
		o.customALEntropy = true
263
		o.allLitEntropy = b
264
		return nil
265
	}
266
}
267

268
// WithNoEntropyCompression will always skip entropy compression of literals.
269
// This can be useful if content has matches, but unlikely to benefit from entropy
270
// compression. Usually the slight speed improvement is not worth enabling this.
271
func WithNoEntropyCompression(b bool) EOption {
272
	return func(o *encoderOptions) error {
273
		o.noEntropy = b
274
		return nil
275
	}
276
}
277

278
// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
279
// If this flag is set, data must be regenerated within a single continuous memory segment.
280
// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
281
// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
282
// In order to preserve the decoder from unreasonable memory requirements,
283
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
284
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
285
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
286
// If this is not specified, block encodes will automatically choose this based on the input size.
287
// This setting has no effect on streamed encodes.
288
func WithSingleSegment(b bool) EOption {
289
	return func(o *encoderOptions) error {
290
		o.single = &b
291
		return nil
292
	}
293
}
294

295
// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
296
// slower encoding speed.
297
// This will not change the window size which is the primary function for reducing
298
// memory usage. See WithWindowSize.
299
func WithLowerEncoderMem(b bool) EOption {
300
	return func(o *encoderOptions) error {
301
		o.lowMem = b
302
		return nil
303
	}
304
}
305

306
// WithEncoderDict allows to register a dictionary that will be used for the encode.
307
// The encoder *may* choose to use no dictionary instead for certain payloads.
308
func WithEncoderDict(dict []byte) EOption {
309
	return func(o *encoderOptions) error {
310
		d, err := loadDict(dict)
311
		if err != nil {
312
			return err
313
		}
314
		o.dict = d
315
		return nil
316
	}
317
}
318

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.