cubefs

Форк
0
381 строка · 9.2 Кб
1
// Copyright 2019+ Klaus Post. All rights reserved.
2
// License information can be found in the LICENSE file.
3
// Based on work by Yann Collet, released under BSD License.
4

5
package zstd
6

7
import (
8
	"bytes"
9
	"encoding/hex"
10
	"errors"
11
	"io"
12

13
	"github.com/klauspost/compress/zstd/internal/xxhash"
14
)
15

16
type frameDec struct {
17
	o   decoderOptions
18
	crc *xxhash.Digest
19

20
	WindowSize uint64
21

22
	// Frame history passed between blocks
23
	history history
24

25
	rawInput byteBuffer
26

27
	// Byte buffer that can be reused for small input blocks.
28
	bBuf byteBuf
29

30
	FrameContentSize uint64
31

32
	DictionaryID  *uint32
33
	HasCheckSum   bool
34
	SingleSegment bool
35
}
36

37
const (
	// MinWindowSize is the smallest window size a frame may use: 1 KB.
	MinWindowSize = 1 << 10

	// MaxWindowSize is the largest window size the encoder will use (512 MB).
	// It is also the default upper limit on window size for the decoder.
	MaxWindowSize = 1 << 29
)
45

46
var (
	// frameMagic is the 4-byte signature that starts a regular frame,
	// in the order the bytes appear in the stream.
	frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}

	// skippableFrameMagic holds signature bytes 1-3 of a skippable frame.
	// Byte 0 is matched separately: its upper nibble must be 0x5.
	skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
)
50

51
func newFrameDec(o decoderOptions) *frameDec {
52
	if o.maxWindowSize > o.maxDecodedSize {
53
		o.maxWindowSize = o.maxDecodedSize
54
	}
55
	d := frameDec{
56
		o: o,
57
	}
58
	return &d
59
}
60

61
// reset will read the frame header and prepare for block decoding.
62
// If nothing can be read from the input, io.EOF will be returned.
63
// Any other error indicated that the stream contained data, but
64
// there was a problem.
65
func (d *frameDec) reset(br byteBuffer) error {
66
	d.HasCheckSum = false
67
	d.WindowSize = 0
68
	var signature [4]byte
69
	for {
70
		var err error
71
		// Check if we can read more...
72
		b, err := br.readSmall(1)
73
		switch err {
74
		case io.EOF, io.ErrUnexpectedEOF:
75
			return io.EOF
76
		default:
77
			return err
78
		case nil:
79
			signature[0] = b[0]
80
		}
81
		// Read the rest, don't allow io.ErrUnexpectedEOF
82
		b, err = br.readSmall(3)
83
		switch err {
84
		case io.EOF:
85
			return io.EOF
86
		default:
87
			return err
88
		case nil:
89
			copy(signature[1:], b)
90
		}
91

92
		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
93
			if debugDecoder {
94
				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
95
			}
96
			// Break if not skippable frame.
97
			break
98
		}
99
		// Read size to skip
100
		b, err = br.readSmall(4)
101
		if err != nil {
102
			if debugDecoder {
103
				println("Reading Frame Size", err)
104
			}
105
			return err
106
		}
107
		n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
108
		println("Skipping frame with", n, "bytes.")
109
		err = br.skipN(int(n))
110
		if err != nil {
111
			if debugDecoder {
112
				println("Reading discarded frame", err)
113
			}
114
			return err
115
		}
116
	}
117
	if !bytes.Equal(signature[:], frameMagic) {
118
		if debugDecoder {
119
			println("Got magic numbers: ", signature, "want:", frameMagic)
120
		}
121
		return ErrMagicMismatch
122
	}
123

124
	// Read Frame_Header_Descriptor
125
	fhd, err := br.readByte()
126
	if err != nil {
127
		if debugDecoder {
128
			println("Reading Frame_Header_Descriptor", err)
129
		}
130
		return err
131
	}
132
	d.SingleSegment = fhd&(1<<5) != 0
133

134
	if fhd&(1<<3) != 0 {
135
		return errors.New("reserved bit set on frame header")
136
	}
137

138
	// Read Window_Descriptor
139
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
140
	d.WindowSize = 0
141
	if !d.SingleSegment {
142
		wd, err := br.readByte()
143
		if err != nil {
144
			if debugDecoder {
145
				println("Reading Window_Descriptor", err)
146
			}
147
			return err
148
		}
149
		printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
150
		windowLog := 10 + (wd >> 3)
151
		windowBase := uint64(1) << windowLog
152
		windowAdd := (windowBase / 8) * uint64(wd&0x7)
153
		d.WindowSize = windowBase + windowAdd
154
	}
155

156
	// Read Dictionary_ID
157
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
158
	d.DictionaryID = nil
159
	if size := fhd & 3; size != 0 {
160
		if size == 3 {
161
			size = 4
162
		}
163

164
		b, err := br.readSmall(int(size))
165
		if err != nil {
166
			println("Reading Dictionary_ID", err)
167
			return err
168
		}
169
		var id uint32
170
		switch size {
171
		case 1:
172
			id = uint32(b[0])
173
		case 2:
174
			id = uint32(b[0]) | (uint32(b[1]) << 8)
175
		case 4:
176
			id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
177
		}
178
		if debugDecoder {
179
			println("Dict size", size, "ID:", id)
180
		}
181
		if id > 0 {
182
			// ID 0 means "sorry, no dictionary anyway".
183
			// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
184
			d.DictionaryID = &id
185
		}
186
	}
187

188
	// Read Frame_Content_Size
189
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
190
	var fcsSize int
191
	v := fhd >> 6
192
	switch v {
193
	case 0:
194
		if d.SingleSegment {
195
			fcsSize = 1
196
		}
197
	default:
198
		fcsSize = 1 << v
199
	}
200
	d.FrameContentSize = 0
201
	if fcsSize > 0 {
202
		b, err := br.readSmall(fcsSize)
203
		if err != nil {
204
			println("Reading Frame content", err)
205
			return err
206
		}
207
		switch fcsSize {
208
		case 1:
209
			d.FrameContentSize = uint64(b[0])
210
		case 2:
211
			// When FCS_Field_Size is 2, the offset of 256 is added.
212
			d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
213
		case 4:
214
			d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
215
		case 8:
216
			d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
217
			d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
218
			d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
219
		}
220
		if debugDecoder {
221
			println("Read FCS:", d.FrameContentSize)
222
		}
223
	}
224

225
	// Move this to shared.
226
	d.HasCheckSum = fhd&(1<<2) != 0
227
	if d.HasCheckSum {
228
		if d.crc == nil {
229
			d.crc = xxhash.New()
230
		}
231
		d.crc.Reset()
232
	}
233

234
	if d.WindowSize == 0 && d.SingleSegment {
235
		// We may not need window in this case.
236
		d.WindowSize = d.FrameContentSize
237
		if d.WindowSize < MinWindowSize {
238
			d.WindowSize = MinWindowSize
239
		}
240
	}
241

242
	if d.WindowSize > uint64(d.o.maxWindowSize) {
243
		if debugDecoder {
244
			printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
245
		}
246
		return ErrWindowSizeExceeded
247
	}
248
	// The minimum Window_Size is 1 KB.
249
	if d.WindowSize < MinWindowSize {
250
		if debugDecoder {
251
			println("got window size: ", d.WindowSize)
252
		}
253
		return ErrWindowSizeTooSmall
254
	}
255
	d.history.windowSize = int(d.WindowSize)
256
	if d.o.lowMem && d.history.windowSize < maxBlockSize {
257
		d.history.allocFrameBuffer = d.history.windowSize * 2
258
		// TODO: Maybe use FrameContent size
259
	} else {
260
		d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
261
	}
262

263
	if debugDecoder {
264
		println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
265
	}
266

267
	// history contains input - maybe we do something
268
	d.rawInput = br
269
	return nil
270
}
271

272
// next will start decoding the next block from stream.
273
func (d *frameDec) next(block *blockDec) error {
274
	if debugDecoder {
275
		println("decoding new block")
276
	}
277
	err := block.reset(d.rawInput, d.WindowSize)
278
	if err != nil {
279
		println("block error:", err)
280
		// Signal the frame decoder we have a problem.
281
		block.sendErr(err)
282
		return err
283
	}
284
	return nil
285
}
286

287
// checkCRC will check the checksum if the frame has one.
288
// Will return ErrCRCMismatch if crc check failed, otherwise nil.
289
func (d *frameDec) checkCRC() error {
290
	if !d.HasCheckSum {
291
		return nil
292
	}
293
	var tmp [4]byte
294
	got := d.crc.Sum64()
295
	// Flip to match file order.
296
	tmp[0] = byte(got >> 0)
297
	tmp[1] = byte(got >> 8)
298
	tmp[2] = byte(got >> 16)
299
	tmp[3] = byte(got >> 24)
300

301
	// We can overwrite upper tmp now
302
	want, err := d.rawInput.readSmall(4)
303
	if err != nil {
304
		println("CRC missing?", err)
305
		return err
306
	}
307

308
	if !bytes.Equal(tmp[:], want) && !ignoreCRC {
309
		if debugDecoder {
310
			println("CRC Check Failed:", tmp[:], "!=", want)
311
		}
312
		return ErrCRCMismatch
313
	}
314
	if debugDecoder {
315
		println("CRC ok", tmp[:])
316
	}
317
	return nil
318
}
319

320
// runDecoder will create a sync decoder that will decode a block of data.
321
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
322
	saved := d.history.b
323

324
	// We use the history for output to avoid copying it.
325
	d.history.b = dst
326
	d.history.ignoreBuffer = len(dst)
327
	// Store input length, so we only check new data.
328
	crcStart := len(dst)
329
	var err error
330
	for {
331
		err = dec.reset(d.rawInput, d.WindowSize)
332
		if err != nil {
333
			break
334
		}
335
		if debugDecoder {
336
			println("next block:", dec)
337
		}
338
		err = dec.decodeBuf(&d.history)
339
		if err != nil {
340
			break
341
		}
342
		if uint64(len(d.history.b)) > d.o.maxDecodedSize {
343
			err = ErrDecoderSizeExceeded
344
			break
345
		}
346
		if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
347
			println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
348
			err = ErrFrameSizeExceeded
349
			break
350
		}
351
		if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
352
			println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
353
			err = ErrFrameSizeExceeded
354
			break
355
		}
356
		if dec.Last {
357
			break
358
		}
359
		if debugDecoder && d.FrameContentSize > 0 {
360
			println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
361
		}
362
	}
363
	dst = d.history.b
364
	if err == nil {
365
		if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
366
			err = ErrFrameSizeMismatch
367
		} else if d.HasCheckSum {
368
			var n int
369
			n, err = d.crc.Write(dst[crcStart:])
370
			if err == nil {
371
				if n != len(dst)-crcStart {
372
					err = io.ErrShortWrite
373
				} else {
374
					err = d.checkCRC()
375
				}
376
			}
377
		}
378
	}
379
	d.history.b = saved
380
	return dst, err
381
}
382

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.