cubefs
1// Copyright 2018 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5
6// Package fse provides Finite State Entropy encoding and decoding.
7//
8// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9// for byte blocks as implemented in zstd.
10//
11// See https://github.com/klauspost/compress/tree/master/fse for more information.
12package fse13
14import (15"errors"16"fmt"17"math/bits"18)
19
20const (21/*!MEMORY_USAGE :22* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23* Increasing memory usage improves compression ratio
24* Reduced memory usage can improve speed, due to cache effect
25* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26maxMemoryUsage = 1427defaultMemoryUsage = 1328
29maxTableLog = maxMemoryUsage - 230maxTablesize = 1 << maxTableLog31defaultTablelog = defaultMemoryUsage - 232minTablelog = 533maxSymbolValue = 25534)
35
36var (37// ErrIncompressible is returned when input is judged to be too hard to compress.38ErrIncompressible = errors.New("input is not compressible")39
40// ErrUseRLE is returned from the compressor when the input is a single byte value repeated.41ErrUseRLE = errors.New("input is single value repeated")42)
43
44// Scratch provides temporary storage for compression and decompression.
45type Scratch struct {46// Private47count [maxSymbolValue + 1]uint3248norm [maxSymbolValue + 1]int1649br byteReader
50bits bitReader
51bw bitWriter
52ct cTable // Compression tables.53decTable []decSymbol // Decompression table.54maxCount int // count of the most probable symbol55
56// Per block parameters.57// These can be used to override compression parameters of the block.58// Do not touch, unless you know what you are doing.59
60// Out is output buffer.61// If the scratch is re-used before the caller is done processing the output,62// set this field to nil.63// Otherwise the output buffer will be re-used for next Compression/Decompression step64// and allocation will be avoided.65Out []byte66
67// DecompressLimit limits the maximum decoded size acceptable.68// If > 0 decompression will stop when approximately this many bytes69// has been decoded.70// If 0, maximum size will be 2GB.71DecompressLimit int72
73symbolLen uint16 // Length of active part of the symbol table.74actualTableLog uint8 // Selected tablelog.75zeroBits bool // no bits has prob > 50%.76clearCount bool // clear count77
78// MaxSymbolValue will override the maximum symbol value of the next block.79MaxSymbolValue uint880
81// TableLog will attempt to override the tablelog for the next block.82TableLog uint883}
84
85// Histogram allows to populate the histogram and skip that step in the compression,
86// It otherwise allows to inspect the histogram when compression is done.
87// To indicate that you have populated the histogram call HistogramFinished
88// with the value of the highest populated symbol, as well as the number of entries
89// in the most populated entry. These are accepted at face value.
90// The returned slice will always be length 256.
91func (s *Scratch) Histogram() []uint32 {92return s.count[:]93}
94
95// HistogramFinished can be called to indicate that the histogram has been populated.
96// maxSymbol is the index of the highest set symbol of the next data segment.
97// maxCount is the number of entries in the most populated entry.
98// These are accepted at face value.
99func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {100s.maxCount = maxCount101s.symbolLen = uint16(maxSymbol) + 1102s.clearCount = maxCount != 0103}
104
105// prepare will prepare and allocate scratch tables used for both compression and decompression.
106func (s *Scratch) prepare(in []byte) (*Scratch, error) {107if s == nil {108s = &Scratch{}109}110if s.MaxSymbolValue == 0 {111s.MaxSymbolValue = 255112}113if s.TableLog == 0 {114s.TableLog = defaultTablelog115}116if s.TableLog > maxTableLog {117return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)118}119if cap(s.Out) == 0 {120s.Out = make([]byte, 0, len(in))121}122if s.clearCount && s.maxCount == 0 {123for i := range s.count {124s.count[i] = 0125}126s.clearCount = false127}128s.br.init(in)129if s.DecompressLimit == 0 {130// Max size 2GB.131s.DecompressLimit = (2 << 30) - 1132}133
134return s, nil135}
136
137// tableStep returns the next table index.
138func tableStep(tableSize uint32) uint32 {139return (tableSize >> 1) + (tableSize >> 3) + 3140}
141
142func highBits(val uint32) (n uint32) {143return uint32(bits.Len32(val) - 1)144}
145