123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- // Copyright 2018 Klaus Post. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
- // Package fse provides Finite State Entropy encoding and decoding.
- //
- // Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
- // for byte blocks as implemented in zstd.
- //
- // See https://github.com/klauspost/compress/tree/master/fse for more information.
- package fse
- import (
- "errors"
- "fmt"
- "math/bits"
- )
- const (
- /*!MEMORY_USAGE :
- * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
- * Increasing memory usage improves compression ratio
- * Reduced memory usage can improve speed, due to cache effect
- * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
- maxMemoryUsage = 14
- defaultMemoryUsage = 13
- maxTableLog = maxMemoryUsage - 2
- maxTablesize = 1 << maxTableLog
- defaultTablelog = defaultMemoryUsage - 2
- minTablelog = 5
- maxSymbolValue = 255
- )
- var (
- // ErrIncompressible is returned when input is judged to be too hard to compress.
- ErrIncompressible = errors.New("input is not compressible")
- // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
- ErrUseRLE = errors.New("input is single value repeated")
- )
- // Scratch provides temporary storage for compression and decompression.
- type Scratch struct {
- // Private
- count [maxSymbolValue + 1]uint32
- norm [maxSymbolValue + 1]int16
- br byteReader
- bits bitReader
- bw bitWriter
- ct cTable // Compression tables.
- decTable []decSymbol // Decompression table.
- maxCount int // count of the most probable symbol
- // Per block parameters.
- // These can be used to override compression parameters of the block.
- // Do not touch, unless you know what you are doing.
- // Out is output buffer.
- // If the scratch is re-used before the caller is done processing the output,
- // set this field to nil.
- // Otherwise the output buffer will be re-used for next Compression/Decompression step
- // and allocation will be avoided.
- Out []byte
- // DecompressLimit limits the maximum decoded size acceptable.
- // If > 0 decompression will stop when approximately this many bytes
- // has been decoded.
- // If 0, maximum size will be 2GB.
- DecompressLimit int
- symbolLen uint16 // Length of active part of the symbol table.
- actualTableLog uint8 // Selected tablelog.
- zeroBits bool // no bits has prob > 50%.
- clearCount bool // clear count
- // MaxSymbolValue will override the maximum symbol value of the next block.
- MaxSymbolValue uint8
- // TableLog will attempt to override the tablelog for the next block.
- TableLog uint8
- }
- // Histogram allows to populate the histogram and skip that step in the compression,
- // It otherwise allows to inspect the histogram when compression is done.
- // To indicate that you have populated the histogram call HistogramFinished
- // with the value of the highest populated symbol, as well as the number of entries
- // in the most populated entry. These are accepted at face value.
- // The returned slice will always be length 256.
- func (s *Scratch) Histogram() []uint32 {
- return s.count[:]
- }
- // HistogramFinished can be called to indicate that the histogram has been populated.
- // maxSymbol is the index of the highest set symbol of the next data segment.
- // maxCount is the number of entries in the most populated entry.
- // These are accepted at face value.
- func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
- s.maxCount = maxCount
- s.symbolLen = uint16(maxSymbol) + 1
- s.clearCount = maxCount != 0
- }
- // prepare will prepare and allocate scratch tables used for both compression and decompression.
- func (s *Scratch) prepare(in []byte) (*Scratch, error) {
- if s == nil {
- s = &Scratch{}
- }
- if s.MaxSymbolValue == 0 {
- s.MaxSymbolValue = 255
- }
- if s.TableLog == 0 {
- s.TableLog = defaultTablelog
- }
- if s.TableLog > maxTableLog {
- return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
- }
- if cap(s.Out) == 0 {
- s.Out = make([]byte, 0, len(in))
- }
- if s.clearCount && s.maxCount == 0 {
- for i := range s.count {
- s.count[i] = 0
- }
- s.clearCount = false
- }
- s.br.init(in)
- if s.DecompressLimit == 0 {
- // Max size 2GB.
- s.DecompressLimit = (2 << 30) - 1
- }
- return s, nil
- }
- // tableStep returns the next table index.
- func tableStep(tableSize uint32) uint32 {
- return (tableSize >> 1) + (tableSize >> 3) + 3
- }
- func highBits(val uint32) (n uint32) {
- return uint32(bits.Len32(val) - 1)
- }
|