// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"encoding/binary"
	"math"
	"math/bits"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func Encode(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlock(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
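
// encodeRoundTrip is an illustrative sketch, not part of the upstream API.
// It shows the typical single-block round trip: compress with Encode, then
// decompress with this package's Decode (defined elsewhere in the package).
func encodeRoundTrip(data []byte) ([]byte, error) {
	comp := Encode(nil, data) // nil dst is valid; Encode allocates as needed
	return Decode(nil, comp)  // returns the original data and any decode error
}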

// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the estimated compressed output size.
// The function returns -1 if no improvement can be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
	if len(src) < 6 || int64(len(src)) > 0xffffffff {
		return -1
	}
	if len(src) <= 1024 {
		d = calcBlockSizeSmall(src)
	} else {
		d = calcBlockSize(src)
	}
	if d == 0 {
		return -1
	}
	// Size of the varint encoded block size.
	d += (bits.Len64(uint64(len(src))) + 7) / 7
	if d >= len(src) {
		return -1
	}
	return d
}
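
// estimateThenEncode is an illustrative sketch, not part of the upstream API.
// It shows how the fast estimate relates to a real Encode: when the estimate
// is -1 the caller can skip compression entirely; otherwise the actual block
// is usually at least as small as the estimate.
func estimateThenEncode(src []byte) (estimate int, block []byte) {
	estimate = EstimateBlockSize(src)
	if estimate < 0 {
		return estimate, nil // not expected to compress; keep src as-is
	}
	return estimate, Encode(nil, src)
}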

// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBetter(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as well as reasonably possible, at the cost of a
// large speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBest(dst[d:], src, nil)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
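
// encodeWithLevel is an illustrative sketch, not part of the upstream API.
// It maps a hypothetical level parameter onto the three block encoders,
// trading compression ratio for speed.
func encodeWithLevel(dst, src []byte, level int) []byte {
	switch level {
	case 1:
		return Encode(dst, src) // fastest, lowest ratio
	case 2:
		return EncodeBetter(dst, src) // better ratio, 10-40% slower
	default:
		return EncodeBest(dst, src) // best ratio, slowest
	}
}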

// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappy(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}
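
// snappyBlockRoundTrip is an illustrative sketch, not part of the upstream
// API. The block produced by EncodeSnappy uses only Snappy-compatible opcodes,
// so it can be read by any Snappy block decoder; here it is simply decoded
// again with this package's own Decode.
func snappyBlockRoundTrip(src []byte) ([]byte, error) {
	block := EncodeSnappy(nil, src)
	return Decode(nil, block)
}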

// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappyBetter(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBetterSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// Blocks will require the same amount of memory to decode as to encode,
// and do not allow concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may go undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface, which provides all of these features.
func EncodeSnappyBest(dst, src []byte) []byte {
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if cap(dst) < n {
		dst = make([]byte, n)
	} else {
		dst = dst[:n]
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	if len(src) == 0 {
		return dst[:d]
	}
	if len(src) < minNonLiteralBlockSize {
		d += emitLiteral(dst[d:], src)
		return dst[:d]
	}
	n := encodeBlockBestSnappy(dst[d:], src)
	if n > 0 {
		d += n
		return dst[:d]
	}
	// Not compressible
	d += emitLiteral(dst[d:], src)
	return dst[:d]
}

// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
// If the destination is nil or too small, a new one will be allocated.
// The blocks are not validated, so garbage in = garbage out.
// dst may not overlap block data.
// Any data in dst is preserved as is, so it will not be considered a block.
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
	totalSize := uint64(0)
	compSize := 0
	for _, b := range blocks {
		l, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		totalSize += uint64(l)
		compSize += len(b) - hdr
	}
	if totalSize == 0 {
		dst = append(dst, 0)
		return dst, nil
	}
	if totalSize > math.MaxUint32 {
		return nil, ErrTooLarge
	}
	var tmp [binary.MaxVarintLen32]byte
	hdrSize := binary.PutUvarint(tmp[:], totalSize)
	wantSize := hdrSize + compSize
	if cap(dst)-len(dst) < wantSize {
		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
	}
	dst = append(dst, tmp[:hdrSize]...)
	for _, b := range blocks {
		_, hdr, err := decodedLen(b)
		if err != nil {
			return nil, err
		}
		dst = append(dst, b[hdr:]...)
	}
	return dst, nil
}
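
// concatTwoBlocks is an illustrative sketch, not part of the upstream API.
// It encodes two buffers as separate blocks and stitches them into one block
// that decodes to the concatenation a+b, without recompressing either input.
func concatTwoBlocks(a, b []byte) ([]byte, error) {
	blockA := Encode(nil, a)
	blockB := Encode(nil, b)
	// ConcatBlocks writes a single varint length header covering both inputs,
	// followed by the two compressed payloads with their own headers stripped.
	return ConcatBlocks(nil, blockA, blockB)
}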

// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 8

// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// will be accepted by the encoder.
const minNonLiteralBlockSize = 32

const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)

// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
// Blocks this big are highly discouraged, though.
// It is half that size on 32-bit systems.
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5

// MaxEncodedLen returns the maximum length of a Snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
// 32-bit platforms will have lower thresholds for rejecting big content.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			// Also includes negative.
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	// Size of the varint encoded block size.
	n = n + uint64((bits.Len64(n)+7)/7)
	// Add maximum size of encoding block as literals.
	n += uint64(literalExtraSize(int64(srcLen)))
	if intReduction == 1 {
		// 32 bits
		if n > math.MaxInt32 {
			return -1
		}
	} else if n > 0xffffffff {
		// 64 bits
		// Also includes negative.
		return -1
	}
	return int(n)
}
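
// encodeToBuffer is an illustrative sketch, not part of the upstream API.
// It shows how MaxEncodedLen can size a reusable destination so repeated
// Encode calls avoid allocation.
func encodeToBuffer(buf, src []byte) ([]byte, error) {
	n := MaxEncodedLen(len(src))
	if n < 0 {
		return nil, ErrTooLarge // src cannot be encoded as a single block
	}
	if cap(buf) < n {
		buf = make([]byte, 0, n)
	}
	return Encode(buf[:0], src), nil
}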