encode.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. // Copyright 2011 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "encoding/binary"
  8. "math"
  9. "math/bits"
  10. )
  11. // Encode returns the encoded form of src. The returned slice may be a sub-
  12. // slice of dst if dst was large enough to hold the entire encoded block.
  13. // Otherwise, a newly allocated slice will be returned.
  14. //
  15. // The dst and src must not overlap. It is valid to pass a nil dst.
  16. //
  17. // The blocks will require the same amount of memory to decode as encoding,
  18. // and does not make for concurrent decoding.
  19. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  20. //
  21. // If you need to encode larger amounts of data, consider using
  22. // the streaming interface which gives all of these features.
  23. func Encode(dst, src []byte) []byte {
  24. if n := MaxEncodedLen(len(src)); n < 0 {
  25. panic(ErrTooLarge)
  26. } else if cap(dst) < n {
  27. dst = make([]byte, n)
  28. } else {
  29. dst = dst[:n]
  30. }
  31. // The block starts with the varint-encoded length of the decompressed bytes.
  32. d := binary.PutUvarint(dst, uint64(len(src)))
  33. if len(src) == 0 {
  34. return dst[:d]
  35. }
  36. if len(src) < minNonLiteralBlockSize {
  37. d += emitLiteral(dst[d:], src)
  38. return dst[:d]
  39. }
  40. n := encodeBlock(dst[d:], src)
  41. if n > 0 {
  42. d += n
  43. return dst[:d]
  44. }
  45. // Not compressible
  46. d += emitLiteral(dst[d:], src)
  47. return dst[:d]
  48. }
  49. // EstimateBlockSize will perform a very fast compression
  50. // without outputting the result and return the compressed output size.
  51. // The function returns -1 if no improvement could be achieved.
  52. // Using actual compression will most often produce better compression than the estimate.
  53. func EstimateBlockSize(src []byte) (d int) {
  54. if len(src) < 6 || int64(len(src)) > 0xffffffff {
  55. return -1
  56. }
  57. if len(src) <= 1024 {
  58. d = calcBlockSizeSmall(src)
  59. } else {
  60. d = calcBlockSize(src)
  61. }
  62. if d == 0 {
  63. return -1
  64. }
  65. // Size of the varint encoded block size.
  66. d += (bits.Len64(uint64(len(src))) + 7) / 7
  67. if d >= len(src) {
  68. return -1
  69. }
  70. return d
  71. }
  72. // EncodeBetter returns the encoded form of src. The returned slice may be a sub-
  73. // slice of dst if dst was large enough to hold the entire encoded block.
  74. // Otherwise, a newly allocated slice will be returned.
  75. //
  76. // EncodeBetter compresses better than Encode but typically with a
  77. // 10-40% speed decrease on both compression and decompression.
  78. //
  79. // The dst and src must not overlap. It is valid to pass a nil dst.
  80. //
  81. // The blocks will require the same amount of memory to decode as encoding,
  82. // and does not make for concurrent decoding.
  83. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  84. //
  85. // If you need to encode larger amounts of data, consider using
  86. // the streaming interface which gives all of these features.
  87. func EncodeBetter(dst, src []byte) []byte {
  88. if n := MaxEncodedLen(len(src)); n < 0 {
  89. panic(ErrTooLarge)
  90. } else if len(dst) < n {
  91. dst = make([]byte, n)
  92. }
  93. // The block starts with the varint-encoded length of the decompressed bytes.
  94. d := binary.PutUvarint(dst, uint64(len(src)))
  95. if len(src) == 0 {
  96. return dst[:d]
  97. }
  98. if len(src) < minNonLiteralBlockSize {
  99. d += emitLiteral(dst[d:], src)
  100. return dst[:d]
  101. }
  102. n := encodeBlockBetter(dst[d:], src)
  103. if n > 0 {
  104. d += n
  105. return dst[:d]
  106. }
  107. // Not compressible
  108. d += emitLiteral(dst[d:], src)
  109. return dst[:d]
  110. }
  111. // EncodeBest returns the encoded form of src. The returned slice may be a sub-
  112. // slice of dst if dst was large enough to hold the entire encoded block.
  113. // Otherwise, a newly allocated slice will be returned.
  114. //
  115. // EncodeBest compresses as good as reasonably possible but with a
  116. // big speed decrease.
  117. //
  118. // The dst and src must not overlap. It is valid to pass a nil dst.
  119. //
  120. // The blocks will require the same amount of memory to decode as encoding,
  121. // and does not make for concurrent decoding.
  122. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  123. //
  124. // If you need to encode larger amounts of data, consider using
  125. // the streaming interface which gives all of these features.
  126. func EncodeBest(dst, src []byte) []byte {
  127. if n := MaxEncodedLen(len(src)); n < 0 {
  128. panic(ErrTooLarge)
  129. } else if len(dst) < n {
  130. dst = make([]byte, n)
  131. }
  132. // The block starts with the varint-encoded length of the decompressed bytes.
  133. d := binary.PutUvarint(dst, uint64(len(src)))
  134. if len(src) == 0 {
  135. return dst[:d]
  136. }
  137. if len(src) < minNonLiteralBlockSize {
  138. d += emitLiteral(dst[d:], src)
  139. return dst[:d]
  140. }
  141. n := encodeBlockBest(dst[d:], src, nil)
  142. if n > 0 {
  143. d += n
  144. return dst[:d]
  145. }
  146. // Not compressible
  147. d += emitLiteral(dst[d:], src)
  148. return dst[:d]
  149. }
  150. // EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
  151. // slice of dst if dst was large enough to hold the entire encoded block.
  152. // Otherwise, a newly allocated slice will be returned.
  153. //
  154. // The output is Snappy compatible and will likely decompress faster.
  155. //
  156. // The dst and src must not overlap. It is valid to pass a nil dst.
  157. //
  158. // The blocks will require the same amount of memory to decode as encoding,
  159. // and does not make for concurrent decoding.
  160. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  161. //
  162. // If you need to encode larger amounts of data, consider using
  163. // the streaming interface which gives all of these features.
  164. func EncodeSnappy(dst, src []byte) []byte {
  165. if n := MaxEncodedLen(len(src)); n < 0 {
  166. panic(ErrTooLarge)
  167. } else if cap(dst) < n {
  168. dst = make([]byte, n)
  169. } else {
  170. dst = dst[:n]
  171. }
  172. // The block starts with the varint-encoded length of the decompressed bytes.
  173. d := binary.PutUvarint(dst, uint64(len(src)))
  174. if len(src) == 0 {
  175. return dst[:d]
  176. }
  177. if len(src) < minNonLiteralBlockSize {
  178. d += emitLiteral(dst[d:], src)
  179. return dst[:d]
  180. }
  181. n := encodeBlockSnappy(dst[d:], src)
  182. if n > 0 {
  183. d += n
  184. return dst[:d]
  185. }
  186. // Not compressible
  187. d += emitLiteral(dst[d:], src)
  188. return dst[:d]
  189. }
  190. // EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
  191. // slice of dst if dst was large enough to hold the entire encoded block.
  192. // Otherwise, a newly allocated slice will be returned.
  193. //
  194. // The output is Snappy compatible and will likely decompress faster.
  195. //
  196. // The dst and src must not overlap. It is valid to pass a nil dst.
  197. //
  198. // The blocks will require the same amount of memory to decode as encoding,
  199. // and does not make for concurrent decoding.
  200. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  201. //
  202. // If you need to encode larger amounts of data, consider using
  203. // the streaming interface which gives all of these features.
  204. func EncodeSnappyBetter(dst, src []byte) []byte {
  205. if n := MaxEncodedLen(len(src)); n < 0 {
  206. panic(ErrTooLarge)
  207. } else if cap(dst) < n {
  208. dst = make([]byte, n)
  209. } else {
  210. dst = dst[:n]
  211. }
  212. // The block starts with the varint-encoded length of the decompressed bytes.
  213. d := binary.PutUvarint(dst, uint64(len(src)))
  214. if len(src) == 0 {
  215. return dst[:d]
  216. }
  217. if len(src) < minNonLiteralBlockSize {
  218. d += emitLiteral(dst[d:], src)
  219. return dst[:d]
  220. }
  221. n := encodeBlockBetterSnappy(dst[d:], src)
  222. if n > 0 {
  223. d += n
  224. return dst[:d]
  225. }
  226. // Not compressible
  227. d += emitLiteral(dst[d:], src)
  228. return dst[:d]
  229. }
  230. // EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
  231. // slice of dst if dst was large enough to hold the entire encoded block.
  232. // Otherwise, a newly allocated slice will be returned.
  233. //
  234. // The output is Snappy compatible and will likely decompress faster.
  235. //
  236. // The dst and src must not overlap. It is valid to pass a nil dst.
  237. //
  238. // The blocks will require the same amount of memory to decode as encoding,
  239. // and does not make for concurrent decoding.
  240. // Also note that blocks do not contain CRC information, so corruption may be undetected.
  241. //
  242. // If you need to encode larger amounts of data, consider using
  243. // the streaming interface which gives all of these features.
  244. func EncodeSnappyBest(dst, src []byte) []byte {
  245. if n := MaxEncodedLen(len(src)); n < 0 {
  246. panic(ErrTooLarge)
  247. } else if cap(dst) < n {
  248. dst = make([]byte, n)
  249. } else {
  250. dst = dst[:n]
  251. }
  252. // The block starts with the varint-encoded length of the decompressed bytes.
  253. d := binary.PutUvarint(dst, uint64(len(src)))
  254. if len(src) == 0 {
  255. return dst[:d]
  256. }
  257. if len(src) < minNonLiteralBlockSize {
  258. d += emitLiteral(dst[d:], src)
  259. return dst[:d]
  260. }
  261. n := encodeBlockBestSnappy(dst[d:], src)
  262. if n > 0 {
  263. d += n
  264. return dst[:d]
  265. }
  266. // Not compressible
  267. d += emitLiteral(dst[d:], src)
  268. return dst[:d]
  269. }
  270. // ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
  271. // If the destination is nil or too small, a new will be allocated.
  272. // The blocks are not validated, so garbage in = garbage out.
  273. // dst may not overlap block data.
  274. // Any data in dst is preserved as is, so it will not be considered a block.
  275. func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
  276. totalSize := uint64(0)
  277. compSize := 0
  278. for _, b := range blocks {
  279. l, hdr, err := decodedLen(b)
  280. if err != nil {
  281. return nil, err
  282. }
  283. totalSize += uint64(l)
  284. compSize += len(b) - hdr
  285. }
  286. if totalSize == 0 {
  287. dst = append(dst, 0)
  288. return dst, nil
  289. }
  290. if totalSize > math.MaxUint32 {
  291. return nil, ErrTooLarge
  292. }
  293. var tmp [binary.MaxVarintLen32]byte
  294. hdrSize := binary.PutUvarint(tmp[:], totalSize)
  295. wantSize := hdrSize + compSize
  296. if cap(dst)-len(dst) < wantSize {
  297. dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
  298. }
  299. dst = append(dst, tmp[:hdrSize]...)
  300. for _, b := range blocks {
  301. _, hdr, err := decodedLen(b)
  302. if err != nil {
  303. return nil, err
  304. }
  305. dst = append(dst, b[hdr:]...)
  306. }
  307. return dst, nil
  308. }
  309. // inputMargin is the minimum number of extra input bytes to keep, inside
  310. // encodeBlock's inner loop. On some architectures, this margin lets us
  311. // implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
  312. // literals can be implemented as a single load to and store from a 16-byte
  313. // register. That literal's actual length can be as short as 1 byte, so this
  314. // can copy up to 15 bytes too much, but that's OK as subsequent iterations of
  315. // the encoding loop will fix up the copy overrun, and this inputMargin ensures
  316. // that we don't overrun the dst and src buffers.
  317. const inputMargin = 8
  318. // minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
  319. // will be accepted by the encoder.
  320. const minNonLiteralBlockSize = 32
  321. const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
  322. // MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
  323. // Blocks this big are highly discouraged, though.
  324. // Half the size on 32 bit systems.
  325. const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
  326. // MaxEncodedLen returns the maximum length of a snappy block, given its
  327. // uncompressed length.
  328. //
  329. // It will return a negative value if srcLen is too large to encode.
  330. // 32 bit platforms will have lower thresholds for rejecting big content.
  331. func MaxEncodedLen(srcLen int) int {
  332. n := uint64(srcLen)
  333. if intReduction == 1 {
  334. // 32 bits
  335. if n > math.MaxInt32 {
  336. // Also includes negative.
  337. return -1
  338. }
  339. } else if n > 0xffffffff {
  340. // 64 bits
  341. // Also includes negative.
  342. return -1
  343. }
  344. // Size of the varint encoded block size.
  345. n = n + uint64((bits.Len64(n)+7)/7)
  346. // Add maximum size of encoding block as literals.
  347. n += uint64(literalExtraSize(int64(srcLen)))
  348. if intReduction == 1 {
  349. // 32 bits
  350. if n > math.MaxInt32 {
  351. return -1
  352. }
  353. } else if n > 0xffffffff {
  354. // 64 bits
  355. // Also includes negative.
  356. return -1
  357. }
  358. return int(n)
  359. }