lz4sconvert.go

// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package s2

import (
	"encoding/binary"
	"fmt"
)

// LZ4sConverter provides conversion from LZ4s.
// (Intel modified LZ4 Blocks)
// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
// The LZ4s block returned by the Intel® QAT hardware can be used by an external
// software post-processing to generate other compressed data formats.
// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
// the same high-level formatting as LZ4 block format with the following encoding changes:
// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
// ONLY "Min match of 4 bytes" is supported.
type LZ4sConverter struct {
}
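
// Illustrative sketch of the token layout described above (a hypothetical helper,
// not part of the package API): the upper nibble of an LZ4s token is the literal
// length and the lower nibble is the copy-length value, where a non-zero value v
// encodes a match of v+3 bytes (so 1-15 maps to 4-18) and 0 means no match
// follows. A nibble of 15 is extended by additional length bytes, as in plain LZ4.
func lz4sTokenExample(token byte) (literalLen, matchLen int) {
	literalLen = int(token >> 4) // 15 means the literal length is extended.
	if v := int(token & 0xf); v > 0 {
		matchLen = v + 3 // Minimum match of 4 bytes; 15 means extended length.
	}
	return literalLen, matchLen
}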

// ConvertBlock will convert an LZ4s block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
	if len(src) == 0 {
		return dst, 0, nil
	}
	const debug = false
	const inline = true
	const lz4MinMatch = 3

	s, d := 0, len(dst)
	dst = dst[:cap(dst)]
	if !debug && hasAmd64Asm {
		res, sz := cvtLZ4sBlockAsm(dst[d:], src)
		if res < 0 {
			const (
				errCorrupt     = -1
				errDstTooSmall = -2
			)
			switch res {
			case errCorrupt:
				return nil, 0, ErrCorrupt
			case errDstTooSmall:
				return nil, 0, ErrDstTooSmall
			default:
				return nil, 0, fmt.Errorf("unexpected result: %d", res)
			}
		}
		if d+sz > len(dst) {
			return nil, 0, ErrDstTooSmall
		}
		return dst[:d+sz], res, nil
	}

	dLimit := len(dst) - 10
	var lastOffset uint16
	var uncompressed int
	if debug {
		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
	}

	for {
		if s >= len(src) {
			return dst[:d], 0, ErrCorrupt
		}
		// Read literal info
		token := src[s]
		ll := int(token >> 4)
		ml := int(lz4MinMatch + (token & 0xf))

		// If upper nibble is 15, literal length is extended
		if token >= 0xf0 {
			for {
				s++
				if s >= len(src) {
					if debug {
						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
					}
					return dst[:d], 0, ErrCorrupt
				}
				val := src[s]
				ll += int(val)
				if val != 255 {
					break
				}
			}
		}
		// Skip past token
		if s+ll >= len(src) {
			if debug {
				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
			}
			return nil, 0, ErrCorrupt
		}
		s++
		if ll > 0 {
			if d+ll > dLimit {
				return nil, 0, ErrDstTooSmall
			}
			if debug {
				fmt.Printf("emit %d literals\n", ll)
			}
			d += emitLiteralGo(dst[d:], src[s:s+ll])
			s += ll
			uncompressed += ll
		}

		// Check if we are done...
		if ml == lz4MinMatch {
			if s == len(src) {
				break
			}
			// 0 bytes.
			continue
		}
		// 2 byte offset
		if s >= len(src)-2 {
			if debug {
				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
			}
			return nil, 0, ErrCorrupt
		}
		offset := binary.LittleEndian.Uint16(src[s:])
		s += 2
		if offset == 0 {
			if debug {
				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
			}
			return nil, 0, ErrCorrupt
		}
		if int(offset) > uncompressed {
			if debug {
				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
			}
			return nil, 0, ErrCorrupt
		}

		if ml == lz4MinMatch+15 {
			for {
				if s >= len(src) {
					if debug {
						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
					}
					return nil, 0, ErrCorrupt
				}
				val := src[s]
				s++
				ml += int(val)
				if val != 255 {
					if s >= len(src) {
						if debug {
							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
						}
						return nil, 0, ErrCorrupt
					}
					break
				}
			}
		}
		if offset == lastOffset {
			if debug {
				fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
			}
			if !inline {
				d += emitRepeat16(dst[d:], offset, ml)
			} else {
				length := ml
				dst := dst[d:]
				for len(dst) > 5 {
					// Repeat offset, make length cheaper
					length -= 4
					if length <= 4 {
						dst[0] = uint8(length)<<2 | tagCopy1
						dst[1] = 0
						d += 2
						break
					}
					if length < 8 && offset < 2048 {
						// Encode WITH offset
						dst[1] = uint8(offset)
						dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
						d += 2
						break
					}
					if length < (1<<8)+4 {
						length -= 4
						dst[2] = uint8(length)
						dst[1] = 0
						dst[0] = 5<<2 | tagCopy1
						d += 3
						break
					}
					if length < (1<<16)+(1<<8) {
						length -= 1 << 8
						dst[3] = uint8(length >> 8)
						dst[2] = uint8(length >> 0)
						dst[1] = 0
						dst[0] = 6<<2 | tagCopy1
						d += 4
						break
					}
					const maxRepeat = (1 << 24) - 1
					length -= 1 << 16
					left := 0
					if length > maxRepeat {
						left = length - maxRepeat + 4
						length = maxRepeat - 4
					}
					dst[4] = uint8(length >> 16)
					dst[3] = uint8(length >> 8)
					dst[2] = uint8(length >> 0)
					dst[1] = 0
					dst[0] = 7<<2 | tagCopy1
					if left > 0 {
						d += 5 + emitRepeat16(dst[5:], offset, left)
						break
					}
					d += 5
					break
				}
			}
		} else {
			if debug {
				fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
			}
			if !inline {
				d += emitCopy16(dst[d:], offset, ml)
			} else {
				length := ml
				dst := dst[d:]
				for len(dst) > 5 {
					// Offset no more than 2 bytes.
					if length > 64 {
						off := 3
						if offset < 2048 {
							// emit 8 bytes as tagCopy1, rest as repeats.
							dst[1] = uint8(offset)
							dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
							length -= 8
							off = 2
						} else {
							// Emit a length 60 copy, encoded as 3 bytes.
							// Emit remaining as repeat value (minimum 4 bytes).
							dst[2] = uint8(offset >> 8)
							dst[1] = uint8(offset)
							dst[0] = 59<<2 | tagCopy2
							length -= 60
						}
						// Emit remaining as repeats, at least 4 bytes remain.
						d += off + emitRepeat16(dst[off:], offset, length)
						break
					}
					if length >= 12 || offset >= 2048 {
						// Emit the remaining copy, encoded as 3 bytes.
						dst[2] = uint8(offset >> 8)
						dst[1] = uint8(offset)
						dst[0] = uint8(length-1)<<2 | tagCopy2
						d += 3
						break
					}
					// Emit the remaining copy, encoded as 2 bytes.
					dst[1] = uint8(offset)
					dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
					d += 2
					break
				}
			}
			lastOffset = offset
		}
		uncompressed += ml
		if d > dLimit {
			return nil, 0, ErrDstTooSmall
		}
	}

	return dst[:d], uncompressed, nil
}
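
// Illustrative usage sketch (a hypothetical helper, not part of the package API):
// ConvertBlock appends after len(dst) and needs enough capacity for the whole
// converted block, so the sketch reserves MaxEncodedLen of the expected
// uncompressed size, which is assumed to be a sufficient bound here; if it is
// not, ConvertBlock reports ErrDstTooSmall. The expectedSize parameter is an
// assumption about how the caller knows the decoded size (for example from the
// producer of the LZ4s data).
func exampleConvertLZ4sToS2(lz4sBlock []byte, expectedSize int) ([]byte, int, error) {
	n := MaxEncodedLen(expectedSize)
	if n < 0 {
		return nil, 0, fmt.Errorf("expected size %d too large", expectedSize)
	}
	dst := make([]byte, 0, n) // Zero length, full capacity: output starts at offset 0.
	var c LZ4sConverter
	// Returns the converted S2 block (without block length) and the
	// uncompressed size it represents.
	return c.ConvertBlock(dst, lz4sBlock)
}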

// ConvertBlockSnappy will convert an LZ4s block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
	if len(src) == 0 {
		return dst, 0, nil
	}
	const debug = false
	const lz4MinMatch = 3

	s, d := 0, len(dst)
	dst = dst[:cap(dst)]
	// Use assembly when possible
	if !debug && hasAmd64Asm {
		res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
		if res < 0 {
			const (
				errCorrupt     = -1
				errDstTooSmall = -2
			)
			switch res {
			case errCorrupt:
				return nil, 0, ErrCorrupt
			case errDstTooSmall:
				return nil, 0, ErrDstTooSmall
			default:
				return nil, 0, fmt.Errorf("unexpected result: %d", res)
			}
		}
		if d+sz > len(dst) {
			return nil, 0, ErrDstTooSmall
		}
		return dst[:d+sz], res, nil
	}

	dLimit := len(dst) - 10
	var uncompressed int
	if debug {
		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
	}

	for {
		if s >= len(src) {
			return nil, 0, ErrCorrupt
		}
		// Read literal info
		token := src[s]
		ll := int(token >> 4)
		ml := int(lz4MinMatch + (token & 0xf))

		// If upper nibble is 15, literal length is extended
		if token >= 0xf0 {
			for {
				s++
				if s >= len(src) {
					if debug {
						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
					}
					return nil, 0, ErrCorrupt
				}
				val := src[s]
				ll += int(val)
				if val != 255 {
					break
				}
			}
		}
		// Skip past token
		if s+ll >= len(src) {
			if debug {
				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
			}
			return nil, 0, ErrCorrupt
		}
		s++
		if ll > 0 {
			if d+ll > dLimit {
				return nil, 0, ErrDstTooSmall
			}
			if debug {
				fmt.Printf("emit %d literals\n", ll)
			}
			d += emitLiteralGo(dst[d:], src[s:s+ll])
			s += ll
			uncompressed += ll
		}

		// Check if we are done...
		if ml == lz4MinMatch {
			if s == len(src) {
				break
			}
			// 0 bytes.
			continue
		}
		// 2 byte offset
		if s >= len(src)-2 {
			if debug {
				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
			}
			return nil, 0, ErrCorrupt
		}
		offset := binary.LittleEndian.Uint16(src[s:])
		s += 2
		if offset == 0 {
			if debug {
				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
			}
			return nil, 0, ErrCorrupt
		}
		if int(offset) > uncompressed {
			if debug {
				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
			}
			return nil, 0, ErrCorrupt
		}

		if ml == lz4MinMatch+15 {
			for {
				if s >= len(src) {
					if debug {
						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
					}
					return nil, 0, ErrCorrupt
				}
				val := src[s]
				s++
				ml += int(val)
				if val != 255 {
					if s >= len(src) {
						if debug {
							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
						}
						return nil, 0, ErrCorrupt
					}
					break
				}
			}
		}
		if debug {
			fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
		}
		length := ml
		// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
		for length > 0 {
			if d >= dLimit {
				return nil, 0, ErrDstTooSmall
			}
			// Offset no more than 2 bytes.
			if length > 64 {
				// Emit a length 64 copy, encoded as 3 bytes.
				dst[d+2] = uint8(offset >> 8)
				dst[d+1] = uint8(offset)
				dst[d+0] = 63<<2 | tagCopy2
				length -= 64
				d += 3
				continue
			}
			if length >= 12 || offset >= 2048 || length < 4 {
				// Emit the remaining copy, encoded as 3 bytes.
				dst[d+2] = uint8(offset >> 8)
				dst[d+1] = uint8(offset)
				dst[d+0] = uint8(length-1)<<2 | tagCopy2
				d += 3
				break
			}
			// Emit the remaining copy, encoded as 2 bytes.
			dst[d+1] = uint8(offset)
			dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
			d += 2
			break
		}
		uncompressed += ml
		if d > dLimit {
			return nil, 0, ErrDstTooSmall
		}
	}

	return dst[:d], uncompressed, nil
}
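
// Illustrative usage sketch (a hypothetical helper, not part of the package API):
// ConvertBlockSnappy appends the converted data without the block length, so a
// complete Snappy block still needs the uncompressed size prepended as a uvarint
// header. The sketch reserves room for that header, converts, and then joins
// header and payload. MaxEncodedLen as a capacity bound is an assumption, as is
// the expectedSize parameter; ConvertBlockSnappy reports ErrDstTooSmall when the
// capacity does not suffice.
func exampleConvertLZ4sToSnappy(lz4sBlock []byte, expectedSize int) ([]byte, error) {
	n := MaxEncodedLen(expectedSize)
	if n < 0 {
		return nil, fmt.Errorf("expected size %d too large", expectedSize)
	}
	// Reserve the maximum uvarint header size so the converted payload can be
	// appended directly behind it.
	const hdrMax = binary.MaxVarintLen64
	buf := make([]byte, hdrMax, hdrMax+n)

	var c LZ4sConverter
	buf, decoded, err := c.ConvertBlockSnappy(buf, lz4sBlock)
	if err != nil {
		return nil, err
	}

	// Encode the decoded size and place the payload right after the header.
	var hdr [binary.MaxVarintLen64]byte
	hdrLen := binary.PutUvarint(hdr[:], uint64(decoded))
	return append(hdr[:hdrLen:hdrLen], buf[hdrMax:]...), nil
}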