encode_all.go 26 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048
  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "bytes"
  8. "encoding/binary"
  9. "fmt"
  10. "math/bits"
  11. )
  12. func load32(b []byte, i int) uint32 {
  13. return binary.LittleEndian.Uint32(b[i:])
  14. }
  15. func load64(b []byte, i int) uint64 {
  16. return binary.LittleEndian.Uint64(b[i:])
  17. }
  18. // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
  19. // Preferably h should be a constant and should always be <64.
  20. func hash6(u uint64, h uint8) uint32 {
  21. const prime6bytes = 227718039650203
  22. return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
  23. }
  24. func encodeGo(dst, src []byte) []byte {
  25. if n := MaxEncodedLen(len(src)); n < 0 {
  26. panic(ErrTooLarge)
  27. } else if len(dst) < n {
  28. dst = make([]byte, n)
  29. }
  30. // The block starts with the varint-encoded length of the decompressed bytes.
  31. d := binary.PutUvarint(dst, uint64(len(src)))
  32. if len(src) == 0 {
  33. return dst[:d]
  34. }
  35. if len(src) < minNonLiteralBlockSize {
  36. d += emitLiteral(dst[d:], src)
  37. return dst[:d]
  38. }
  39. n := encodeBlockGo(dst[d:], src)
  40. if n > 0 {
  41. d += n
  42. return dst[:d]
  43. }
  44. // Not compressible
  45. d += emitLiteral(dst[d:], src)
  46. return dst[:d]
  47. }
  48. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  49. // assumes that the varint-encoded length of the decompressed bytes has already
  50. // been written.
  51. //
  52. // It also assumes that:
  53. //
  54. // len(dst) >= MaxEncodedLen(len(src)) &&
  55. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  56. func encodeBlockGo(dst, src []byte) (d int) {
  57. // Initialize the hash table.
  58. const (
  59. tableBits = 14
  60. maxTableSize = 1 << tableBits
  61. debug = false
  62. )
  63. var table [maxTableSize]uint32
  64. // sLimit is when to stop looking for offset/length copies. The inputMargin
  65. // lets us use a fast path for emitLiteral in the main loop, while we are
  66. // looking for copies.
  67. sLimit := len(src) - inputMargin
  68. // Bail if we can't compress to at least this.
  69. dstLimit := len(src) - len(src)>>5 - 5
  70. // nextEmit is where in src the next emitLiteral should start from.
  71. nextEmit := 0
  72. // The encoded form must start with a literal, as there are no previous
  73. // bytes to copy, so we start looking for hash matches at s == 1.
  74. s := 1
  75. cv := load64(src, s)
  76. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  77. repeat := 1
  78. for {
  79. candidate := 0
  80. for {
  81. // Next src position to check
  82. nextS := s + (s-nextEmit)>>6 + 4
  83. if nextS > sLimit {
  84. goto emitRemainder
  85. }
  86. hash0 := hash6(cv, tableBits)
  87. hash1 := hash6(cv>>8, tableBits)
  88. candidate = int(table[hash0])
  89. candidate2 := int(table[hash1])
  90. table[hash0] = uint32(s)
  91. table[hash1] = uint32(s + 1)
  92. hash2 := hash6(cv>>16, tableBits)
  93. // Check repeat at offset checkRep.
  94. const checkRep = 1
  95. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  96. base := s + checkRep
  97. // Extend back
  98. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  99. i--
  100. base--
  101. }
  102. d += emitLiteral(dst[d:], src[nextEmit:base])
  103. // Extend forward
  104. candidate := s - repeat + 4 + checkRep
  105. s += 4 + checkRep
  106. for s <= sLimit {
  107. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  108. s += bits.TrailingZeros64(diff) >> 3
  109. break
  110. }
  111. s += 8
  112. candidate += 8
  113. }
  114. if debug {
  115. // Validate match.
  116. if s <= candidate {
  117. panic("s <= candidate")
  118. }
  119. a := src[base:s]
  120. b := src[base-repeat : base-repeat+(s-base)]
  121. if !bytes.Equal(a, b) {
  122. panic("mismatch")
  123. }
  124. }
  125. if nextEmit > 0 {
  126. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  127. d += emitRepeat(dst[d:], repeat, s-base)
  128. } else {
  129. // First match, cannot be repeat.
  130. d += emitCopy(dst[d:], repeat, s-base)
  131. }
  132. nextEmit = s
  133. if s >= sLimit {
  134. goto emitRemainder
  135. }
  136. cv = load64(src, s)
  137. continue
  138. }
  139. if uint32(cv) == load32(src, candidate) {
  140. break
  141. }
  142. candidate = int(table[hash2])
  143. if uint32(cv>>8) == load32(src, candidate2) {
  144. table[hash2] = uint32(s + 2)
  145. candidate = candidate2
  146. s++
  147. break
  148. }
  149. table[hash2] = uint32(s + 2)
  150. if uint32(cv>>16) == load32(src, candidate) {
  151. s += 2
  152. break
  153. }
  154. cv = load64(src, nextS)
  155. s = nextS
  156. }
  157. // Extend backwards.
  158. // The top bytes will be rechecked to get the full match.
  159. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  160. candidate--
  161. s--
  162. }
  163. // Bail if we exceed the maximum size.
  164. if d+(s-nextEmit) > dstLimit {
  165. return 0
  166. }
  167. // A 4-byte match has been found. We'll later see if more than 4 bytes
  168. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  169. // them as literal bytes.
  170. d += emitLiteral(dst[d:], src[nextEmit:s])
  171. // Call emitCopy, and then see if another emitCopy could be our next
  172. // move. Repeat until we find no match for the input immediately after
  173. // what was consumed by the last emitCopy call.
  174. //
  175. // If we exit this loop normally then we need to call emitLiteral next,
  176. // though we don't yet know how big the literal will be. We handle that
  177. // by proceeding to the next iteration of the main loop. We also can
  178. // exit this loop via goto if we get close to exhausting the input.
  179. for {
  180. // Invariant: we have a 4-byte match at s, and no need to emit any
  181. // literal bytes prior to s.
  182. base := s
  183. repeat = base - candidate
  184. // Extend the 4-byte match as long as possible.
  185. s += 4
  186. candidate += 4
  187. for s <= len(src)-8 {
  188. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  189. s += bits.TrailingZeros64(diff) >> 3
  190. break
  191. }
  192. s += 8
  193. candidate += 8
  194. }
  195. d += emitCopy(dst[d:], repeat, s-base)
  196. if debug {
  197. // Validate match.
  198. if s <= candidate {
  199. panic("s <= candidate")
  200. }
  201. a := src[base:s]
  202. b := src[base-repeat : base-repeat+(s-base)]
  203. if !bytes.Equal(a, b) {
  204. panic("mismatch")
  205. }
  206. }
  207. nextEmit = s
  208. if s >= sLimit {
  209. goto emitRemainder
  210. }
  211. if d > dstLimit {
  212. // Do we have space for more, if not bail.
  213. return 0
  214. }
  215. // Check for an immediate match, otherwise start search at s+1
  216. x := load64(src, s-2)
  217. m2Hash := hash6(x, tableBits)
  218. currHash := hash6(x>>16, tableBits)
  219. candidate = int(table[currHash])
  220. table[m2Hash] = uint32(s - 2)
  221. table[currHash] = uint32(s)
  222. if debug && s == candidate {
  223. panic("s == candidate")
  224. }
  225. if uint32(x>>16) != load32(src, candidate) {
  226. cv = load64(src, s+1)
  227. s++
  228. break
  229. }
  230. }
  231. }
  232. emitRemainder:
  233. if nextEmit < len(src) {
  234. // Bail if we exceed the maximum size.
  235. if d+len(src)-nextEmit > dstLimit {
  236. return 0
  237. }
  238. d += emitLiteral(dst[d:], src[nextEmit:])
  239. }
  240. return d
  241. }
  242. func encodeBlockSnappyGo(dst, src []byte) (d int) {
  243. // Initialize the hash table.
  244. const (
  245. tableBits = 14
  246. maxTableSize = 1 << tableBits
  247. )
  248. var table [maxTableSize]uint32
  249. // sLimit is when to stop looking for offset/length copies. The inputMargin
  250. // lets us use a fast path for emitLiteral in the main loop, while we are
  251. // looking for copies.
  252. sLimit := len(src) - inputMargin
  253. // Bail if we can't compress to at least this.
  254. dstLimit := len(src) - len(src)>>5 - 5
  255. // nextEmit is where in src the next emitLiteral should start from.
  256. nextEmit := 0
  257. // The encoded form must start with a literal, as there are no previous
  258. // bytes to copy, so we start looking for hash matches at s == 1.
  259. s := 1
  260. cv := load64(src, s)
  261. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  262. repeat := 1
  263. for {
  264. candidate := 0
  265. for {
  266. // Next src position to check
  267. nextS := s + (s-nextEmit)>>6 + 4
  268. if nextS > sLimit {
  269. goto emitRemainder
  270. }
  271. hash0 := hash6(cv, tableBits)
  272. hash1 := hash6(cv>>8, tableBits)
  273. candidate = int(table[hash0])
  274. candidate2 := int(table[hash1])
  275. table[hash0] = uint32(s)
  276. table[hash1] = uint32(s + 1)
  277. hash2 := hash6(cv>>16, tableBits)
  278. // Check repeat at offset checkRep.
  279. const checkRep = 1
  280. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  281. base := s + checkRep
  282. // Extend back
  283. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  284. i--
  285. base--
  286. }
  287. d += emitLiteral(dst[d:], src[nextEmit:base])
  288. // Extend forward
  289. candidate := s - repeat + 4 + checkRep
  290. s += 4 + checkRep
  291. for s <= sLimit {
  292. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  293. s += bits.TrailingZeros64(diff) >> 3
  294. break
  295. }
  296. s += 8
  297. candidate += 8
  298. }
  299. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  300. nextEmit = s
  301. if s >= sLimit {
  302. goto emitRemainder
  303. }
  304. cv = load64(src, s)
  305. continue
  306. }
  307. if uint32(cv) == load32(src, candidate) {
  308. break
  309. }
  310. candidate = int(table[hash2])
  311. if uint32(cv>>8) == load32(src, candidate2) {
  312. table[hash2] = uint32(s + 2)
  313. candidate = candidate2
  314. s++
  315. break
  316. }
  317. table[hash2] = uint32(s + 2)
  318. if uint32(cv>>16) == load32(src, candidate) {
  319. s += 2
  320. break
  321. }
  322. cv = load64(src, nextS)
  323. s = nextS
  324. }
  325. // Extend backwards
  326. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  327. candidate--
  328. s--
  329. }
  330. // Bail if we exceed the maximum size.
  331. if d+(s-nextEmit) > dstLimit {
  332. return 0
  333. }
  334. // A 4-byte match has been found. We'll later see if more than 4 bytes
  335. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  336. // them as literal bytes.
  337. d += emitLiteral(dst[d:], src[nextEmit:s])
  338. // Call emitCopy, and then see if another emitCopy could be our next
  339. // move. Repeat until we find no match for the input immediately after
  340. // what was consumed by the last emitCopy call.
  341. //
  342. // If we exit this loop normally then we need to call emitLiteral next,
  343. // though we don't yet know how big the literal will be. We handle that
  344. // by proceeding to the next iteration of the main loop. We also can
  345. // exit this loop via goto if we get close to exhausting the input.
  346. for {
  347. // Invariant: we have a 4-byte match at s, and no need to emit any
  348. // literal bytes prior to s.
  349. base := s
  350. repeat = base - candidate
  351. // Extend the 4-byte match as long as possible.
  352. s += 4
  353. candidate += 4
  354. for s <= len(src)-8 {
  355. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  356. s += bits.TrailingZeros64(diff) >> 3
  357. break
  358. }
  359. s += 8
  360. candidate += 8
  361. }
  362. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  363. if false {
  364. // Validate match.
  365. a := src[base:s]
  366. b := src[base-repeat : base-repeat+(s-base)]
  367. if !bytes.Equal(a, b) {
  368. panic("mismatch")
  369. }
  370. }
  371. nextEmit = s
  372. if s >= sLimit {
  373. goto emitRemainder
  374. }
  375. if d > dstLimit {
  376. // Do we have space for more, if not bail.
  377. return 0
  378. }
  379. // Check for an immediate match, otherwise start search at s+1
  380. x := load64(src, s-2)
  381. m2Hash := hash6(x, tableBits)
  382. currHash := hash6(x>>16, tableBits)
  383. candidate = int(table[currHash])
  384. table[m2Hash] = uint32(s - 2)
  385. table[currHash] = uint32(s)
  386. if uint32(x>>16) != load32(src, candidate) {
  387. cv = load64(src, s+1)
  388. s++
  389. break
  390. }
  391. }
  392. }
  393. emitRemainder:
  394. if nextEmit < len(src) {
  395. // Bail if we exceed the maximum size.
  396. if d+len(src)-nextEmit > dstLimit {
  397. return 0
  398. }
  399. d += emitLiteral(dst[d:], src[nextEmit:])
  400. }
  401. return d
  402. }
  403. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  404. // assumes that the varint-encoded length of the decompressed bytes has already
  405. // been written.
  406. //
  407. // It also assumes that:
  408. //
  409. // len(dst) >= MaxEncodedLen(len(src)) &&
  410. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  411. func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
  412. // Initialize the hash table.
  413. const (
  414. tableBits = 14
  415. maxTableSize = 1 << tableBits
  416. maxAhead = 8 // maximum bytes ahead without checking sLimit
  417. debug = false
  418. )
  419. dict.initFast()
  420. var table [maxTableSize]uint32
  421. // sLimit is when to stop looking for offset/length copies. The inputMargin
  422. // lets us use a fast path for emitLiteral in the main loop, while we are
  423. // looking for copies.
  424. sLimit := len(src) - inputMargin
  425. if sLimit > MaxDictSrcOffset-maxAhead {
  426. sLimit = MaxDictSrcOffset - maxAhead
  427. }
  428. // Bail if we can't compress to at least this.
  429. dstLimit := len(src) - len(src)>>5 - 5
  430. // nextEmit is where in src the next emitLiteral should start from.
  431. nextEmit := 0
  432. // The encoded form can start with a dict entry (copy or repeat).
  433. s := 0
  434. // Convert dict repeat to offset
  435. repeat := len(dict.dict) - dict.repeat
  436. cv := load64(src, 0)
  437. // While in dict
  438. searchDict:
  439. for {
  440. // Next src position to check
  441. nextS := s + (s-nextEmit)>>6 + 4
  442. hash0 := hash6(cv, tableBits)
  443. hash1 := hash6(cv>>8, tableBits)
  444. if nextS > sLimit {
  445. if debug {
  446. fmt.Println("slimit reached", s, nextS)
  447. }
  448. break searchDict
  449. }
  450. candidateDict := int(dict.fastTable[hash0])
  451. candidateDict2 := int(dict.fastTable[hash1])
  452. candidate2 := int(table[hash1])
  453. candidate := int(table[hash0])
  454. table[hash0] = uint32(s)
  455. table[hash1] = uint32(s + 1)
  456. hash2 := hash6(cv>>16, tableBits)
  457. // Check repeat at offset checkRep.
  458. const checkRep = 1
  459. if repeat > s {
  460. candidate := len(dict.dict) - repeat + s
  461. if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
  462. // Extend back
  463. base := s
  464. for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
  465. i--
  466. base--
  467. }
  468. d += emitLiteral(dst[d:], src[nextEmit:base])
  469. if debug && nextEmit != base {
  470. fmt.Println("emitted ", base-nextEmit, "literals")
  471. }
  472. s += 4
  473. candidate += 4
  474. for candidate < len(dict.dict)-8 && s <= len(src)-8 {
  475. if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
  476. s += bits.TrailingZeros64(diff) >> 3
  477. break
  478. }
  479. s += 8
  480. candidate += 8
  481. }
  482. d += emitRepeat(dst[d:], repeat, s-base)
  483. if debug {
  484. fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
  485. }
  486. nextEmit = s
  487. if s >= sLimit {
  488. break searchDict
  489. }
  490. cv = load64(src, s)
  491. continue
  492. }
  493. } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  494. base := s + checkRep
  495. // Extend back
  496. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  497. i--
  498. base--
  499. }
  500. d += emitLiteral(dst[d:], src[nextEmit:base])
  501. if debug && nextEmit != base {
  502. fmt.Println("emitted ", base-nextEmit, "literals")
  503. }
  504. // Extend forward
  505. candidate := s - repeat + 4 + checkRep
  506. s += 4 + checkRep
  507. for s <= sLimit {
  508. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  509. s += bits.TrailingZeros64(diff) >> 3
  510. break
  511. }
  512. s += 8
  513. candidate += 8
  514. }
  515. if debug {
  516. // Validate match.
  517. if s <= candidate {
  518. panic("s <= candidate")
  519. }
  520. a := src[base:s]
  521. b := src[base-repeat : base-repeat+(s-base)]
  522. if !bytes.Equal(a, b) {
  523. panic("mismatch")
  524. }
  525. }
  526. if nextEmit > 0 {
  527. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  528. d += emitRepeat(dst[d:], repeat, s-base)
  529. } else {
  530. // First match, cannot be repeat.
  531. d += emitCopy(dst[d:], repeat, s-base)
  532. }
  533. nextEmit = s
  534. if s >= sLimit {
  535. break searchDict
  536. }
  537. if debug {
  538. fmt.Println("emitted reg repeat", s-base, "s:", s)
  539. }
  540. cv = load64(src, s)
  541. continue searchDict
  542. }
  543. if s == 0 {
  544. cv = load64(src, nextS)
  545. s = nextS
  546. continue searchDict
  547. }
  548. // Start with table. These matches will always be closer.
  549. if uint32(cv) == load32(src, candidate) {
  550. goto emitMatch
  551. }
  552. candidate = int(table[hash2])
  553. if uint32(cv>>8) == load32(src, candidate2) {
  554. table[hash2] = uint32(s + 2)
  555. candidate = candidate2
  556. s++
  557. goto emitMatch
  558. }
  559. // Check dict. Dicts have longer offsets, so we want longer matches.
  560. if cv == load64(dict.dict, candidateDict) {
  561. table[hash2] = uint32(s + 2)
  562. goto emitDict
  563. }
  564. candidateDict = int(dict.fastTable[hash2])
  565. // Check if upper 7 bytes match
  566. if candidateDict2 >= 1 {
  567. if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
  568. table[hash2] = uint32(s + 2)
  569. candidateDict = candidateDict2
  570. s++
  571. goto emitDict
  572. }
  573. }
  574. table[hash2] = uint32(s + 2)
  575. if uint32(cv>>16) == load32(src, candidate) {
  576. s += 2
  577. goto emitMatch
  578. }
  579. if candidateDict >= 2 {
  580. // Check if upper 6 bytes match
  581. if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
  582. s += 2
  583. goto emitDict
  584. }
  585. }
  586. cv = load64(src, nextS)
  587. s = nextS
  588. continue searchDict
  589. emitDict:
  590. {
  591. if debug {
  592. if load32(dict.dict, candidateDict) != load32(src, s) {
  593. panic("dict emit mismatch")
  594. }
  595. }
  596. // Extend backwards.
  597. // The top bytes will be rechecked to get the full match.
  598. for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
  599. candidateDict--
  600. s--
  601. }
  602. // Bail if we exceed the maximum size.
  603. if d+(s-nextEmit) > dstLimit {
  604. return 0
  605. }
  606. // A 4-byte match has been found. We'll later see if more than 4 bytes
  607. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  608. // them as literal bytes.
  609. d += emitLiteral(dst[d:], src[nextEmit:s])
  610. if debug && nextEmit != s {
  611. fmt.Println("emitted ", s-nextEmit, "literals")
  612. }
  613. {
  614. // Invariant: we have a 4-byte match at s, and no need to emit any
  615. // literal bytes prior to s.
  616. base := s
  617. repeat = s + (len(dict.dict)) - candidateDict
  618. // Extend the 4-byte match as long as possible.
  619. s += 4
  620. candidateDict += 4
  621. for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
  622. if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
  623. s += bits.TrailingZeros64(diff) >> 3
  624. break
  625. }
  626. s += 8
  627. candidateDict += 8
  628. }
  629. // Matches longer than 64 are split.
  630. if s <= sLimit || s-base < 8 {
  631. d += emitCopy(dst[d:], repeat, s-base)
  632. } else {
  633. // Split to ensure we don't start a copy within next block
  634. d += emitCopy(dst[d:], repeat, 4)
  635. d += emitRepeat(dst[d:], repeat, s-base-4)
  636. }
  637. if false {
  638. // Validate match.
  639. if s <= candidate {
  640. panic("s <= candidate")
  641. }
  642. a := src[base:s]
  643. b := dict.dict[base-repeat : base-repeat+(s-base)]
  644. if !bytes.Equal(a, b) {
  645. panic("mismatch")
  646. }
  647. }
  648. if debug {
  649. fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
  650. }
  651. nextEmit = s
  652. if s >= sLimit {
  653. break searchDict
  654. }
  655. if d > dstLimit {
  656. // Do we have space for more, if not bail.
  657. return 0
  658. }
  659. // Index and continue loop to try new candidate.
  660. x := load64(src, s-2)
  661. m2Hash := hash6(x, tableBits)
  662. currHash := hash6(x>>8, tableBits)
  663. table[m2Hash] = uint32(s - 2)
  664. table[currHash] = uint32(s - 1)
  665. cv = load64(src, s)
  666. }
  667. continue
  668. }
  669. emitMatch:
  670. // Extend backwards.
  671. // The top bytes will be rechecked to get the full match.
  672. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  673. candidate--
  674. s--
  675. }
  676. // Bail if we exceed the maximum size.
  677. if d+(s-nextEmit) > dstLimit {
  678. return 0
  679. }
  680. // A 4-byte match has been found. We'll later see if more than 4 bytes
  681. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  682. // them as literal bytes.
  683. d += emitLiteral(dst[d:], src[nextEmit:s])
  684. if debug && nextEmit != s {
  685. fmt.Println("emitted ", s-nextEmit, "literals")
  686. }
  687. // Call emitCopy, and then see if another emitCopy could be our next
  688. // move. Repeat until we find no match for the input immediately after
  689. // what was consumed by the last emitCopy call.
  690. //
  691. // If we exit this loop normally then we need to call emitLiteral next,
  692. // though we don't yet know how big the literal will be. We handle that
  693. // by proceeding to the next iteration of the main loop. We also can
  694. // exit this loop via goto if we get close to exhausting the input.
  695. for {
  696. // Invariant: we have a 4-byte match at s, and no need to emit any
  697. // literal bytes prior to s.
  698. base := s
  699. repeat = base - candidate
  700. // Extend the 4-byte match as long as possible.
  701. s += 4
  702. candidate += 4
  703. for s <= len(src)-8 {
  704. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  705. s += bits.TrailingZeros64(diff) >> 3
  706. break
  707. }
  708. s += 8
  709. candidate += 8
  710. }
  711. d += emitCopy(dst[d:], repeat, s-base)
  712. if debug {
  713. // Validate match.
  714. if s <= candidate {
  715. panic("s <= candidate")
  716. }
  717. a := src[base:s]
  718. b := src[base-repeat : base-repeat+(s-base)]
  719. if !bytes.Equal(a, b) {
  720. panic("mismatch")
  721. }
  722. }
  723. if debug {
  724. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  725. }
  726. nextEmit = s
  727. if s >= sLimit {
  728. break searchDict
  729. }
  730. if d > dstLimit {
  731. // Do we have space for more, if not bail.
  732. return 0
  733. }
  734. // Check for an immediate match, otherwise start search at s+1
  735. x := load64(src, s-2)
  736. m2Hash := hash6(x, tableBits)
  737. currHash := hash6(x>>16, tableBits)
  738. candidate = int(table[currHash])
  739. table[m2Hash] = uint32(s - 2)
  740. table[currHash] = uint32(s)
  741. if debug && s == candidate {
  742. panic("s == candidate")
  743. }
  744. if uint32(x>>16) != load32(src, candidate) {
  745. cv = load64(src, s+1)
  746. s++
  747. break
  748. }
  749. }
  750. }
  751. // Search without dict:
  752. if repeat > s {
  753. repeat = 0
  754. }
  755. // No more dict
  756. sLimit = len(src) - inputMargin
  757. if s >= sLimit {
  758. goto emitRemainder
  759. }
  760. if debug {
  761. fmt.Println("non-dict matching at", s, "repeat:", repeat)
  762. }
  763. cv = load64(src, s)
  764. if debug {
  765. fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
  766. }
  767. for {
  768. candidate := 0
  769. for {
  770. // Next src position to check
  771. nextS := s + (s-nextEmit)>>6 + 4
  772. if nextS > sLimit {
  773. goto emitRemainder
  774. }
  775. hash0 := hash6(cv, tableBits)
  776. hash1 := hash6(cv>>8, tableBits)
  777. candidate = int(table[hash0])
  778. candidate2 := int(table[hash1])
  779. table[hash0] = uint32(s)
  780. table[hash1] = uint32(s + 1)
  781. hash2 := hash6(cv>>16, tableBits)
  782. // Check repeat at offset checkRep.
  783. const checkRep = 1
  784. if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  785. base := s + checkRep
  786. // Extend back
  787. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  788. i--
  789. base--
  790. }
  791. d += emitLiteral(dst[d:], src[nextEmit:base])
  792. if debug && nextEmit != base {
  793. fmt.Println("emitted ", base-nextEmit, "literals")
  794. }
  795. // Extend forward
  796. candidate := s - repeat + 4 + checkRep
  797. s += 4 + checkRep
  798. for s <= sLimit {
  799. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  800. s += bits.TrailingZeros64(diff) >> 3
  801. break
  802. }
  803. s += 8
  804. candidate += 8
  805. }
  806. if debug {
  807. // Validate match.
  808. if s <= candidate {
  809. panic("s <= candidate")
  810. }
  811. a := src[base:s]
  812. b := src[base-repeat : base-repeat+(s-base)]
  813. if !bytes.Equal(a, b) {
  814. panic("mismatch")
  815. }
  816. }
  817. if nextEmit > 0 {
  818. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  819. d += emitRepeat(dst[d:], repeat, s-base)
  820. } else {
  821. // First match, cannot be repeat.
  822. d += emitCopy(dst[d:], repeat, s-base)
  823. }
  824. if debug {
  825. fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
  826. }
  827. nextEmit = s
  828. if s >= sLimit {
  829. goto emitRemainder
  830. }
  831. cv = load64(src, s)
  832. continue
  833. }
  834. if uint32(cv) == load32(src, candidate) {
  835. break
  836. }
  837. candidate = int(table[hash2])
  838. if uint32(cv>>8) == load32(src, candidate2) {
  839. table[hash2] = uint32(s + 2)
  840. candidate = candidate2
  841. s++
  842. break
  843. }
  844. table[hash2] = uint32(s + 2)
  845. if uint32(cv>>16) == load32(src, candidate) {
  846. s += 2
  847. break
  848. }
  849. cv = load64(src, nextS)
  850. s = nextS
  851. }
  852. // Extend backwards.
  853. // The top bytes will be rechecked to get the full match.
  854. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  855. candidate--
  856. s--
  857. }
  858. // Bail if we exceed the maximum size.
  859. if d+(s-nextEmit) > dstLimit {
  860. return 0
  861. }
  862. // A 4-byte match has been found. We'll later see if more than 4 bytes
  863. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  864. // them as literal bytes.
  865. d += emitLiteral(dst[d:], src[nextEmit:s])
  866. if debug && nextEmit != s {
  867. fmt.Println("emitted ", s-nextEmit, "literals")
  868. }
  869. // Call emitCopy, and then see if another emitCopy could be our next
  870. // move. Repeat until we find no match for the input immediately after
  871. // what was consumed by the last emitCopy call.
  872. //
  873. // If we exit this loop normally then we need to call emitLiteral next,
  874. // though we don't yet know how big the literal will be. We handle that
  875. // by proceeding to the next iteration of the main loop. We also can
  876. // exit this loop via goto if we get close to exhausting the input.
  877. for {
  878. // Invariant: we have a 4-byte match at s, and no need to emit any
  879. // literal bytes prior to s.
  880. base := s
  881. repeat = base - candidate
  882. // Extend the 4-byte match as long as possible.
  883. s += 4
  884. candidate += 4
  885. for s <= len(src)-8 {
  886. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  887. s += bits.TrailingZeros64(diff) >> 3
  888. break
  889. }
  890. s += 8
  891. candidate += 8
  892. }
  893. d += emitCopy(dst[d:], repeat, s-base)
  894. if debug {
  895. // Validate match.
  896. if s <= candidate {
  897. panic("s <= candidate")
  898. }
  899. a := src[base:s]
  900. b := src[base-repeat : base-repeat+(s-base)]
  901. if !bytes.Equal(a, b) {
  902. panic("mismatch")
  903. }
  904. }
  905. if debug {
  906. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  907. }
  908. nextEmit = s
  909. if s >= sLimit {
  910. goto emitRemainder
  911. }
  912. if d > dstLimit {
  913. // Do we have space for more, if not bail.
  914. return 0
  915. }
  916. // Check for an immediate match, otherwise start search at s+1
  917. x := load64(src, s-2)
  918. m2Hash := hash6(x, tableBits)
  919. currHash := hash6(x>>16, tableBits)
  920. candidate = int(table[currHash])
  921. table[m2Hash] = uint32(s - 2)
  922. table[currHash] = uint32(s)
  923. if debug && s == candidate {
  924. panic("s == candidate")
  925. }
  926. if uint32(x>>16) != load32(src, candidate) {
  927. cv = load64(src, s+1)
  928. s++
  929. break
  930. }
  931. }
  932. }
  933. emitRemainder:
  934. if nextEmit < len(src) {
  935. // Bail if we exceed the maximum size.
  936. if d+len(src)-nextEmit > dstLimit {
  937. return 0
  938. }
  939. d += emitLiteral(dst[d:], src[nextEmit:])
  940. if debug && nextEmit != s {
  941. fmt.Println("emitted ", len(src)-nextEmit, "literals")
  942. }
  943. }
  944. return d
  945. }