// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
)

const (
	tableBits        = 15                               // Bits used in the table
	tableSize        = 1 << tableBits                   // Size of the table
	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
	tableFastHashLen = 6
	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength   = 131074
)
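
// Illustrative sketch, not part of the original file: because tableSize is a
// power of two, masking any hash with tableMask always yields an in-range
// index, which lets the compiler elide the bounds check on the lookup. The
// helper name below is hypothetical and exists only to demonstrate the idiom.
func exampleMaskedLookup(table *[tableSize]tableEntry, hash uint32) tableEntry {
	// hash & tableMask is provably < tableSize, so this access cannot panic.
	return table[hash&tableMask]
}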

type tableEntry struct {
	val    uint32
	offset int32
}

type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}

// Encode mimics functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}
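
	// Worked example of the rebasing above (added note, not in the original):
	// with e.cur=100, len(e.hist)=50 and e.maxMatchOff=60, minOff is 90.
	// An entry stored at offset 95 is rebased to 95-100+60 = 55, while an
	// entry at offset 80 is older than minOff and cleared to 0, since it
	// could never satisfy the max-offset check again.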

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, offsets from a
		// previous block are never referenced.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)
				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
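
// Illustrative sketch, not part of the original file: the inner search loop
// advances by stepSize plus a growing skip that mirrors zstd's search
// strength heuristic. With stepSize=2 and kSearchStrength=6, one extra byte
// is skipped for every 32 consecutive bytes without a match, so
// incompressible regions are scanned progressively faster. The helper name
// is hypothetical.
func exampleNextSearchPos(s, nextEmit int32) int32 {
	const stepSize = 2
	const kSearchStrength = 6
	// e.g. s-nextEmit=0 advances 2 bytes; s-nextEmit=64 advances 2+2=4 bytes.
	return s + stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
}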

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugEncoder {
		if len(src) > maxCompressedBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= e.bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, offsets from a
		// previous block are never referenced.
		for {
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < e.bufferReset {
		e.cur += int32(len(src))
	}
}
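
// Illustrative sketch, not part of the original file: advancing e.cur after
// EncodeNoHist makes stale table entries, which point into the discarded
// block, fail the max-offset check on the next call. This mirrors the
// coffset0 test in the search loop; the helper name is hypothetical.
func exampleEntryUsable(s, cur, entryOff, maxMatchOff int32) bool {
	// The candidate is usable only if the distance back from the current
	// absolute position (s + cur) to the stored entry is within maxMatchOff.
	return s-(entryOff-cur) < maxMatchOff
}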

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= e.bufferReset-int32(len(e.hist)) {
		if len(e.hist) == 0 {
			e.table = [tableSize]tableEntry{}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches, offsets from a
		// previous block are never referenced.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				length := 4 + e.matchlen(s+6, repIndex+4, src)

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
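
// Added note, not in the original source: inputs over 32 KiB take the plain
// fastEncoder.Encode path above, which updates the table without shard-dirty
// bookkeeping, so allDirty is set to force a full table restore on Reset.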

// Reset will reset the encoder. The plain fastEncoder does not support
// dictionaries, so d must be nil.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset the encoder and set a dictionary if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}

		end := e.maxMatchOff + int32(len(d.content)) - 8
		for i := e.maxMatchOff; i < end; i += 2 {
			const hashLog = tableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
			nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		// Equivalent to copy(e.table[:], e.dictTable); the slice-to-array
		// conversion compiles to a single fixed-size copy.
		e.table = *(*[tableSize]tableEntry)(e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}
		// Equivalent to copy(e.table[i*shardSize:(i+1)*shardSize],
		// e.dictTable[i*shardSize:(i+1)*shardSize]) without bounds checks.
		*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}
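
// Illustrative sketch, not part of the original file: Reset restores the
// whole table in one copy once more than two thirds of the shards are dirty,
// and otherwise copies only the dirty shards. The helper name is hypothetical.
func exampleShouldCopyWholeTable(allDirty bool, dirtyShardCnt int) bool {
	return allDirty || dirtyShardCnt > tableShardCnt*4/6
}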

func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}

func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
	e.tableShardDirty[entryNum/tableShardSize] = true
}