zstd_opt.c 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472
  1. /*
  2. * Copyright (c) Meta Platforms, Inc. and affiliates.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. #include "zstd_compress_internal.h"
  11. #include "hist.h"
  12. #include "zstd_opt.h"
  /* Tuning constants for the statistics used by the optimal parser's price model. */
  13. #define ZSTD_LITFREQ_ADD 2 /* increment applied to a literal's frequency each time ZSTD_updateStats() records it, so that literal frequencies adapt faster to new stats */
  14. #define ZSTD_MAX_PRICE (1<<30)
  15. #define ZSTD_PREDEF_THRESHOLD 8 /* on a first block, if srcSize <= ZSTD_PREDEF_THRESHOLD (and no valid dictionary huffman table overrides it), symbols' cost is assumed static, directly determined by pre-defined distributions */
  16. /*-*************************************
  17. * Price functions for optimal parser
  18. ***************************************/
  /* Price unit selection :
   * BITCOST_ACCURACY is the number of fractional bits used by prices,
   * BITCOST_MULTIPLIER is the fixed-point value of exactly one bit,
   * WEIGHT(stat, opt) converts a frequency into a cost expressed in that unit.
   * The first two variants are disabled (#if 0 / #elif 0) and kept for experiments only. */
  19. #if 0 /* whole-bit approximation, whatever the value of opt (for tests) */
  20. # define BITCOST_ACCURACY 0
  21. # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
  22. # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
  23. #elif 0 /* fractional-bit accuracy, whatever the value of opt (for tests) */
  24. # define BITCOST_ACCURACY 8
  25. # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
  26. # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
  27. #else /* active variant : opt==0 => whole-bit estimate (approx), opt!=0 => fractional-bit estimate (accurate) */
  28. # define BITCOST_ACCURACY 8
  29. # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
  30. # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
  31. #endif
  32. /* ZSTD_bitWeight() :
  33. * provide estimated "cost" of a stat in full bits only */
  34. MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
  35. {
  36. return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
  37. }
  38. /* ZSTD_fracWeight() :
  39. * provide fractional-bit "cost" of a stat,
  40. * using linear interpolation approximation */
  41. MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
  42. {
  43. U32 const stat = rawStat + 1;
  44. U32 const hb = ZSTD_highbit32(stat);
  45. U32 const BWeight = hb * BITCOST_MULTIPLIER;
  46. /* Fweight was meant for "Fractional weight"
  47. * but it's effectively a value between 1 and 2
  48. * using fixed point arithmetic */
  49. U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
  50. U32 const weight = BWeight + FWeight;
  51. assert(hb + BITCOST_ACCURACY < 31);
  52. return weight;
  53. }
  #if (DEBUGLEVEL>=2)
  /* ZSTD_fCost() :
   * debug helper, only compiled when DEBUGLEVEL >= 2.
   * @return : price converted from BITCOST_MULTIPLIER units into bytes,
   *           as a fractional value, intended for debug messages only */
  MEM_STATIC double ZSTD_fCost(int price)
  {
      double const unitsPerByte = BITCOST_MULTIPLIER*8;   /* 8 bits per byte */
      return (double)price / unitsPerByte;
  }
  #endif
  63. static int ZSTD_compressedLiterals(optState_t const* const optPtr)
  64. {
  65. return optPtr->literalCompressionMode != ZSTD_ps_disable;
  66. }
  67. static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
  68. {
  69. if (ZSTD_compressedLiterals(optPtr))
  70. optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
  71. optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
  72. optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
  73. optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
  74. }
  75. static U32 sum_u32(const unsigned table[], size_t nbElts)
  76. {
  77. size_t n;
  78. U32 total = 0;
  79. for (n=0; n<nbElts; n++) {
  80. total += table[n];
  81. }
  82. return total;
  83. }
  84. typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
  85. static U32
  86. ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
  87. {
  88. U32 s, sum=0;
  89. DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
  90. (unsigned)lastEltIndex+1, (unsigned)shift );
  91. assert(shift < 30);
  92. for (s=0; s<lastEltIndex+1; s++) {
  93. unsigned const base = base1 ? 1 : (table[s]>0);
  94. unsigned const newStat = base + (table[s] >> shift);
  95. sum += newStat;
  96. table[s] = newStat;
  97. }
  98. return sum;
  99. }
  100. /* ZSTD_scaleStats() :
  101. * reduce all elt frequencies in table if sum too large
  102. * return the resulting sum of elements */
  103. static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
  104. {
  105. U32 const prevsum = sum_u32(table, lastEltIndex+1);
  106. U32 const factor = prevsum >> logTarget;
  107. DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
  108. assert(logTarget < 30);
  109. if (factor <= 1) return prevsum;
  110. return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
  111. }
  112. /* ZSTD_rescaleFreqs() :
  113. * if first block (detected by optPtr->litLengthSum == 0) : init statistics
  114. * take hints from dictionary if there is one
  115. * and init from zero if there is none,
  116. * using src for literals stats, and baseline stats for sequence symbols
  117. * otherwise downscale existing stats, to be used as seed for next block.
  118. */
  119. static void
  120. ZSTD_rescaleFreqs(optState_t* const optPtr,
  121. const BYTE* const src, size_t const srcSize,
  122. int const optLevel)
  123. {
  124. int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
  125. DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
  126. optPtr->priceType = zop_dynamic;
  127. if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
  128. /* heuristic: use pre-defined stats for too small inputs */
  129. if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
  130. DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
  131. optPtr->priceType = zop_predef;
  132. }
  133. assert(optPtr->symbolCosts != NULL);
  134. if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
  135. /* huffman stats covering the full value set : table presumed generated by dictionary */
  136. optPtr->priceType = zop_dynamic;
  137. if (compressedLiterals) {
  138. /* generate literals statistics from huffman table */
  139. unsigned lit;
  140. assert(optPtr->litFreq != NULL);
  141. optPtr->litSum = 0;
  142. for (lit=0; lit<=MaxLit; lit++) {
  143. U32 const scaleLog = 11; /* scale to 2K */
  144. U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
  145. assert(bitCost <= scaleLog);
  146. optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
  147. optPtr->litSum += optPtr->litFreq[lit];
  148. } }
  149. { unsigned ll;
  150. FSE_CState_t llstate;
  151. FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
  152. optPtr->litLengthSum = 0;
  153. for (ll=0; ll<=MaxLL; ll++) {
  154. U32 const scaleLog = 10; /* scale to 1K */
  155. U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
  156. assert(bitCost < scaleLog);
  157. optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
  158. optPtr->litLengthSum += optPtr->litLengthFreq[ll];
  159. } }
  160. { unsigned ml;
  161. FSE_CState_t mlstate;
  162. FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
  163. optPtr->matchLengthSum = 0;
  164. for (ml=0; ml<=MaxML; ml++) {
  165. U32 const scaleLog = 10;
  166. U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
  167. assert(bitCost < scaleLog);
  168. optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
  169. optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
  170. } }
  171. { unsigned of;
  172. FSE_CState_t ofstate;
  173. FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
  174. optPtr->offCodeSum = 0;
  175. for (of=0; of<=MaxOff; of++) {
  176. U32 const scaleLog = 10;
  177. U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
  178. assert(bitCost < scaleLog);
  179. optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
  180. optPtr->offCodeSum += optPtr->offCodeFreq[of];
  181. } }
  182. } else { /* first block, no dictionary */
  183. assert(optPtr->litFreq != NULL);
  184. if (compressedLiterals) {
  185. /* base initial cost of literals on direct frequency within src */
  186. unsigned lit = MaxLit;
  187. HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
  188. optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
  189. }
  190. { unsigned const baseLLfreqs[MaxLL+1] = {
  191. 4, 2, 1, 1, 1, 1, 1, 1,
  192. 1, 1, 1, 1, 1, 1, 1, 1,
  193. 1, 1, 1, 1, 1, 1, 1, 1,
  194. 1, 1, 1, 1, 1, 1, 1, 1,
  195. 1, 1, 1, 1
  196. };
  197. ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
  198. optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
  199. }
  200. { unsigned ml;
  201. for (ml=0; ml<=MaxML; ml++)
  202. optPtr->matchLengthFreq[ml] = 1;
  203. }
  204. optPtr->matchLengthSum = MaxML+1;
  205. { unsigned const baseOFCfreqs[MaxOff+1] = {
  206. 6, 2, 1, 1, 2, 3, 4, 4,
  207. 4, 3, 2, 1, 1, 1, 1, 1,
  208. 1, 1, 1, 1, 1, 1, 1, 1,
  209. 1, 1, 1, 1, 1, 1, 1, 1
  210. };
  211. ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
  212. optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
  213. }
  214. }
  215. } else { /* new block : scale down accumulated statistics */
  216. if (compressedLiterals)
  217. optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
  218. optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
  219. optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
  220. optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
  221. }
  222. ZSTD_setBasePrices(optPtr, optLevel);
  223. }
  224. /* ZSTD_rawLiteralsCost() :
  225. * price of literals (only) in specified segment (which length can be 0).
  226. * does not include price of literalLength symbol */
  227. static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
  228. const optState_t* const optPtr,
  229. int optLevel)
  230. {
  231. if (litLength == 0) return 0;
  232. if (!ZSTD_compressedLiterals(optPtr))
  233. return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */
  234. if (optPtr->priceType == zop_predef)
  235. return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
  236. /* dynamic statistics */
  237. { U32 price = optPtr->litSumBasePrice * litLength;
  238. U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
  239. U32 u;
  240. assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
  241. for (u=0; u < litLength; u++) {
  242. U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
  243. if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
  244. price -= litPrice;
  245. }
  246. return price;
  247. }
  248. }
  249. /* ZSTD_litLengthPrice() :
  250. * cost of literalLength symbol */
  251. static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
  252. {
  253. assert(litLength <= ZSTD_BLOCKSIZE_MAX);
  254. if (optPtr->priceType == zop_predef)
  255. return WEIGHT(litLength, optLevel);
  256. /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
  257. * because it isn't representable in the zstd format.
  258. * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
  259. * In such a case, the block would be all literals.
  260. */
  261. if (litLength == ZSTD_BLOCKSIZE_MAX)
  262. return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
  263. /* dynamic statistics */
  264. { U32 const llCode = ZSTD_LLcode(litLength);
  265. return (LL_bits[llCode] * BITCOST_MULTIPLIER)
  266. + optPtr->litLengthSumBasePrice
  267. - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
  268. }
  269. }
  270. /* ZSTD_getMatchPrice() :
  271. * Provides the cost of the match part (offset + matchLength) of a sequence.
  272. * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
  273. * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
  274. * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
  275. */
  276. FORCE_INLINE_TEMPLATE U32
  277. ZSTD_getMatchPrice(U32 const offBase,
  278. U32 const matchLength,
  279. const optState_t* const optPtr,
  280. int const optLevel)
  281. {
  282. U32 price;
  283. U32 const offCode = ZSTD_highbit32(offBase);
  284. U32 const mlBase = matchLength - MINMATCH;
  285. assert(matchLength >= MINMATCH);
  286. if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
  287. return WEIGHT(mlBase, optLevel)
  288. + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
  289. /* dynamic statistics */
  290. price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
  291. if ((optLevel<2) /*static*/ && offCode >= 20)
  292. price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
  293. /* match Length */
  294. { U32 const mlCode = ZSTD_MLcode(mlBase);
  295. price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
  296. }
  297. price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
  298. DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
  299. return price;
  300. }
  301. /* ZSTD_updateStats() :
  302. * assumption : literals + litLength <= iend */
  303. static void ZSTD_updateStats(optState_t* const optPtr,
  304. U32 litLength, const BYTE* literals,
  305. U32 offBase, U32 matchLength)
  306. {
  307. /* literals */
  308. if (ZSTD_compressedLiterals(optPtr)) {
  309. U32 u;
  310. for (u=0; u < litLength; u++)
  311. optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
  312. optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
  313. }
  314. /* literal Length */
  315. { U32 const llCode = ZSTD_LLcode(litLength);
  316. optPtr->litLengthFreq[llCode]++;
  317. optPtr->litLengthSum++;
  318. }
  319. /* offset code : follows storeSeq() numeric representation */
  320. { U32 const offCode = ZSTD_highbit32(offBase);
  321. assert(offCode <= MaxOff);
  322. optPtr->offCodeFreq[offCode]++;
  323. optPtr->offCodeSum++;
  324. }
  325. /* match Length */
  326. { U32 const mlBase = matchLength - MINMATCH;
  327. U32 const mlCode = ZSTD_MLcode(mlBase);
  328. optPtr->matchLengthFreq[mlCode]++;
  329. optPtr->matchLengthSum++;
  330. }
  331. }
  332. /* ZSTD_readMINMATCH() :
  333. * function safe only for comparisons
  334. * assumption : memPtr must be at least 4 bytes before end of buffer */
  335. MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
  336. {
  337. switch (length)
  338. {
  339. default :
  340. case 4 : return MEM_read32(memPtr);
  341. case 3 : if (MEM_isLittleEndian())
  342. return MEM_read32(memPtr)<<8;
  343. else
  344. return MEM_read32(memPtr)>>8;
  345. }
  346. }
  347. /* Update hashTable3 up to ip (excluded)
  348. Assumption : always within prefix (i.e. not within extDict) */
  349. static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
  350. U32* nextToUpdate3,
  351. const BYTE* const ip)
  352. {
  353. U32* const hashTable3 = ms->hashTable3;
  354. U32 const hashLog3 = ms->hashLog3;
  355. const BYTE* const base = ms->window.base;
  356. U32 idx = *nextToUpdate3;
  357. U32 const target = (U32)(ip - base);
  358. size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
  359. assert(hashLog3 > 0);
  360. while(idx < target) {
  361. hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
  362. idx++;
  363. }
  364. *nextToUpdate3 = target;
  365. return hashTable3[hash3];
  366. }
  367. /*-*************************************
  368. * Binary Tree search
  369. ***************************************/
  370. /** ZSTD_insertBt1() : add one or multiple positions to tree.
  371. * @param ip assumed <= iend-8 .
  372. * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
  373. * @return : nb of positions added */
  374. static U32 ZSTD_insertBt1(
  375. const ZSTD_matchState_t* ms,
  376. const BYTE* const ip, const BYTE* const iend,
  377. U32 const target,
  378. U32 const mls, const int extDict)
  379. {
  380. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  381. U32* const hashTable = ms->hashTable;
  382. U32 const hashLog = cParams->hashLog;
  383. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  384. U32* const bt = ms->chainTable;
  385. U32 const btLog = cParams->chainLog - 1;
  386. U32 const btMask = (1 << btLog) - 1;
  387. U32 matchIndex = hashTable[h];
  388. size_t commonLengthSmaller=0, commonLengthLarger=0;
  389. const BYTE* const base = ms->window.base;
  390. const BYTE* const dictBase = ms->window.dictBase;
  391. const U32 dictLimit = ms->window.dictLimit;
  392. const BYTE* const dictEnd = dictBase + dictLimit;
  393. const BYTE* const prefixStart = base + dictLimit;
  394. const BYTE* match;
  395. const U32 curr = (U32)(ip-base);
  396. const U32 btLow = btMask >= curr ? 0 : curr - btMask;
  397. U32* smallerPtr = bt + 2*(curr&btMask);
  398. U32* largerPtr = smallerPtr + 1;
  399. U32 dummy32; /* to be nullified at the end */
  400. /* windowLow is based on target because
  401. * we only need positions that will be in the window at the end of the tree update.
  402. */
  403. U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
  404. U32 matchEndIdx = curr+8+1;
  405. size_t bestLength = 8;
  406. U32 nbCompares = 1U << cParams->searchLog;
  407. #ifdef ZSTD_C_PREDICT
  408. U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
  409. U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
  410. predictedSmall += (predictedSmall>0);
  411. predictedLarge += (predictedLarge>0);
  412. #endif /* ZSTD_C_PREDICT */
  413. DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
  414. assert(curr <= target);
  415. assert(ip <= iend-8); /* required for h calculation */
  416. hashTable[h] = curr; /* Update Hash Table */
  417. assert(windowLow > 0);
  418. for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
  419. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  420. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  421. assert(matchIndex < curr);
  422. #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
  423. const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
  424. if (matchIndex == predictedSmall) {
  425. /* no need to check length, result known */
  426. *smallerPtr = matchIndex;
  427. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  428. smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
  429. matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  430. predictedSmall = predictPtr[1] + (predictPtr[1]>0);
  431. continue;
  432. }
  433. if (matchIndex == predictedLarge) {
  434. *largerPtr = matchIndex;
  435. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  436. largerPtr = nextPtr;
  437. matchIndex = nextPtr[0];
  438. predictedLarge = predictPtr[0] + (predictPtr[0]>0);
  439. continue;
  440. }
  441. #endif
  442. if (!extDict || (matchIndex+matchLength >= dictLimit)) {
  443. assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
  444. match = base + matchIndex;
  445. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
  446. } else {
  447. match = dictBase + matchIndex;
  448. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
  449. if (matchIndex+matchLength >= dictLimit)
  450. match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
  451. }
  452. if (matchLength > bestLength) {
  453. bestLength = matchLength;
  454. if (matchLength > matchEndIdx - matchIndex)
  455. matchEndIdx = matchIndex + (U32)matchLength;
  456. }
  457. if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
  458. break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
  459. }
  460. if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
  461. /* match is smaller than current */
  462. *smallerPtr = matchIndex; /* update smaller idx */
  463. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  464. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  465. smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
  466. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
  467. } else {
  468. /* match is larger than current */
  469. *largerPtr = matchIndex;
  470. commonLengthLarger = matchLength;
  471. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
  472. largerPtr = nextPtr;
  473. matchIndex = nextPtr[0];
  474. } }
  475. *smallerPtr = *largerPtr = 0;
  476. { U32 positions = 0;
  477. if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
  478. assert(matchEndIdx > curr + 8);
  479. return MAX(positions, matchEndIdx - (curr + 8));
  480. }
  481. }
  482. FORCE_INLINE_TEMPLATE
  483. void ZSTD_updateTree_internal(
  484. ZSTD_matchState_t* ms,
  485. const BYTE* const ip, const BYTE* const iend,
  486. const U32 mls, const ZSTD_dictMode_e dictMode)
  487. {
  488. const BYTE* const base = ms->window.base;
  489. U32 const target = (U32)(ip - base);
  490. U32 idx = ms->nextToUpdate;
  491. DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
  492. idx, target, dictMode);
  493. while(idx < target) {
  494. U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
  495. assert(idx < (U32)(idx + forward));
  496. idx += forward;
  497. }
  498. assert((size_t)(ip - base) <= (size_t)(U32)(-1));
  499. assert((size_t)(iend - base) <= (size_t)(U32)(-1));
  500. ms->nextToUpdate = target;
  501. }
  502. void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
  503. ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
  504. }
  505. FORCE_INLINE_TEMPLATE U32
  506. ZSTD_insertBtAndGetAllMatches (
  507. ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
  508. ZSTD_matchState_t* ms,
  509. U32* nextToUpdate3,
  510. const BYTE* const ip, const BYTE* const iLimit,
  511. const ZSTD_dictMode_e dictMode,
  512. const U32 rep[ZSTD_REP_NUM],
  513. const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
  514. const U32 lengthToBeat,
  515. const U32 mls /* template */)
  516. {
  517. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  518. U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
  519. const BYTE* const base = ms->window.base;
  520. U32 const curr = (U32)(ip-base);
  521. U32 const hashLog = cParams->hashLog;
  522. U32 const minMatch = (mls==3) ? 3 : 4;
  523. U32* const hashTable = ms->hashTable;
  524. size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
  525. U32 matchIndex = hashTable[h];
  526. U32* const bt = ms->chainTable;
  527. U32 const btLog = cParams->chainLog - 1;
  528. U32 const btMask= (1U << btLog) - 1;
  529. size_t commonLengthSmaller=0, commonLengthLarger=0;
  530. const BYTE* const dictBase = ms->window.dictBase;
  531. U32 const dictLimit = ms->window.dictLimit;
  532. const BYTE* const dictEnd = dictBase + dictLimit;
  533. const BYTE* const prefixStart = base + dictLimit;
  534. U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
  535. U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
  536. U32 const matchLow = windowLow ? windowLow : 1;
  537. U32* smallerPtr = bt + 2*(curr&btMask);
  538. U32* largerPtr = bt + 2*(curr&btMask) + 1;
  539. U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
  540. U32 dummy32; /* to be nullified at the end */
  541. U32 mnum = 0;
  542. U32 nbCompares = 1U << cParams->searchLog;
  543. const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
  544. const ZSTD_compressionParameters* const dmsCParams =
  545. dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
  546. const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
  547. const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
  548. U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
  549. U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
  550. U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
  551. U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
  552. U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
  553. U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
  554. U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
  555. size_t bestLength = lengthToBeat-1;
  556. DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
  557. /* check repCode */
  558. assert(ll0 <= 1); /* necessarily 1 or 0 */
  559. { U32 const lastR = ZSTD_REP_NUM + ll0;
  560. U32 repCode;
  561. for (repCode = ll0; repCode < lastR; repCode++) {
  562. U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
  563. U32 const repIndex = curr - repOffset;
  564. U32 repLen = 0;
  565. assert(curr >= dictLimit);
  566. if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
  567. /* We must validate the repcode offset because when we're using a dictionary the
  568. * valid offset range shrinks when the dictionary goes out of bounds.
  569. */
  570. if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
  571. repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
  572. }
  573. } else { /* repIndex < dictLimit || repIndex >= curr */
  574. const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
  575. dmsBase + repIndex - dmsIndexDelta :
  576. dictBase + repIndex;
  577. assert(curr >= windowLow);
  578. if ( dictMode == ZSTD_extDict
  579. && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
  580. & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
  581. && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
  582. repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
  583. }
  584. if (dictMode == ZSTD_dictMatchState
  585. && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
  586. & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
  587. && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
  588. repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
  589. } }
  590. /* save longer solution */
  591. if (repLen > bestLength) {
  592. DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
  593. repCode, ll0, repOffset, repLen);
  594. bestLength = repLen;
  595. matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
  596. matches[mnum].len = (U32)repLen;
  597. mnum++;
  598. if ( (repLen > sufficient_len)
  599. | (ip+repLen == iLimit) ) { /* best possible */
  600. return mnum;
  601. } } } }
  602. /* HC3 match finder */
  603. if ((mls == 3) /*static*/ && (bestLength < mls)) {
  604. U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
  605. if ((matchIndex3 >= matchLow)
  606. & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
  607. size_t mlen;
  608. if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
  609. const BYTE* const match = base + matchIndex3;
  610. mlen = ZSTD_count(ip, match, iLimit);
  611. } else {
  612. const BYTE* const match = dictBase + matchIndex3;
  613. mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
  614. }
  615. /* save best solution */
  616. if (mlen >= mls /* == 3 > bestLength */) {
  617. DEBUGLOG(8, "found small match with hlog3, of length %u",
  618. (U32)mlen);
  619. bestLength = mlen;
  620. assert(curr > matchIndex3);
  621. assert(mnum==0); /* no prior solution */
  622. matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
  623. matches[0].len = (U32)mlen;
  624. mnum = 1;
  625. if ( (mlen > sufficient_len) |
  626. (ip+mlen == iLimit) ) { /* best possible length */
  627. ms->nextToUpdate = curr+1; /* skip insertion */
  628. return 1;
  629. } } }
  630. /* no dictMatchState lookup: dicts don't have a populated HC3 table */
  631. } /* if (mls == 3) */
  632. hashTable[h] = curr; /* Update Hash Table */
  633. for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
  634. U32* const nextPtr = bt + 2*(matchIndex & btMask);
  635. const BYTE* match;
  636. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  637. assert(curr > matchIndex);
  638. if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
  639. assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
  640. match = base + matchIndex;
  641. if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
  642. matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
  643. } else {
  644. match = dictBase + matchIndex;
  645. assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */
  646. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
  647. if (matchIndex+matchLength >= dictLimit)
  648. match = base + matchIndex; /* prepare for match[matchLength] read */
  649. }
  650. if (matchLength > bestLength) {
  651. DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
  652. (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
  653. assert(matchEndIdx > matchIndex);
  654. if (matchLength > matchEndIdx - matchIndex)
  655. matchEndIdx = matchIndex + (U32)matchLength;
  656. bestLength = matchLength;
  657. matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
  658. matches[mnum].len = (U32)matchLength;
  659. mnum++;
  660. if ( (matchLength > ZSTD_OPT_NUM)
  661. | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
  662. if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
  663. break; /* drop, to preserve bt consistency (miss a little bit of compression) */
  664. } }
  665. if (match[matchLength] < ip[matchLength]) {
  666. /* match smaller than current */
  667. *smallerPtr = matchIndex; /* update smaller idx */
  668. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  669. if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  670. smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */
  671. matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */
  672. } else {
  673. *largerPtr = matchIndex;
  674. commonLengthLarger = matchLength;
  675. if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
  676. largerPtr = nextPtr;
  677. matchIndex = nextPtr[0];
  678. } }
  679. *smallerPtr = *largerPtr = 0;
  680. assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
  681. if (dictMode == ZSTD_dictMatchState && nbCompares) {
  682. size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
  683. U32 dictMatchIndex = dms->hashTable[dmsH];
  684. const U32* const dmsBt = dms->chainTable;
  685. commonLengthSmaller = commonLengthLarger = 0;
  686. for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
  687. const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
  688. size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
  689. const BYTE* match = dmsBase + dictMatchIndex;
  690. matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
  691. if (dictMatchIndex+matchLength >= dmsHighLimit)
  692. match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
  693. if (matchLength > bestLength) {
  694. matchIndex = dictMatchIndex + dmsIndexDelta;
  695. DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
  696. (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
  697. if (matchLength > matchEndIdx - matchIndex)
  698. matchEndIdx = matchIndex + (U32)matchLength;
  699. bestLength = matchLength;
  700. matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
  701. matches[mnum].len = (U32)matchLength;
  702. mnum++;
  703. if ( (matchLength > ZSTD_OPT_NUM)
  704. | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
  705. break; /* drop, to guarantee consistency (miss a little bit of compression) */
  706. } }
  707. if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
  708. if (match[matchLength] < ip[matchLength]) {
  709. commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
  710. dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
  711. } else {
  712. /* match is larger than current */
  713. commonLengthLarger = matchLength;
  714. dictMatchIndex = nextPtr[0];
  715. } } } /* if (dictMode == ZSTD_dictMatchState) */
  716. assert(matchEndIdx > curr+8);
  717. ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
  718. return mnum;
  719. }
  720. typedef U32 (*ZSTD_getAllMatchesFn)(
  721. ZSTD_match_t*,
  722. ZSTD_matchState_t*,
  723. U32*,
  724. const BYTE*,
  725. const BYTE*,
  726. const U32 rep[ZSTD_REP_NUM],
  727. U32 const ll0,
  728. U32 const lengthToBeat);
  729. FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
  730. ZSTD_match_t* matches,
  731. ZSTD_matchState_t* ms,
  732. U32* nextToUpdate3,
  733. const BYTE* ip,
  734. const BYTE* const iHighLimit,
  735. const U32 rep[ZSTD_REP_NUM],
  736. U32 const ll0,
  737. U32 const lengthToBeat,
  738. const ZSTD_dictMode_e dictMode,
  739. const U32 mls)
  740. {
  741. assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
  742. DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
  743. if (ip < ms->window.base + ms->nextToUpdate)
  744. return 0; /* skipped area */
  745. ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
  746. return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
  747. }
  748. #define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
  749. #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
  750. static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
  751. ZSTD_match_t* matches, \
  752. ZSTD_matchState_t* ms, \
  753. U32* nextToUpdate3, \
  754. const BYTE* ip, \
  755. const BYTE* const iHighLimit, \
  756. const U32 rep[ZSTD_REP_NUM], \
  757. U32 const ll0, \
  758. U32 const lengthToBeat) \
  759. { \
  760. return ZSTD_btGetAllMatches_internal( \
  761. matches, ms, nextToUpdate3, ip, iHighLimit, \
  762. rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
  763. }
  764. #define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
  765. GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
  766. GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
  767. GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
  768. GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
  769. GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
  770. GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
  771. GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
  772. #define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
  773. { \
  774. ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
  775. ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
  776. ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
  777. ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
  778. }
  779. static ZSTD_getAllMatchesFn
  780. ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
  781. {
  782. ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
  783. ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
  784. ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
  785. ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
  786. };
  787. U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
  788. assert((U32)dictMode < 3);
  789. assert(mls - 3 < 4);
  790. return getAllMatchesFns[(int)dictMode][mls - 3];
  791. }
  792. /*************************
  793. * LDM helper functions *
  794. *************************/
  795. /* Struct containing info needed to make decision about ldm inclusion */
  796. typedef struct {
  797. rawSeqStore_t seqStore; /* External match candidates store for this block */
  798. U32 startPosInBlock; /* Start position of the current match candidate */
  799. U32 endPosInBlock; /* End position of the current match candidate */
  800. U32 offset; /* Offset of the match candidate */
  801. } ZSTD_optLdm_t;
  802. /* ZSTD_optLdm_skipRawSeqStoreBytes():
  803. * Moves forward in @rawSeqStore by @nbBytes,
  804. * which will update the fields 'pos' and 'posInSequence'.
  805. */
  806. static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
  807. {
  808. U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
  809. while (currPos && rawSeqStore->pos < rawSeqStore->size) {
  810. rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
  811. if (currPos >= currSeq.litLength + currSeq.matchLength) {
  812. currPos -= currSeq.litLength + currSeq.matchLength;
  813. rawSeqStore->pos++;
  814. } else {
  815. rawSeqStore->posInSequence = currPos;
  816. break;
  817. }
  818. }
  819. if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
  820. rawSeqStore->posInSequence = 0;
  821. }
  822. }
  823. /* ZSTD_opt_getNextMatchAndUpdateSeqStore():
  824. * Calculates the beginning and end of the next match in the current block.
  825. * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
  826. */
  827. static void
  828. ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
  829. U32 blockBytesRemaining)
  830. {
  831. rawSeq currSeq;
  832. U32 currBlockEndPos;
  833. U32 literalsBytesRemaining;
  834. U32 matchBytesRemaining;
  835. /* Setting match end position to MAX to ensure we never use an LDM during this block */
  836. if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
  837. optLdm->startPosInBlock = UINT_MAX;
  838. optLdm->endPosInBlock = UINT_MAX;
  839. return;
  840. }
  841. /* Calculate appropriate bytes left in matchLength and litLength
  842. * after adjusting based on ldmSeqStore->posInSequence */
  843. currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
  844. assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
  845. currBlockEndPos = currPosInBlock + blockBytesRemaining;
  846. literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
  847. currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
  848. 0;
  849. matchBytesRemaining = (literalsBytesRemaining == 0) ?
  850. currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
  851. currSeq.matchLength;
  852. /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
  853. if (literalsBytesRemaining >= blockBytesRemaining) {
  854. optLdm->startPosInBlock = UINT_MAX;
  855. optLdm->endPosInBlock = UINT_MAX;
  856. ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
  857. return;
  858. }
  859. /* Matches may be < MINMATCH by this process. In that case, we will reject them
  860. when we are deciding whether or not to add the ldm */
  861. optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
  862. optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
  863. optLdm->offset = currSeq.offset;
  864. if (optLdm->endPosInBlock > currBlockEndPos) {
  865. /* Match ends after the block ends, we can't use the whole match */
  866. optLdm->endPosInBlock = currBlockEndPos;
  867. ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
  868. } else {
  869. /* Consume nb of bytes equal to size of sequence left */
  870. ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
  871. }
  872. }
  873. /* ZSTD_optLdm_maybeAddMatch():
  874. * Adds a match if it's long enough,
  875. * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
  876. * into 'matches'. Maintains the correct ordering of 'matches'.
  877. */
  878. static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
  879. const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
  880. {
  881. U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
  882. /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
  883. U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
  884. /* Ensure that current block position is not outside of the match */
  885. if (currPosInBlock < optLdm->startPosInBlock
  886. || currPosInBlock >= optLdm->endPosInBlock
  887. || candidateMatchLength < MINMATCH) {
  888. return;
  889. }
  890. if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
  891. U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
  892. DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
  893. candidateOffBase, candidateMatchLength, currPosInBlock);
  894. matches[*nbMatches].len = candidateMatchLength;
  895. matches[*nbMatches].off = candidateOffBase;
  896. (*nbMatches)++;
  897. }
  898. }
  899. /* ZSTD_optLdm_processMatchCandidate():
  900. * Wrapper function to update ldm seq store and call ldm functions as necessary.
  901. */
  902. static void
  903. ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
  904. ZSTD_match_t* matches, U32* nbMatches,
  905. U32 currPosInBlock, U32 remainingBytes)
  906. {
  907. if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
  908. return;
  909. }
  910. if (currPosInBlock >= optLdm->endPosInBlock) {
  911. if (currPosInBlock > optLdm->endPosInBlock) {
  912. /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
  913. * at the end of a match from the ldm seq store, and will often be some bytes
  914. * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
  915. */
  916. U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
  917. ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
  918. }
  919. ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
  920. }
  921. ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
  922. }
  923. /*-*******************************
  924. * Optimal parser
  925. *********************************/
  926. static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
  927. {
  928. return sol.litlen + sol.mlen;
  929. }
  #if 0 /* debug */

  /* listStats() :
   * Debug-only helper (compiled out) : prints table[0..lastEltID] through RAWLOG.
   */
  static void
  listStats(const U32* table, int lastEltID)
  {
      int const nbElts = lastEltID + 1;
      int enb;
      for (enb=0; enb < nbElts; enb++) {
          (void)table;
          /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
          RAWLOG(2, "%4i,", table[enb]);
      }
      RAWLOG(2, " \n");
  }

  #endif
  944. FORCE_INLINE_TEMPLATE size_t
  945. ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
  946. seqStore_t* seqStore,
  947. U32 rep[ZSTD_REP_NUM],
  948. const void* src, size_t srcSize,
  949. const int optLevel,
  950. const ZSTD_dictMode_e dictMode)
  951. {
  952. optState_t* const optStatePtr = &ms->opt;
  953. const BYTE* const istart = (const BYTE*)src;
  954. const BYTE* ip = istart;
  955. const BYTE* anchor = istart;
  956. const BYTE* const iend = istart + srcSize;
  957. const BYTE* const ilimit = iend - 8;
  958. const BYTE* const base = ms->window.base;
  959. const BYTE* const prefixStart = base + ms->window.dictLimit;
  960. const ZSTD_compressionParameters* const cParams = &ms->cParams;
  961. ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
  962. U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
  963. U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
  964. U32 nextToUpdate3 = ms->nextToUpdate;
  965. ZSTD_optimal_t* const opt = optStatePtr->priceTable;
  966. ZSTD_match_t* const matches = optStatePtr->matchTable;
  967. ZSTD_optimal_t lastSequence;
  968. ZSTD_optLdm_t optLdm;
  969. ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
  970. optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
  971. optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
  972. ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
  973. /* init */
  974. DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
  975. (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
  976. assert(optLevel <= 2);
  977. ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
  978. ip += (ip==prefixStart);
  979. /* Match Loop */
  980. while (ip < ilimit) {
  981. U32 cur, last_pos = 0;
  982. /* find first match */
  983. { U32 const litlen = (U32)(ip - anchor);
  984. U32 const ll0 = !litlen;
  985. U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
  986. ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
  987. (U32)(ip-istart), (U32)(iend - ip));
  988. if (!nbMatches) { ip++; continue; }
  989. /* initialize opt[0] */
  990. { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
  991. opt[0].mlen = 0; /* means is_a_literal */
  992. opt[0].litlen = litlen;
  993. /* We don't need to include the actual price of the literals because
  994. * it is static for the duration of the forward pass, and is included
  995. * in every price. We include the literal length to avoid negative
  996. * prices when we subtract the previous literal length.
  997. */
  998. opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
  999. /* large match -> immediate encoding */
  1000. { U32 const maxML = matches[nbMatches-1].len;
  1001. U32 const maxOffBase = matches[nbMatches-1].off;
  1002. DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
  1003. nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
  1004. if (maxML > sufficient_len) {
  1005. lastSequence.litlen = litlen;
  1006. lastSequence.mlen = maxML;
  1007. lastSequence.off = maxOffBase;
  1008. DEBUGLOG(6, "large match (%u>%u), immediate encoding",
  1009. maxML, sufficient_len);
  1010. cur = 0;
  1011. last_pos = ZSTD_totalLen(lastSequence);
  1012. goto _shortestPath;
  1013. } }
  1014. /* set prices for first matches starting position == 0 */
  1015. assert(opt[0].price >= 0);
  1016. { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
  1017. U32 pos;
  1018. U32 matchNb;
  1019. for (pos = 1; pos < minMatch; pos++) {
  1020. opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
  1021. }
  1022. for (matchNb = 0; matchNb < nbMatches; matchNb++) {
  1023. U32 const offBase = matches[matchNb].off;
  1024. U32 const end = matches[matchNb].len;
  1025. for ( ; pos <= end ; pos++ ) {
  1026. U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
  1027. U32 const sequencePrice = literalsPrice + matchPrice;
  1028. DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
  1029. pos, ZSTD_fCost((int)sequencePrice));
  1030. opt[pos].mlen = pos;
  1031. opt[pos].off = offBase;
  1032. opt[pos].litlen = litlen;
  1033. opt[pos].price = (int)sequencePrice;
  1034. } }
  1035. last_pos = pos-1;
  1036. }
  1037. }
  1038. /* check further positions */
  1039. for (cur = 1; cur <= last_pos; cur++) {
  1040. const BYTE* const inr = ip + cur;
  1041. assert(cur < ZSTD_OPT_NUM);
  1042. DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
  1043. /* Fix current position with one literal if cheaper */
  1044. { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
  1045. int const price = opt[cur-1].price
  1046. + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
  1047. + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
  1048. - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
  1049. assert(price < 1000000000); /* overflow check */
  1050. if (price <= opt[cur].price) {
  1051. DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
  1052. inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
  1053. opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
  1054. opt[cur].mlen = 0;
  1055. opt[cur].off = 0;
  1056. opt[cur].litlen = litlen;
  1057. opt[cur].price = price;
  1058. } else {
  1059. DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
  1060. inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
  1061. opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
  1062. }
  1063. }
  1064. /* Set the repcodes of the current position. We must do it here
  1065. * because we rely on the repcodes of the 2nd to last sequence being
  1066. * correct to set the next chunks repcodes during the backward
  1067. * traversal.
  1068. */
  1069. ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
  1070. assert(cur >= opt[cur].mlen);
  1071. if (opt[cur].mlen != 0) {
  1072. U32 const prev = cur - opt[cur].mlen;
  1073. repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
  1074. ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
  1075. } else {
  1076. ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
  1077. }
  1078. /* last match must start at a minimum distance of 8 from oend */
  1079. if (inr > ilimit) continue;
  1080. if (cur == last_pos) break;
  1081. if ( (optLevel==0) /*static_test*/
  1082. && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
  1083. DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
  1084. continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
  1085. }
  1086. assert(opt[cur].price >= 0);
  1087. { U32 const ll0 = (opt[cur].mlen != 0);
  1088. U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
  1089. U32 const previousPrice = (U32)opt[cur].price;
  1090. U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
  1091. U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
  1092. U32 matchNb;
  1093. ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
  1094. (U32)(inr-istart), (U32)(iend-inr));
  1095. if (!nbMatches) {
  1096. DEBUGLOG(7, "rPos:%u : no match found", cur);
  1097. continue;
  1098. }
  1099. { U32 const maxML = matches[nbMatches-1].len;
  1100. DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
  1101. inr-istart, cur, nbMatches, maxML);
  1102. if ( (maxML > sufficient_len)
  1103. || (cur + maxML >= ZSTD_OPT_NUM) ) {
  1104. lastSequence.mlen = maxML;
  1105. lastSequence.off = matches[nbMatches-1].off;
  1106. lastSequence.litlen = litlen;
  1107. cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
  1108. last_pos = cur + ZSTD_totalLen(lastSequence);
  1109. if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
  1110. goto _shortestPath;
  1111. } }
  1112. /* set prices using matches found at position == cur */
  1113. for (matchNb = 0; matchNb < nbMatches; matchNb++) {
  1114. U32 const offset = matches[matchNb].off;
  1115. U32 const lastML = matches[matchNb].len;
  1116. U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
  1117. U32 mlen;
  1118. DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
  1119. matchNb, matches[matchNb].off, lastML, litlen);
  1120. for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
  1121. U32 const pos = cur + mlen;
  1122. int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
  1123. if ((pos > last_pos) || (price < opt[pos].price)) {
  1124. DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
  1125. pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
  1126. while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
  1127. opt[pos].mlen = mlen;
  1128. opt[pos].off = offset;
  1129. opt[pos].litlen = litlen;
  1130. opt[pos].price = price;
  1131. } else {
  1132. DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
  1133. pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
  1134. if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
  1135. }
  1136. } } }
  1137. } /* for (cur = 1; cur <= last_pos; cur++) */
  1138. lastSequence = opt[last_pos];
  1139. cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
  1140. assert(cur < ZSTD_OPT_NUM); /* control overflow*/
  1141. _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
  1142. assert(opt[0].mlen == 0);
  1143. /* Set the next chunk's repcodes based on the repcodes of the beginning
  1144. * of the last match, and the last sequence. This avoids us having to
  1145. * update them while traversing the sequences.
  1146. */
  1147. if (lastSequence.mlen != 0) {
  1148. repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
  1149. ZSTD_memcpy(rep, &reps, sizeof(reps));
  1150. } else {
  1151. ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
  1152. }
  1153. { U32 const storeEnd = cur + 1;
  1154. U32 storeStart = storeEnd;
  1155. U32 seqPos = cur;
  1156. DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
  1157. last_pos, cur); (void)last_pos;
  1158. assert(storeEnd < ZSTD_OPT_NUM);
  1159. DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
  1160. storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
  1161. opt[storeEnd] = lastSequence;
  1162. while (seqPos > 0) {
  1163. U32 const backDist = ZSTD_totalLen(opt[seqPos]);
  1164. storeStart--;
  1165. DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
  1166. seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
  1167. opt[storeStart] = opt[seqPos];
  1168. seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
  1169. }
  1170. /* save sequences */
  1171. DEBUGLOG(6, "sending selected sequences into seqStore")
  1172. { U32 storePos;
  1173. for (storePos=storeStart; storePos <= storeEnd; storePos++) {
  1174. U32 const llen = opt[storePos].litlen;
  1175. U32 const mlen = opt[storePos].mlen;
  1176. U32 const offBase = opt[storePos].off;
  1177. U32 const advance = llen + mlen;
  1178. DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
  1179. anchor - istart, (unsigned)llen, (unsigned)mlen);
  1180. if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
  1181. assert(storePos == storeEnd); /* must be last sequence */
  1182. ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
  1183. continue; /* will finish */
  1184. }
  1185. assert(anchor + llen <= iend);
  1186. ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
  1187. ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
  1188. anchor += advance;
  1189. ip = anchor;
  1190. } }
  1191. ZSTD_setBasePrices(optStatePtr, optLevel);
  1192. }
  1193. } /* while (ip < ilimit) */
  1194. /* Return the last literals size */
  1195. return (size_t)(iend - anchor);
  1196. }
  1197. static size_t ZSTD_compressBlock_opt0(
  1198. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1199. const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
  1200. {
  1201. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
  1202. }
  1203. static size_t ZSTD_compressBlock_opt2(
  1204. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1205. const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
  1206. {
  1207. return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
  1208. }
  1209. size_t ZSTD_compressBlock_btopt(
  1210. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1211. const void* src, size_t srcSize)
  1212. {
  1213. DEBUGLOG(5, "ZSTD_compressBlock_btopt");
  1214. return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  1215. }
  1216. /* ZSTD_initStats_ultra():
  1217. * make a first compression pass, just to seed stats with more accurate starting values.
  1218. * only works on first block, with no dictionary and no ldm.
  1219. * this function cannot error out, its narrow contract must be respected.
  1220. */
  1221. static void
  1222. ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
  1223. seqStore_t* seqStore,
  1224. U32 rep[ZSTD_REP_NUM],
  1225. const void* src, size_t srcSize)
  1226. {
  1227. U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
  1228. ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
  1229. DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
  1230. assert(ms->opt.litLengthSum == 0); /* first block */
  1231. assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
  1232. assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
  1233. assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
  1234. ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
  1235. /* invalidate first scan from history, only keep entropy stats */
  1236. ZSTD_resetSeqStore(seqStore);
  1237. ms->window.base -= srcSize;
  1238. ms->window.dictLimit += (U32)srcSize;
  1239. ms->window.lowLimit = ms->window.dictLimit;
  1240. ms->nextToUpdate = ms->window.dictLimit;
  1241. }
  1242. size_t ZSTD_compressBlock_btultra(
  1243. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1244. const void* src, size_t srcSize)
  1245. {
  1246. DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
  1247. return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  1248. }
  1249. size_t ZSTD_compressBlock_btultra2(
  1250. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1251. const void* src, size_t srcSize)
  1252. {
  1253. U32 const curr = (U32)((const BYTE*)src - ms->window.base);
  1254. DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
  1255. /* 2-passes strategy:
  1256. * this strategy makes a first pass over first block to collect statistics
  1257. * in order to seed next round's statistics with it.
  1258. * After 1st pass, function forgets history, and starts a new block.
  1259. * Consequently, this can only work if no data has been previously loaded in tables,
  1260. * aka, no dictionary, no prefix, no ldm preprocessing.
  1261. * The compression ratio gain is generally small (~0.5% on first block),
  1262. ** the cost is 2x cpu time on first block. */
  1263. assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
  1264. if ( (ms->opt.litLengthSum==0) /* first block */
  1265. && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
  1266. && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
  1267. && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
  1268. && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
  1269. ) {
  1270. ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
  1271. }
  1272. return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
  1273. }
  1274. size_t ZSTD_compressBlock_btopt_dictMatchState(
  1275. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1276. const void* src, size_t srcSize)
  1277. {
  1278. return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
  1279. }
  1280. size_t ZSTD_compressBlock_btultra_dictMatchState(
  1281. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1282. const void* src, size_t srcSize)
  1283. {
  1284. return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
  1285. }
  1286. size_t ZSTD_compressBlock_btopt_extDict(
  1287. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1288. const void* src, size_t srcSize)
  1289. {
  1290. return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
  1291. }
  1292. size_t ZSTD_compressBlock_btultra_extDict(
  1293. ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
  1294. const void* src, size_t srcSize)
  1295. {
  1296. return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
  1297. }
  /* note : there is no btultra2 variant for extDict nor dictMatchState,
   * because btultra2 is not meant to work with dictionaries :
   * it only applies to the first block (no prefix) */