huf_decompress.c 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
  1. /* ******************************************************************
  2. Huffman decoder, part of New Generation Entropy library
  3. Copyright (C) 2013-2016, Yann Collet.
  4. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions are
  7. met:
  8. * Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. * Redistributions in binary form must reproduce the above
  11. copyright notice, this list of conditions and the following disclaimer
  12. in the documentation and/or other materials provided with the
  13. distribution.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. You can contact the author at :
  26. - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
  27. - Public forum : https://groups.google.com/forum/#!forum/lz4c
  28. ****************************************************************** */
  29. /* **************************************************************
  30. * Compiler specifics
  31. ****************************************************************/
  32. #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
  33. /* inline is defined */
  34. #elif defined(_MSC_VER)
  35. # define inline __inline
  36. #else
  37. # define inline /* disable inline */
  38. #endif
  39. #ifdef _MSC_VER /* Visual Studio */
  40. # define FORCE_INLINE static __forceinline
  41. # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
  42. #else
  43. # ifdef __GNUC__
  44. # define FORCE_INLINE static inline __attribute__((always_inline))
  45. # else
  46. # define FORCE_INLINE static inline
  47. # endif
  48. #endif
  49. /* **************************************************************
  50. * Includes
  51. ****************************************************************/
  52. #include <stdlib.h> /* malloc, free, qsort */
  53. #include <string.h> /* memcpy, memset */
  54. #include <stdio.h> /* printf (debug) */
  55. #include "huf_static.h"
  56. #include "bitstream.h"
  57. #include "fse.h" /* header compression */
  58. /* **************************************************************
  59. * Error Management
  60. ****************************************************************/
  61. #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
  62. /* *******************************************************
  63. * HUF : Huffman block decompression
  64. *********************************************************/
  65. typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
  66. typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
  67. typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
  68. /*-***************************/
  69. /* single-symbol decoding */
  70. /*-***************************/
  71. size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
  72. {
  73. BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
  74. U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1]; /* large enough for values from 0 to 16 */
  75. U32 tableLog = 0;
  76. size_t iSize;
  77. U32 nbSymbols = 0;
  78. U32 n;
  79. U32 nextRankStart;
  80. void* const dtPtr = DTable + 1;
  81. HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
  82. HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16)); /* if compilation fails here, assertion is false */
  83. //memset(huffWeight, 0, sizeof(huffWeight)); /* is not necessary, even though some analyzer complain ... */
  84. iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
  85. if (HUF_isError(iSize)) return iSize;
  86. /* check result */
  87. if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge); /* DTable is too small */
  88. DTable[0] = (U16)tableLog; /* maybe should separate sizeof allocated DTable, from used size of DTable, in case of re-use */
  89. /* Prepare ranks */
  90. nextRankStart = 0;
  91. for (n=1; n<tableLog+1; n++) {
  92. U32 current = nextRankStart;
  93. nextRankStart += (rankVal[n] << (n-1));
  94. rankVal[n] = current;
  95. }
  96. /* fill DTable */
  97. for (n=0; n<nbSymbols; n++) {
  98. const U32 w = huffWeight[n];
  99. const U32 length = (1 << w) >> 1;
  100. U32 i;
  101. HUF_DEltX2 D;
  102. D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
  103. for (i = rankVal[w]; i < rankVal[w] + length; i++)
  104. dt[i] = D;
  105. rankVal[w] += length;
  106. }
  107. return iSize;
  108. }
  109. static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
  110. {
  111. const size_t val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
  112. const BYTE c = dt[val].byte;
  113. BIT_skipBits(Dstream, dt[val].nbBits);
  114. return c;
  115. }
  116. #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
  117. *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
  118. #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
  119. if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
  120. HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
  121. #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
  122. if (MEM_64bits()) \
  123. HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
  124. static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
  125. {
  126. BYTE* const pStart = p;
  127. /* up to 4 symbols at a time */
  128. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
  129. HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
  130. HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
  131. HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
  132. HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
  133. }
  134. /* closer to the end */
  135. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
  136. HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
  137. /* no more data to retrieve from bitstream, hence no need to reload */
  138. while (p < pEnd)
  139. HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
  140. return pEnd-pStart;
  141. }
  142. size_t HUF_decompress1X2_usingDTable(
  143. void* dst, size_t dstSize,
  144. const void* cSrc, size_t cSrcSize,
  145. const U16* DTable)
  146. {
  147. BYTE* op = (BYTE*)dst;
  148. BYTE* const oend = op + dstSize;
  149. const U32 dtLog = DTable[0];
  150. const void* dtPtr = DTable;
  151. const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr)+1;
  152. BIT_DStream_t bitD;
  153. { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
  154. if (HUF_isError(errorCode)) return errorCode; }
  155. HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
  156. /* check */
  157. if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
  158. return dstSize;
  159. }
  160. size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  161. {
  162. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
  163. const BYTE* ip = (const BYTE*) cSrc;
  164. size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
  165. if (HUF_isError(errorCode)) return errorCode;
  166. if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
  167. ip += errorCode;
  168. cSrcSize -= errorCode;
  169. return HUF_decompress1X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
  170. }
  171. size_t HUF_decompress4X2_usingDTable(
  172. void* dst, size_t dstSize,
  173. const void* cSrc, size_t cSrcSize,
  174. const U16* DTable)
  175. {
  176. /* Check */
  177. if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
  178. { const BYTE* const istart = (const BYTE*) cSrc;
  179. BYTE* const ostart = (BYTE*) dst;
  180. BYTE* const oend = ostart + dstSize;
  181. const void* const dtPtr = DTable;
  182. const HUF_DEltX2* const dt = ((const HUF_DEltX2*)dtPtr) +1;
  183. const U32 dtLog = DTable[0];
  184. size_t errorCode;
  185. /* Init */
  186. BIT_DStream_t bitD1;
  187. BIT_DStream_t bitD2;
  188. BIT_DStream_t bitD3;
  189. BIT_DStream_t bitD4;
  190. const size_t length1 = MEM_readLE16(istart);
  191. const size_t length2 = MEM_readLE16(istart+2);
  192. const size_t length3 = MEM_readLE16(istart+4);
  193. size_t length4;
  194. const BYTE* const istart1 = istart + 6; /* jumpTable */
  195. const BYTE* const istart2 = istart1 + length1;
  196. const BYTE* const istart3 = istart2 + length2;
  197. const BYTE* const istart4 = istart3 + length3;
  198. const size_t segmentSize = (dstSize+3) / 4;
  199. BYTE* const opStart2 = ostart + segmentSize;
  200. BYTE* const opStart3 = opStart2 + segmentSize;
  201. BYTE* const opStart4 = opStart3 + segmentSize;
  202. BYTE* op1 = ostart;
  203. BYTE* op2 = opStart2;
  204. BYTE* op3 = opStart3;
  205. BYTE* op4 = opStart4;
  206. U32 endSignal;
  207. length4 = cSrcSize - (length1 + length2 + length3 + 6);
  208. if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
  209. errorCode = BIT_initDStream(&bitD1, istart1, length1);
  210. if (HUF_isError(errorCode)) return errorCode;
  211. errorCode = BIT_initDStream(&bitD2, istart2, length2);
  212. if (HUF_isError(errorCode)) return errorCode;
  213. errorCode = BIT_initDStream(&bitD3, istart3, length3);
  214. if (HUF_isError(errorCode)) return errorCode;
  215. errorCode = BIT_initDStream(&bitD4, istart4, length4);
  216. if (HUF_isError(errorCode)) return errorCode;
  217. /* 16-32 symbols per loop (4-8 symbols per stream) */
  218. endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
  219. for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
  220. HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
  221. HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
  222. HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
  223. HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
  224. HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
  225. HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
  226. HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
  227. HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
  228. HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
  229. HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
  230. HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
  231. HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
  232. HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
  233. HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
  234. HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
  235. HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
  236. endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
  237. }
  238. /* check corruption */
  239. if (op1 > opStart2) return ERROR(corruption_detected);
  240. if (op2 > opStart3) return ERROR(corruption_detected);
  241. if (op3 > opStart4) return ERROR(corruption_detected);
  242. /* note : op4 supposed already verified within main loop */
  243. /* finish bitStreams one by one */
  244. HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
  245. HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
  246. HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
  247. HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
  248. /* check */
  249. endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
  250. if (!endSignal) return ERROR(corruption_detected);
  251. /* decoded size */
  252. return dstSize;
  253. }
  254. }
  255. size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  256. {
  257. HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
  258. const BYTE* ip = (const BYTE*) cSrc;
  259. size_t const errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
  260. if (HUF_isError(errorCode)) return errorCode;
  261. if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
  262. ip += errorCode;
  263. cSrcSize -= errorCode;
  264. return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
  265. }
  266. /* *************************/
  267. /* double-symbols decoding */
  268. /* *************************/
  269. static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
  270. const U32* rankValOrigin, const int minWeight,
  271. const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
  272. U32 nbBitsBaseline, U16 baseSeq)
  273. {
  274. HUF_DEltX4 DElt;
  275. U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
  276. /* get pre-calculated rankVal */
  277. memcpy(rankVal, rankValOrigin, sizeof(rankVal));
  278. /* fill skipped values */
  279. if (minWeight>1) {
  280. U32 i, skipSize = rankVal[minWeight];
  281. MEM_writeLE16(&(DElt.sequence), baseSeq);
  282. DElt.nbBits = (BYTE)(consumed);
  283. DElt.length = 1;
  284. for (i = 0; i < skipSize; i++)
  285. DTable[i] = DElt;
  286. }
  287. /* fill DTable */
  288. { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
  289. const U32 symbol = sortedSymbols[s].symbol;
  290. const U32 weight = sortedSymbols[s].weight;
  291. const U32 nbBits = nbBitsBaseline - weight;
  292. const U32 length = 1 << (sizeLog-nbBits);
  293. const U32 start = rankVal[weight];
  294. U32 i = start;
  295. const U32 end = start + length;
  296. MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
  297. DElt.nbBits = (BYTE)(nbBits + consumed);
  298. DElt.length = 2;
  299. do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
  300. rankVal[weight] += length;
  301. }}
  302. }
  303. typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
  304. static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
  305. const sortedSymbol_t* sortedList, const U32 sortedListSize,
  306. const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
  307. const U32 nbBitsBaseline)
  308. {
  309. U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
  310. const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
  311. const U32 minBits = nbBitsBaseline - maxWeight;
  312. U32 s;
  313. memcpy(rankVal, rankValOrigin, sizeof(rankVal));
  314. /* fill DTable */
  315. for (s=0; s<sortedListSize; s++) {
  316. const U16 symbol = sortedList[s].symbol;
  317. const U32 weight = sortedList[s].weight;
  318. const U32 nbBits = nbBitsBaseline - weight;
  319. const U32 start = rankVal[weight];
  320. const U32 length = 1 << (targetLog-nbBits);
  321. if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
  322. U32 sortedRank;
  323. int minWeight = nbBits + scaleLog;
  324. if (minWeight < 1) minWeight = 1;
  325. sortedRank = rankStart[minWeight];
  326. HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
  327. rankValOrigin[nbBits], minWeight,
  328. sortedList+sortedRank, sortedListSize-sortedRank,
  329. nbBitsBaseline, symbol);
  330. } else {
  331. HUF_DEltX4 DElt;
  332. MEM_writeLE16(&(DElt.sequence), symbol);
  333. DElt.nbBits = (BYTE)(nbBits);
  334. DElt.length = 1;
  335. { U32 u;
  336. const U32 end = start + length;
  337. for (u = start; u < end; u++) DTable[u] = DElt;
  338. } }
  339. rankVal[weight] += length;
  340. }
  341. }
  342. size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
  343. {
  344. BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
  345. sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
  346. U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
  347. U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
  348. U32* const rankStart = rankStart0+1;
  349. rankVal_t rankVal;
  350. U32 tableLog, maxW, sizeOfSort, nbSymbols;
  351. const U32 memLog = DTable[0];
  352. size_t iSize;
  353. void* dtPtr = DTable;
  354. HUF_DEltX4* const dt = ((HUF_DEltX4*)dtPtr) + 1;
  355. HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
  356. if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
  357. //memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
  358. iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
  359. if (HUF_isError(iSize)) return iSize;
  360. /* check result */
  361. if (tableLog > memLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
  362. /* find maxWeight */
  363. for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
  364. /* Get start index of each weight */
  365. { U32 w, nextRankStart = 0;
  366. for (w=1; w<maxW+1; w++) {
  367. U32 current = nextRankStart;
  368. nextRankStart += rankStats[w];
  369. rankStart[w] = current;
  370. }
  371. rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
  372. sizeOfSort = nextRankStart;
  373. }
  374. /* sort symbols by weight */
  375. { U32 s;
  376. for (s=0; s<nbSymbols; s++) {
  377. U32 const w = weightList[s];
  378. U32 const r = rankStart[w]++;
  379. sortedSymbol[r].symbol = (BYTE)s;
  380. sortedSymbol[r].weight = (BYTE)w;
  381. }
  382. rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
  383. }
  384. /* Build rankVal */
  385. { U32* const rankVal0 = rankVal[0];
  386. { int const rescale = (memLog-tableLog) - 1; /* tableLog <= memLog */
  387. U32 nextRankVal = 0;
  388. U32 w;
  389. for (w=1; w<maxW+1; w++) {
  390. U32 current = nextRankVal;
  391. nextRankVal += rankStats[w] << (w+rescale);
  392. rankVal0[w] = current;
  393. } }
  394. { U32 const minBits = tableLog+1 - maxW;
  395. U32 consumed;
  396. for (consumed = minBits; consumed < memLog - minBits + 1; consumed++) {
  397. U32* const rankValPtr = rankVal[consumed];
  398. U32 w;
  399. for (w = 1; w < maxW+1; w++) {
  400. rankValPtr[w] = rankVal0[w] >> consumed;
  401. } } } }
  402. HUF_fillDTableX4(dt, memLog,
  403. sortedSymbol, sizeOfSort,
  404. rankStart0, rankVal, maxW,
  405. tableLog+1);
  406. return iSize;
  407. }
  408. static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
  409. {
  410. const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
  411. memcpy(op, dt+val, 2);
  412. BIT_skipBits(DStream, dt[val].nbBits);
  413. return dt[val].length;
  414. }
  415. static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
  416. {
  417. const size_t val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
  418. memcpy(op, dt+val, 1);
  419. if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
  420. else {
  421. if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
  422. BIT_skipBits(DStream, dt[val].nbBits);
  423. if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
  424. DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
  425. } }
  426. return 1;
  427. }
  428. #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
  429. ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
  430. #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
  431. if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
  432. ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
  433. #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
  434. if (MEM_64bits()) \
  435. ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
  436. static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
  437. {
  438. BYTE* const pStart = p;
  439. /* up to 8 symbols at a time */
  440. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) {
  441. HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
  442. HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
  443. HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
  444. HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
  445. }
  446. /* closer to the end */
  447. while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2))
  448. HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
  449. while (p <= pEnd-2)
  450. HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
  451. if (p < pEnd)
  452. p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
  453. return p-pStart;
  454. }
  455. size_t HUF_decompress1X4_usingDTable(
  456. void* dst, size_t dstSize,
  457. const void* cSrc, size_t cSrcSize,
  458. const U32* DTable)
  459. {
  460. const BYTE* const istart = (const BYTE*) cSrc;
  461. BYTE* const ostart = (BYTE*) dst;
  462. BYTE* const oend = ostart + dstSize;
  463. const U32 dtLog = DTable[0];
  464. const void* const dtPtr = DTable;
  465. const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
  466. /* Init */
  467. BIT_DStream_t bitD;
  468. { size_t const errorCode = BIT_initDStream(&bitD, istart, cSrcSize);
  469. if (HUF_isError(errorCode)) return errorCode; }
  470. /* decode */
  471. HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtLog);
  472. /* check */
  473. if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
  474. /* decoded size */
  475. return dstSize;
  476. }
  477. size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  478. {
  479. HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
  480. const BYTE* ip = (const BYTE*) cSrc;
  481. size_t const hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
  482. if (HUF_isError(hSize)) return hSize;
  483. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  484. ip += hSize;
  485. cSrcSize -= hSize;
  486. return HUF_decompress1X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
  487. }
  488. size_t HUF_decompress4X4_usingDTable(
  489. void* dst, size_t dstSize,
  490. const void* cSrc, size_t cSrcSize,
  491. const U32* DTable)
  492. {
  493. if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
  494. { const BYTE* const istart = (const BYTE*) cSrc;
  495. BYTE* const ostart = (BYTE*) dst;
  496. BYTE* const oend = ostart + dstSize;
  497. const void* const dtPtr = DTable;
  498. const HUF_DEltX4* const dt = ((const HUF_DEltX4*)dtPtr) +1;
  499. const U32 dtLog = DTable[0];
  500. size_t errorCode;
  501. /* Init */
  502. BIT_DStream_t bitD1;
  503. BIT_DStream_t bitD2;
  504. BIT_DStream_t bitD3;
  505. BIT_DStream_t bitD4;
  506. const size_t length1 = MEM_readLE16(istart);
  507. const size_t length2 = MEM_readLE16(istart+2);
  508. const size_t length3 = MEM_readLE16(istart+4);
  509. size_t length4;
  510. const BYTE* const istart1 = istart + 6; /* jumpTable */
  511. const BYTE* const istart2 = istart1 + length1;
  512. const BYTE* const istart3 = istart2 + length2;
  513. const BYTE* const istart4 = istart3 + length3;
  514. const size_t segmentSize = (dstSize+3) / 4;
  515. BYTE* const opStart2 = ostart + segmentSize;
  516. BYTE* const opStart3 = opStart2 + segmentSize;
  517. BYTE* const opStart4 = opStart3 + segmentSize;
  518. BYTE* op1 = ostart;
  519. BYTE* op2 = opStart2;
  520. BYTE* op3 = opStart3;
  521. BYTE* op4 = opStart4;
  522. U32 endSignal;
  523. length4 = cSrcSize - (length1 + length2 + length3 + 6);
  524. if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
  525. errorCode = BIT_initDStream(&bitD1, istart1, length1);
  526. if (HUF_isError(errorCode)) return errorCode;
  527. errorCode = BIT_initDStream(&bitD2, istart2, length2);
  528. if (HUF_isError(errorCode)) return errorCode;
  529. errorCode = BIT_initDStream(&bitD3, istart3, length3);
  530. if (HUF_isError(errorCode)) return errorCode;
  531. errorCode = BIT_initDStream(&bitD4, istart4, length4);
  532. if (HUF_isError(errorCode)) return errorCode;
  533. /* 16-32 symbols per loop (4-8 symbols per stream) */
  534. endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
  535. for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
  536. HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
  537. HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
  538. HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
  539. HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
  540. HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
  541. HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
  542. HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
  543. HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
  544. HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
  545. HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
  546. HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
  547. HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
  548. HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
  549. HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
  550. HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
  551. HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
  552. endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
  553. }
  554. /* check corruption */
  555. if (op1 > opStart2) return ERROR(corruption_detected);
  556. if (op2 > opStart3) return ERROR(corruption_detected);
  557. if (op3 > opStart4) return ERROR(corruption_detected);
  558. /* note : op4 supposed already verified within main loop */
  559. /* finish bitStreams one by one */
  560. HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
  561. HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
  562. HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
  563. HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
  564. /* check */
  565. endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
  566. if (!endSignal) return ERROR(corruption_detected);
  567. /* decoded size */
  568. return dstSize;
  569. }
  570. }
  571. size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  572. {
  573. HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG);
  574. const BYTE* ip = (const BYTE*) cSrc;
  575. size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize);
  576. if (HUF_isError(hSize)) return hSize;
  577. if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
  578. ip += hSize;
  579. cSrcSize -= hSize;
  580. return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
  581. }
  582. /* ********************************/
  583. /* Generic decompression selector */
  584. /* ********************************/
  585. typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
  586. static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
  587. {
  588. /* single, double, quad */
  589. {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
  590. {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
  591. {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
  592. {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
  593. {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
  594. {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
  595. {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
  596. {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
  597. {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
  598. {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
  599. {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
  600. {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
  601. {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
  602. {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
  603. {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
  604. {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
  605. };
  606. typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
  607. size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
  608. {
  609. static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
  610. U32 Dtime[3]; /* decompression time estimation */
  611. /* validation checks */
  612. if (dstSize == 0) return ERROR(dstSize_tooSmall);
  613. if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
  614. if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
  615. if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
  616. /* decoder timing evaluation */
  617. { U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
  618. U32 const D256 = (U32)(dstSize >> 8);
  619. U32 n; for (n=0; n<3; n++)
  620. Dtime[n] = algoTime[Q][n].tableTime + (algoTime[Q][n].decode256Time * D256);
  621. }
  622. Dtime[1] += Dtime[1] >> 4; Dtime[2] += Dtime[2] >> 3; /* advantage to algorithms using less memory, for cache eviction */
  623. { U32 algoNb = 0;
  624. if (Dtime[1] < Dtime[0]) algoNb = 1;
  625. return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
  626. }
  627. }