fse_decompress.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. /* ******************************************************************
  2. * FSE : Finite State Entropy decoder
  3. * Copyright (c) Yann Collet, Facebook, Inc.
  4. *
  5. * You can contact the author at :
  6. * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
  7. * - Public forum : https://groups.google.com/forum/#!forum/lz4c
  8. *
  9. * This source code is licensed under both the BSD-style license (found in the
  10. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  11. * in the COPYING file in the root directory of this source tree).
  12. * You may select, at your option, one of the above-listed licenses.
  13. ****************************************************************** */
  14. /* **************************************************************
  15. * Includes
  16. ****************************************************************/
  17. #include "debug.h" /* assert */
  18. #include "bitstream.h"
  19. #include "compiler.h"
  20. #define FSE_STATIC_LINKING_ONLY
  21. #include "fse.h"
  22. #include "error_private.h"
  23. #define ZSTD_DEPS_NEED_MALLOC
  24. #include "zstd_deps.h"
  25. /* **************************************************************
  26. * Error Management
  27. ****************************************************************/
  28. #define FSE_isError ERR_isError
  29. #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
  30. /* **************************************************************
  31. * Templates
  32. ****************************************************************/
  33. /*
  34. designed to be included
  35. for type-specific functions (template emulation in C)
  36. Objective is to write these functions only once, for improved maintenance
  37. */
  38. /* safety checks */
  39. #ifndef FSE_FUNCTION_EXTENSION
  40. # error "FSE_FUNCTION_EXTENSION must be defined"
  41. #endif
  42. #ifndef FSE_FUNCTION_TYPE
  43. # error "FSE_FUNCTION_TYPE must be defined"
  44. #endif
  45. /* Function names */
  46. #define FSE_CAT(X,Y) X##Y
  47. #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
  48. #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
  49. /* Function templates */
  50. FSE_DTable* FSE_createDTable (unsigned tableLog)
  51. {
  52. if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
  53. return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
  54. }
  55. void FSE_freeDTable (FSE_DTable* dt)
  56. {
  57. ZSTD_free(dt);
  58. }
  59. static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
  60. {
  61. void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
  62. FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
  63. U16* symbolNext = (U16*)workSpace;
  64. BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
  65. U32 const maxSV1 = maxSymbolValue + 1;
  66. U32 const tableSize = 1 << tableLog;
  67. U32 highThreshold = tableSize-1;
  68. /* Sanity Checks */
  69. if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
  70. if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
  71. if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
  72. /* Init, lay down lowprob symbols */
  73. { FSE_DTableHeader DTableH;
  74. DTableH.tableLog = (U16)tableLog;
  75. DTableH.fastMode = 1;
  76. { S16 const largeLimit= (S16)(1 << (tableLog-1));
  77. U32 s;
  78. for (s=0; s<maxSV1; s++) {
  79. if (normalizedCounter[s]==-1) {
  80. tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
  81. symbolNext[s] = 1;
  82. } else {
  83. if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
  84. symbolNext[s] = normalizedCounter[s];
  85. } } }
  86. ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
  87. }
  88. /* Spread symbols */
  89. if (highThreshold == tableSize - 1) {
  90. size_t const tableMask = tableSize-1;
  91. size_t const step = FSE_TABLESTEP(tableSize);
  92. /* First lay down the symbols in order.
  93. * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
  94. * misses since small blocks generally have small table logs, so nearly
  95. * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
  96. * our buffer to handle the over-write.
  97. */
  98. {
  99. U64 const add = 0x0101010101010101ull;
  100. size_t pos = 0;
  101. U64 sv = 0;
  102. U32 s;
  103. for (s=0; s<maxSV1; ++s, sv += add) {
  104. int i;
  105. int const n = normalizedCounter[s];
  106. MEM_write64(spread + pos, sv);
  107. for (i = 8; i < n; i += 8) {
  108. MEM_write64(spread + pos + i, sv);
  109. }
  110. pos += n;
  111. }
  112. }
  113. /* Now we spread those positions across the table.
  114. * The benefit of doing it in two stages is that we avoid the the
  115. * variable size inner loop, which caused lots of branch misses.
  116. * Now we can run through all the positions without any branch misses.
  117. * We unroll the loop twice, since that is what emperically worked best.
  118. */
  119. {
  120. size_t position = 0;
  121. size_t s;
  122. size_t const unroll = 2;
  123. assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
  124. for (s = 0; s < (size_t)tableSize; s += unroll) {
  125. size_t u;
  126. for (u = 0; u < unroll; ++u) {
  127. size_t const uPosition = (position + (u * step)) & tableMask;
  128. tableDecode[uPosition].symbol = spread[s + u];
  129. }
  130. position = (position + (unroll * step)) & tableMask;
  131. }
  132. assert(position == 0);
  133. }
  134. } else {
  135. U32 const tableMask = tableSize-1;
  136. U32 const step = FSE_TABLESTEP(tableSize);
  137. U32 s, position = 0;
  138. for (s=0; s<maxSV1; s++) {
  139. int i;
  140. for (i=0; i<normalizedCounter[s]; i++) {
  141. tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
  142. position = (position + step) & tableMask;
  143. while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
  144. } }
  145. if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
  146. }
  147. /* Build Decoding table */
  148. { U32 u;
  149. for (u=0; u<tableSize; u++) {
  150. FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
  151. U32 const nextState = symbolNext[symbol]++;
  152. tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
  153. tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
  154. } }
  155. return 0;
  156. }
  157. size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
  158. {
  159. return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
  160. }
  161. #ifndef FSE_COMMONDEFS_ONLY
  162. /*-*******************************************************
  163. * Decompression (Byte symbols)
  164. *********************************************************/
  165. size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
  166. {
  167. void* ptr = dt;
  168. FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
  169. void* dPtr = dt + 1;
  170. FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
  171. DTableH->tableLog = 0;
  172. DTableH->fastMode = 0;
  173. cell->newState = 0;
  174. cell->symbol = symbolValue;
  175. cell->nbBits = 0;
  176. return 0;
  177. }
  178. size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
  179. {
  180. void* ptr = dt;
  181. FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
  182. void* dPtr = dt + 1;
  183. FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
  184. const unsigned tableSize = 1 << nbBits;
  185. const unsigned tableMask = tableSize - 1;
  186. const unsigned maxSV1 = tableMask+1;
  187. unsigned s;
  188. /* Sanity checks */
  189. if (nbBits < 1) return ERROR(GENERIC); /* min size */
  190. /* Build Decoding Table */
  191. DTableH->tableLog = (U16)nbBits;
  192. DTableH->fastMode = 1;
  193. for (s=0; s<maxSV1; s++) {
  194. dinfo[s].newState = 0;
  195. dinfo[s].symbol = (BYTE)s;
  196. dinfo[s].nbBits = (BYTE)nbBits;
  197. }
  198. return 0;
  199. }
  200. FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
  201. void* dst, size_t maxDstSize,
  202. const void* cSrc, size_t cSrcSize,
  203. const FSE_DTable* dt, const unsigned fast)
  204. {
  205. BYTE* const ostart = (BYTE*) dst;
  206. BYTE* op = ostart;
  207. BYTE* const omax = op + maxDstSize;
  208. BYTE* const olimit = omax-3;
  209. BIT_DStream_t bitD;
  210. FSE_DState_t state1;
  211. FSE_DState_t state2;
  212. /* Init */
  213. CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
  214. FSE_initDState(&state1, &bitD, dt);
  215. FSE_initDState(&state2, &bitD, dt);
  216. #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
  217. /* 4 symbols per loop */
  218. for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
  219. op[0] = FSE_GETSYMBOL(&state1);
  220. if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
  221. BIT_reloadDStream(&bitD);
  222. op[1] = FSE_GETSYMBOL(&state2);
  223. if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
  224. { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
  225. op[2] = FSE_GETSYMBOL(&state1);
  226. if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
  227. BIT_reloadDStream(&bitD);
  228. op[3] = FSE_GETSYMBOL(&state2);
  229. }
  230. /* tail */
  231. /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
  232. while (1) {
  233. if (op>(omax-2)) return ERROR(dstSize_tooSmall);
  234. *op++ = FSE_GETSYMBOL(&state1);
  235. if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
  236. *op++ = FSE_GETSYMBOL(&state2);
  237. break;
  238. }
  239. if (op>(omax-2)) return ERROR(dstSize_tooSmall);
  240. *op++ = FSE_GETSYMBOL(&state2);
  241. if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
  242. *op++ = FSE_GETSYMBOL(&state1);
  243. break;
  244. } }
  245. return op-ostart;
  246. }
  247. size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
  248. const void* cSrc, size_t cSrcSize,
  249. const FSE_DTable* dt)
  250. {
  251. const void* ptr = dt;
  252. const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
  253. const U32 fastMode = DTableH->fastMode;
  254. /* select fast mode (static) */
  255. if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
  256. return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
  257. }
  258. size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
  259. {
  260. return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
  261. }
  262. typedef struct {
  263. short ncount[FSE_MAX_SYMBOL_VALUE + 1];
  264. FSE_DTable dtable[1]; /* Dynamically sized */
  265. } FSE_DecompressWksp;
  266. FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
  267. void* dst, size_t dstCapacity,
  268. const void* cSrc, size_t cSrcSize,
  269. unsigned maxLog, void* workSpace, size_t wkspSize,
  270. int bmi2)
  271. {
  272. const BYTE* const istart = (const BYTE*)cSrc;
  273. const BYTE* ip = istart;
  274. unsigned tableLog;
  275. unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
  276. FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
  277. DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
  278. if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
  279. /* normal FSE decoding mode */
  280. {
  281. size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
  282. if (FSE_isError(NCountLength)) return NCountLength;
  283. if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
  284. assert(NCountLength <= cSrcSize);
  285. ip += NCountLength;
  286. cSrcSize -= NCountLength;
  287. }
  288. if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
  289. workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
  290. wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
  291. CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
  292. {
  293. const void* ptr = wksp->dtable;
  294. const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
  295. const U32 fastMode = DTableH->fastMode;
  296. /* select fast mode (static) */
  297. if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
  298. return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
  299. }
  300. }
  301. /* Avoids the FORCE_INLINE of the _body() function. */
  302. static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
  303. {
  304. return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
  305. }
  306. #if DYNAMIC_BMI2
  307. BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
  308. {
  309. return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
  310. }
  311. #endif
  312. size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
  313. {
  314. #if DYNAMIC_BMI2
  315. if (bmi2) {
  316. return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
  317. }
  318. #endif
  319. (void)bmi2;
  320. return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
  321. }
  322. typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
  323. #ifndef ZSTD_NO_UNUSED_FUNCTIONS
  324. size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) {
  325. U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)];
  326. return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp));
  327. }
  328. size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize)
  329. {
  330. /* Static analyzer seems unable to understand this table will be properly initialized later */
  331. U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
  332. return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp));
  333. }
  334. #endif
  335. #endif /* FSE_COMMONDEFS_ONLY */