zstd_ddict.c 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. /*
  2. * Copyright (c) Meta Platforms, Inc. and affiliates.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* zstd_ddict.c :
  11. * concentrates all logic that needs to know the internals of ZSTD_DDict object */
  12. /*-*******************************************************
  13. * Dependencies
  14. *********************************************************/
  15. #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */
  16. #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
  17. #include "../common/cpu.h" /* bmi2 */
  18. #include "../common/mem.h" /* low level memory routines */
  19. #define FSE_STATIC_LINKING_ONLY
  20. #include "../common/fse.h"
  21. #include "../common/huf.h"
  22. #include "zstd_decompress_internal.h"
  23. #include "zstd_ddict.h"
  24. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
  25. # include "../legacy/zstd_legacy.h"
  26. #endif
  27. /*-*******************************************************
  28. * Types
  29. *********************************************************/
  30. struct ZSTD_DDict_s {
  31. void* dictBuffer;
  32. const void* dictContent;
  33. size_t dictSize;
  34. ZSTD_entropyDTables_t entropy;
  35. U32 dictID;
  36. U32 entropyPresent;
  37. ZSTD_customMem cMem;
  38. }; /* typedef'd to ZSTD_DDict within "zstd.h" */
  39. const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
  40. {
  41. assert(ddict != NULL);
  42. return ddict->dictContent;
  43. }
  44. size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
  45. {
  46. assert(ddict != NULL);
  47. return ddict->dictSize;
  48. }
  49. void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
  50. {
  51. DEBUGLOG(4, "ZSTD_copyDDictParameters");
  52. assert(dctx != NULL);
  53. assert(ddict != NULL);
  54. dctx->dictID = ddict->dictID;
  55. dctx->prefixStart = ddict->dictContent;
  56. dctx->virtualStart = ddict->dictContent;
  57. dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
  58. dctx->previousDstEnd = dctx->dictEnd;
  59. #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  60. dctx->dictContentBeginForFuzzing = dctx->prefixStart;
  61. dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
  62. #endif
  63. if (ddict->entropyPresent) {
  64. dctx->litEntropy = 1;
  65. dctx->fseEntropy = 1;
  66. dctx->LLTptr = ddict->entropy.LLTable;
  67. dctx->MLTptr = ddict->entropy.MLTable;
  68. dctx->OFTptr = ddict->entropy.OFTable;
  69. dctx->HUFptr = ddict->entropy.hufTable;
  70. dctx->entropy.rep[0] = ddict->entropy.rep[0];
  71. dctx->entropy.rep[1] = ddict->entropy.rep[1];
  72. dctx->entropy.rep[2] = ddict->entropy.rep[2];
  73. } else {
  74. dctx->litEntropy = 0;
  75. dctx->fseEntropy = 0;
  76. }
  77. }
  78. static size_t
  79. ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
  80. ZSTD_dictContentType_e dictContentType)
  81. {
  82. ddict->dictID = 0;
  83. ddict->entropyPresent = 0;
  84. if (dictContentType == ZSTD_dct_rawContent) return 0;
  85. if (ddict->dictSize < 8) {
  86. if (dictContentType == ZSTD_dct_fullDict)
  87. return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
  88. return 0; /* pure content mode */
  89. }
  90. { U32 const magic = MEM_readLE32(ddict->dictContent);
  91. if (magic != ZSTD_MAGIC_DICTIONARY) {
  92. if (dictContentType == ZSTD_dct_fullDict)
  93. return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
  94. return 0; /* pure content mode */
  95. }
  96. }
  97. ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
  98. /* load entropy tables */
  99. RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
  100. &ddict->entropy, ddict->dictContent, ddict->dictSize)),
  101. dictionary_corrupted, "");
  102. ddict->entropyPresent = 1;
  103. return 0;
  104. }
  105. static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
  106. const void* dict, size_t dictSize,
  107. ZSTD_dictLoadMethod_e dictLoadMethod,
  108. ZSTD_dictContentType_e dictContentType)
  109. {
  110. if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
  111. ddict->dictBuffer = NULL;
  112. ddict->dictContent = dict;
  113. if (!dict) dictSize = 0;
  114. } else {
  115. void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
  116. ddict->dictBuffer = internalBuffer;
  117. ddict->dictContent = internalBuffer;
  118. if (!internalBuffer) return ERROR(memory_allocation);
  119. ZSTD_memcpy(internalBuffer, dict, dictSize);
  120. }
  121. ddict->dictSize = dictSize;
  122. ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */
  123. /* parse dictionary content */
  124. FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
  125. return 0;
  126. }
  127. ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
  128. ZSTD_dictLoadMethod_e dictLoadMethod,
  129. ZSTD_dictContentType_e dictContentType,
  130. ZSTD_customMem customMem)
  131. {
  132. if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
  133. { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
  134. if (ddict == NULL) return NULL;
  135. ddict->cMem = customMem;
  136. { size_t const initResult = ZSTD_initDDict_internal(ddict,
  137. dict, dictSize,
  138. dictLoadMethod, dictContentType);
  139. if (ZSTD_isError(initResult)) {
  140. ZSTD_freeDDict(ddict);
  141. return NULL;
  142. } }
  143. return ddict;
  144. }
  145. }
  146. /*! ZSTD_createDDict() :
  147. * Create a digested dictionary, to start decompression without startup delay.
  148. * `dict` content is copied inside DDict.
  149. * Consequently, `dict` can be released after `ZSTD_DDict` creation */
  150. ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
  151. {
  152. ZSTD_customMem const allocator = { NULL, NULL, NULL };
  153. return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
  154. }
  155. /*! ZSTD_createDDict_byReference() :
  156. * Create a digested dictionary, to start decompression without startup delay.
  157. * Dictionary content is simply referenced, it will be accessed during decompression.
  158. * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
  159. ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
  160. {
  161. ZSTD_customMem const allocator = { NULL, NULL, NULL };
  162. return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
  163. }
  164. const ZSTD_DDict* ZSTD_initStaticDDict(
  165. void* sBuffer, size_t sBufferSize,
  166. const void* dict, size_t dictSize,
  167. ZSTD_dictLoadMethod_e dictLoadMethod,
  168. ZSTD_dictContentType_e dictContentType)
  169. {
  170. size_t const neededSpace = sizeof(ZSTD_DDict)
  171. + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
  172. ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
  173. assert(sBuffer != NULL);
  174. assert(dict != NULL);
  175. if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
  176. if (sBufferSize < neededSpace) return NULL;
  177. if (dictLoadMethod == ZSTD_dlm_byCopy) {
  178. ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */
  179. dict = ddict+1;
  180. }
  181. if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
  182. dict, dictSize,
  183. ZSTD_dlm_byRef, dictContentType) ))
  184. return NULL;
  185. return ddict;
  186. }
  187. size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
  188. {
  189. if (ddict==NULL) return 0; /* support free on NULL */
  190. { ZSTD_customMem const cMem = ddict->cMem;
  191. ZSTD_customFree(ddict->dictBuffer, cMem);
  192. ZSTD_customFree(ddict, cMem);
  193. return 0;
  194. }
  195. }
  196. /*! ZSTD_estimateDDictSize() :
  197. * Estimate amount of memory that will be needed to create a dictionary for decompression.
  198. * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
  199. size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
  200. {
  201. return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
  202. }
  203. size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
  204. {
  205. if (ddict==NULL) return 0; /* support sizeof on NULL */
  206. return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
  207. }
  208. /*! ZSTD_getDictID_fromDDict() :
  209. * Provides the dictID of the dictionary loaded into `ddict`.
  210. * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  211. * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
  212. unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
  213. {
  214. if (ddict==NULL) return 0;
  215. return ddict->dictID;
  216. }