index_decoder.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372
  1. // SPDX-License-Identifier: 0BSD
  2. ///////////////////////////////////////////////////////////////////////////////
  3. //
  4. /// \file index_decoder.c
  5. /// \brief Decodes the Index field
  6. //
  7. // Author: Lasse Collin
  8. //
  9. ///////////////////////////////////////////////////////////////////////////////
  10. #include "index_decoder.h"
  11. #include "check.h"
  12. typedef struct {
  13. enum {
  14. SEQ_INDICATOR,
  15. SEQ_COUNT,
  16. SEQ_MEMUSAGE,
  17. SEQ_UNPADDED,
  18. SEQ_UNCOMPRESSED,
  19. SEQ_PADDING_INIT,
  20. SEQ_PADDING,
  21. SEQ_CRC32,
  22. } sequence;
  23. /// Memory usage limit
  24. uint64_t memlimit;
  25. /// Target Index
  26. lzma_index *index;
  27. /// Pointer give by the application, which is set after
  28. /// successful decoding.
  29. lzma_index **index_ptr;
  30. /// Number of Records left to decode.
  31. lzma_vli count;
  32. /// The most recent Unpadded Size field
  33. lzma_vli unpadded_size;
  34. /// The most recent Uncompressed Size field
  35. lzma_vli uncompressed_size;
  36. /// Position in integers
  37. size_t pos;
  38. /// CRC32 of the List of Records field
  39. uint32_t crc32;
  40. } lzma_index_coder;
  41. static lzma_ret
  42. index_decode(void *coder_ptr, const lzma_allocator *allocator,
  43. const uint8_t *restrict in, size_t *restrict in_pos,
  44. size_t in_size,
  45. uint8_t *restrict out lzma_attribute((__unused__)),
  46. size_t *restrict out_pos lzma_attribute((__unused__)),
  47. size_t out_size lzma_attribute((__unused__)),
  48. lzma_action action lzma_attribute((__unused__)))
  49. {
  50. lzma_index_coder *coder = coder_ptr;
  51. // Similar optimization as in index_encoder.c
  52. const size_t in_start = *in_pos;
  53. lzma_ret ret = LZMA_OK;
  54. while (*in_pos < in_size)
  55. switch (coder->sequence) {
  56. case SEQ_INDICATOR:
  57. // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
  58. // LZMA_FORMAT_ERROR, because a typical usage case for Index
  59. // decoder is when parsing the Stream backwards. If seeking
  60. // backward from the Stream Footer gives us something that
  61. // doesn't begin with Index Indicator, the file is considered
  62. // corrupt, not "programming error" or "unrecognized file
  63. // format". One could argue that the application should
  64. // verify the Index Indicator before trying to decode the
  65. // Index, but well, I suppose it is simpler this way.
  66. if (in[(*in_pos)++] != INDEX_INDICATOR)
  67. return LZMA_DATA_ERROR;
  68. coder->sequence = SEQ_COUNT;
  69. break;
  70. case SEQ_COUNT:
  71. ret = lzma_vli_decode(&coder->count, &coder->pos,
  72. in, in_pos, in_size);
  73. if (ret != LZMA_STREAM_END)
  74. goto out;
  75. coder->pos = 0;
  76. coder->sequence = SEQ_MEMUSAGE;
  77. // Fall through
  78. case SEQ_MEMUSAGE:
  79. if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
  80. ret = LZMA_MEMLIMIT_ERROR;
  81. goto out;
  82. }
  83. // Tell the Index handling code how many Records this
  84. // Index has to allow it to allocate memory more efficiently.
  85. lzma_index_prealloc(coder->index, coder->count);
  86. ret = LZMA_OK;
  87. coder->sequence = coder->count == 0
  88. ? SEQ_PADDING_INIT : SEQ_UNPADDED;
  89. break;
  90. case SEQ_UNPADDED:
  91. case SEQ_UNCOMPRESSED: {
  92. lzma_vli *size = coder->sequence == SEQ_UNPADDED
  93. ? &coder->unpadded_size
  94. : &coder->uncompressed_size;
  95. ret = lzma_vli_decode(size, &coder->pos,
  96. in, in_pos, in_size);
  97. if (ret != LZMA_STREAM_END)
  98. goto out;
  99. ret = LZMA_OK;
  100. coder->pos = 0;
  101. if (coder->sequence == SEQ_UNPADDED) {
  102. // Validate that encoded Unpadded Size isn't too small
  103. // or too big.
  104. if (coder->unpadded_size < UNPADDED_SIZE_MIN
  105. || coder->unpadded_size
  106. > UNPADDED_SIZE_MAX)
  107. return LZMA_DATA_ERROR;
  108. coder->sequence = SEQ_UNCOMPRESSED;
  109. } else {
  110. // Add the decoded Record to the Index.
  111. return_if_error(lzma_index_append(
  112. coder->index, allocator,
  113. coder->unpadded_size,
  114. coder->uncompressed_size));
  115. // Check if this was the last Record.
  116. coder->sequence = --coder->count == 0
  117. ? SEQ_PADDING_INIT
  118. : SEQ_UNPADDED;
  119. }
  120. break;
  121. }
  122. case SEQ_PADDING_INIT:
  123. coder->pos = lzma_index_padding_size(coder->index);
  124. coder->sequence = SEQ_PADDING;
  125. // Fall through
  126. case SEQ_PADDING:
  127. if (coder->pos > 0) {
  128. --coder->pos;
  129. if (in[(*in_pos)++] != 0x00)
  130. return LZMA_DATA_ERROR;
  131. break;
  132. }
  133. // Finish the CRC32 calculation.
  134. coder->crc32 = lzma_crc32(in + in_start,
  135. *in_pos - in_start, coder->crc32);
  136. coder->sequence = SEQ_CRC32;
  137. // Fall through
  138. case SEQ_CRC32:
  139. do {
  140. if (*in_pos == in_size)
  141. return LZMA_OK;
  142. if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
  143. != in[(*in_pos)++]) {
  144. #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  145. return LZMA_DATA_ERROR;
  146. #endif
  147. }
  148. } while (++coder->pos < 4);
  149. // Decoding was successful, now we can let the application
  150. // see the decoded Index.
  151. *coder->index_ptr = coder->index;
  152. // Make index NULL so we don't free it unintentionally.
  153. coder->index = NULL;
  154. return LZMA_STREAM_END;
  155. default:
  156. assert(0);
  157. return LZMA_PROG_ERROR;
  158. }
  159. out:
  160. // Update the CRC32.
  161. //
  162. // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
  163. // In such a case we had no input and thus in_used == 0.
  164. {
  165. const size_t in_used = *in_pos - in_start;
  166. if (in_used > 0)
  167. coder->crc32 = lzma_crc32(in + in_start,
  168. in_used, coder->crc32);
  169. }
  170. return ret;
  171. }
  172. static void
  173. index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
  174. {
  175. lzma_index_coder *coder = coder_ptr;
  176. lzma_index_end(coder->index, allocator);
  177. lzma_free(coder, allocator);
  178. return;
  179. }
  180. static lzma_ret
  181. index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
  182. uint64_t *old_memlimit, uint64_t new_memlimit)
  183. {
  184. lzma_index_coder *coder = coder_ptr;
  185. *memusage = lzma_index_memusage(1, coder->count);
  186. *old_memlimit = coder->memlimit;
  187. if (new_memlimit != 0) {
  188. if (new_memlimit < *memusage)
  189. return LZMA_MEMLIMIT_ERROR;
  190. coder->memlimit = new_memlimit;
  191. }
  192. return LZMA_OK;
  193. }
  194. static lzma_ret
  195. index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
  196. lzma_index **i, uint64_t memlimit)
  197. {
  198. // Remember the pointer given by the application. We will set it
  199. // to point to the decoded Index only if decoding is successful.
  200. // Before that, keep it NULL so that applications can always safely
  201. // pass it to lzma_index_end() no matter did decoding succeed or not.
  202. coder->index_ptr = i;
  203. *i = NULL;
  204. // We always allocate a new lzma_index.
  205. coder->index = lzma_index_init(allocator);
  206. if (coder->index == NULL)
  207. return LZMA_MEM_ERROR;
  208. // Initialize the rest.
  209. coder->sequence = SEQ_INDICATOR;
  210. coder->memlimit = my_max(1, memlimit);
  211. coder->count = 0; // Needs to be initialized due to _memconfig().
  212. coder->pos = 0;
  213. coder->crc32 = 0;
  214. return LZMA_OK;
  215. }
  216. extern lzma_ret
  217. lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
  218. lzma_index **i, uint64_t memlimit)
  219. {
  220. lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
  221. if (i == NULL)
  222. return LZMA_PROG_ERROR;
  223. lzma_index_coder *coder = next->coder;
  224. if (coder == NULL) {
  225. coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
  226. if (coder == NULL)
  227. return LZMA_MEM_ERROR;
  228. next->coder = coder;
  229. next->code = &index_decode;
  230. next->end = &index_decoder_end;
  231. next->memconfig = &index_decoder_memconfig;
  232. coder->index = NULL;
  233. } else {
  234. lzma_index_end(coder->index, allocator);
  235. }
  236. return index_decoder_reset(coder, allocator, i, memlimit);
  237. }
  238. extern LZMA_API(lzma_ret)
  239. lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
  240. {
  241. // If i isn't NULL, *i must always be initialized due to
  242. // the wording in the API docs. This way it is initialized
  243. // if we return LZMA_PROG_ERROR due to strm == NULL.
  244. if (i != NULL)
  245. *i = NULL;
  246. lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
  247. strm->internal->supported_actions[LZMA_RUN] = true;
  248. strm->internal->supported_actions[LZMA_FINISH] = true;
  249. return LZMA_OK;
  250. }
  251. extern LZMA_API(lzma_ret)
  252. lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
  253. const lzma_allocator *allocator,
  254. const uint8_t *in, size_t *in_pos, size_t in_size)
  255. {
  256. // If i isn't NULL, *i must always be initialized due to
  257. // the wording in the API docs.
  258. if (i != NULL)
  259. *i = NULL;
  260. // Sanity checks
  261. if (i == NULL || memlimit == NULL
  262. || in == NULL || in_pos == NULL || *in_pos > in_size)
  263. return LZMA_PROG_ERROR;
  264. // Initialize the decoder.
  265. lzma_index_coder coder;
  266. return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
  267. // Store the input start position so that we can restore it in case
  268. // of an error.
  269. const size_t in_start = *in_pos;
  270. // Do the actual decoding.
  271. lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
  272. NULL, NULL, 0, LZMA_RUN);
  273. if (ret == LZMA_STREAM_END) {
  274. ret = LZMA_OK;
  275. } else {
  276. // Something went wrong, free the Index structure and restore
  277. // the input position.
  278. lzma_index_end(coder.index, allocator);
  279. *in_pos = in_start;
  280. if (ret == LZMA_OK) {
  281. // The input is truncated or otherwise corrupt.
  282. // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
  283. // like lzma_vli_decode() does in single-call mode.
  284. ret = LZMA_DATA_ERROR;
  285. } else if (ret == LZMA_MEMLIMIT_ERROR) {
  286. // Tell the caller how much memory would have
  287. // been needed.
  288. *memlimit = lzma_index_memusage(1, coder.count);
  289. }
  290. }
  291. return ret;
  292. }