index_decoder.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file index_decoder.c
  4. /// \brief Decodes the Index field
  5. //
  6. // Author: Lasse Collin
  7. //
  8. // This file has been put into the public domain.
  9. // You can do whatever you want with this file.
  10. //
  11. ///////////////////////////////////////////////////////////////////////////////
  12. #include "index_decoder.h"
  13. #include "check.h"
  14. typedef struct {
  15. enum {
  16. SEQ_INDICATOR,
  17. SEQ_COUNT,
  18. SEQ_MEMUSAGE,
  19. SEQ_UNPADDED,
  20. SEQ_UNCOMPRESSED,
  21. SEQ_PADDING_INIT,
  22. SEQ_PADDING,
  23. SEQ_CRC32,
  24. } sequence;
  25. /// Memory usage limit
  26. uint64_t memlimit;
  27. /// Target Index
  28. lzma_index *index;
  29. /// Pointer give by the application, which is set after
  30. /// successful decoding.
  31. lzma_index **index_ptr;
  32. /// Number of Records left to decode.
  33. lzma_vli count;
  34. /// The most recent Unpadded Size field
  35. lzma_vli unpadded_size;
  36. /// The most recent Uncompressed Size field
  37. lzma_vli uncompressed_size;
  38. /// Position in integers
  39. size_t pos;
  40. /// CRC32 of the List of Records field
  41. uint32_t crc32;
  42. } lzma_index_coder;
  43. static lzma_ret
  44. index_decode(void *coder_ptr, const lzma_allocator *allocator,
  45. const uint8_t *restrict in, size_t *restrict in_pos,
  46. size_t in_size,
  47. uint8_t *restrict out lzma_attribute((__unused__)),
  48. size_t *restrict out_pos lzma_attribute((__unused__)),
  49. size_t out_size lzma_attribute((__unused__)),
  50. lzma_action action lzma_attribute((__unused__)))
  51. {
  52. lzma_index_coder *coder = coder_ptr;
  53. // Similar optimization as in index_encoder.c
  54. const size_t in_start = *in_pos;
  55. lzma_ret ret = LZMA_OK;
  56. while (*in_pos < in_size)
  57. switch (coder->sequence) {
  58. case SEQ_INDICATOR:
  59. // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
  60. // LZMA_FORMAT_ERROR, because a typical usage case for Index
  61. // decoder is when parsing the Stream backwards. If seeking
  62. // backward from the Stream Footer gives us something that
  63. // doesn't begin with Index Indicator, the file is considered
  64. // corrupt, not "programming error" or "unrecognized file
  65. // format". One could argue that the application should
  66. // verify the Index Indicator before trying to decode the
  67. // Index, but well, I suppose it is simpler this way.
  68. if (in[(*in_pos)++] != 0x00)
  69. return LZMA_DATA_ERROR;
  70. coder->sequence = SEQ_COUNT;
  71. break;
  72. case SEQ_COUNT:
  73. ret = lzma_vli_decode(&coder->count, &coder->pos,
  74. in, in_pos, in_size);
  75. if (ret != LZMA_STREAM_END)
  76. goto out;
  77. coder->pos = 0;
  78. coder->sequence = SEQ_MEMUSAGE;
  79. // Fall through
  80. case SEQ_MEMUSAGE:
  81. if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
  82. ret = LZMA_MEMLIMIT_ERROR;
  83. goto out;
  84. }
  85. // Tell the Index handling code how many Records this
  86. // Index has to allow it to allocate memory more efficiently.
  87. lzma_index_prealloc(coder->index, coder->count);
  88. ret = LZMA_OK;
  89. coder->sequence = coder->count == 0
  90. ? SEQ_PADDING_INIT : SEQ_UNPADDED;
  91. break;
  92. case SEQ_UNPADDED:
  93. case SEQ_UNCOMPRESSED: {
  94. lzma_vli *size = coder->sequence == SEQ_UNPADDED
  95. ? &coder->unpadded_size
  96. : &coder->uncompressed_size;
  97. ret = lzma_vli_decode(size, &coder->pos,
  98. in, in_pos, in_size);
  99. if (ret != LZMA_STREAM_END)
  100. goto out;
  101. ret = LZMA_OK;
  102. coder->pos = 0;
  103. if (coder->sequence == SEQ_UNPADDED) {
  104. // Validate that encoded Unpadded Size isn't too small
  105. // or too big.
  106. if (coder->unpadded_size < UNPADDED_SIZE_MIN
  107. || coder->unpadded_size
  108. > UNPADDED_SIZE_MAX)
  109. return LZMA_DATA_ERROR;
  110. coder->sequence = SEQ_UNCOMPRESSED;
  111. } else {
  112. // Add the decoded Record to the Index.
  113. return_if_error(lzma_index_append(
  114. coder->index, allocator,
  115. coder->unpadded_size,
  116. coder->uncompressed_size));
  117. // Check if this was the last Record.
  118. coder->sequence = --coder->count == 0
  119. ? SEQ_PADDING_INIT
  120. : SEQ_UNPADDED;
  121. }
  122. break;
  123. }
  124. case SEQ_PADDING_INIT:
  125. coder->pos = lzma_index_padding_size(coder->index);
  126. coder->sequence = SEQ_PADDING;
  127. // Fall through
  128. case SEQ_PADDING:
  129. if (coder->pos > 0) {
  130. --coder->pos;
  131. if (in[(*in_pos)++] != 0x00)
  132. return LZMA_DATA_ERROR;
  133. break;
  134. }
  135. // Finish the CRC32 calculation.
  136. coder->crc32 = lzma_crc32(in + in_start,
  137. *in_pos - in_start, coder->crc32);
  138. coder->sequence = SEQ_CRC32;
  139. // Fall through
  140. case SEQ_CRC32:
  141. do {
  142. if (*in_pos == in_size)
  143. return LZMA_OK;
  144. if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
  145. != in[(*in_pos)++]) {
  146. #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  147. return LZMA_DATA_ERROR;
  148. #endif
  149. }
  150. } while (++coder->pos < 4);
  151. // Decoding was successful, now we can let the application
  152. // see the decoded Index.
  153. *coder->index_ptr = coder->index;
  154. // Make index NULL so we don't free it unintentionally.
  155. coder->index = NULL;
  156. return LZMA_STREAM_END;
  157. default:
  158. assert(0);
  159. return LZMA_PROG_ERROR;
  160. }
  161. out:
  162. // Update the CRC32,
  163. coder->crc32 = lzma_crc32(in + in_start,
  164. *in_pos - in_start, coder->crc32);
  165. return ret;
  166. }
  167. static void
  168. index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
  169. {
  170. lzma_index_coder *coder = coder_ptr;
  171. lzma_index_end(coder->index, allocator);
  172. lzma_free(coder, allocator);
  173. return;
  174. }
  175. static lzma_ret
  176. index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
  177. uint64_t *old_memlimit, uint64_t new_memlimit)
  178. {
  179. lzma_index_coder *coder = coder_ptr;
  180. *memusage = lzma_index_memusage(1, coder->count);
  181. *old_memlimit = coder->memlimit;
  182. if (new_memlimit != 0) {
  183. if (new_memlimit < *memusage)
  184. return LZMA_MEMLIMIT_ERROR;
  185. coder->memlimit = new_memlimit;
  186. }
  187. return LZMA_OK;
  188. }
  189. static lzma_ret
  190. index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
  191. lzma_index **i, uint64_t memlimit)
  192. {
  193. // Remember the pointer given by the application. We will set it
  194. // to point to the decoded Index only if decoding is successful.
  195. // Before that, keep it NULL so that applications can always safely
  196. // pass it to lzma_index_end() no matter did decoding succeed or not.
  197. coder->index_ptr = i;
  198. *i = NULL;
  199. // We always allocate a new lzma_index.
  200. coder->index = lzma_index_init(allocator);
  201. if (coder->index == NULL)
  202. return LZMA_MEM_ERROR;
  203. // Initialize the rest.
  204. coder->sequence = SEQ_INDICATOR;
  205. coder->memlimit = my_max(1, memlimit);
  206. coder->count = 0; // Needs to be initialized due to _memconfig().
  207. coder->pos = 0;
  208. coder->crc32 = 0;
  209. return LZMA_OK;
  210. }
  211. extern lzma_ret
  212. lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
  213. lzma_index **i, uint64_t memlimit)
  214. {
  215. lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
  216. if (i == NULL)
  217. return LZMA_PROG_ERROR;
  218. lzma_index_coder *coder = next->coder;
  219. if (coder == NULL) {
  220. coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
  221. if (coder == NULL)
  222. return LZMA_MEM_ERROR;
  223. next->coder = coder;
  224. next->code = &index_decode;
  225. next->end = &index_decoder_end;
  226. next->memconfig = &index_decoder_memconfig;
  227. coder->index = NULL;
  228. } else {
  229. lzma_index_end(coder->index, allocator);
  230. }
  231. return index_decoder_reset(coder, allocator, i, memlimit);
  232. }
  233. extern LZMA_API(lzma_ret)
  234. lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
  235. {
  236. lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
  237. strm->internal->supported_actions[LZMA_RUN] = true;
  238. strm->internal->supported_actions[LZMA_FINISH] = true;
  239. return LZMA_OK;
  240. }
  241. extern LZMA_API(lzma_ret)
  242. lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
  243. const lzma_allocator *allocator,
  244. const uint8_t *in, size_t *in_pos, size_t in_size)
  245. {
  246. // Sanity checks
  247. if (i == NULL || memlimit == NULL
  248. || in == NULL || in_pos == NULL || *in_pos > in_size)
  249. return LZMA_PROG_ERROR;
  250. // Initialize the decoder.
  251. lzma_index_coder coder;
  252. return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
  253. // Store the input start position so that we can restore it in case
  254. // of an error.
  255. const size_t in_start = *in_pos;
  256. // Do the actual decoding.
  257. lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
  258. NULL, NULL, 0, LZMA_RUN);
  259. if (ret == LZMA_STREAM_END) {
  260. ret = LZMA_OK;
  261. } else {
  262. // Something went wrong, free the Index structure and restore
  263. // the input position.
  264. lzma_index_end(coder.index, allocator);
  265. *in_pos = in_start;
  266. if (ret == LZMA_OK) {
  267. // The input is truncated or otherwise corrupt.
  268. // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
  269. // like lzma_vli_decode() does in single-call mode.
  270. ret = LZMA_DATA_ERROR;
  271. } else if (ret == LZMA_MEMLIMIT_ERROR) {
  272. // Tell the caller how much memory would have
  273. // been needed.
  274. *memlimit = lzma_index_memusage(1, coder.count);
  275. }
  276. }
  277. return ret;
  278. }