stream_decoder.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. // SPDX-License-Identifier: 0BSD
  2. ///////////////////////////////////////////////////////////////////////////////
  3. //
  4. /// \file stream_decoder.c
  5. /// \brief Decodes .xz Streams
  6. //
  7. // Author: Lasse Collin
  8. //
  9. ///////////////////////////////////////////////////////////////////////////////
  10. #include "stream_decoder.h"
  11. #include "block_decoder.h"
  12. #include "index.h"
  13. typedef struct {
  14. enum {
  15. SEQ_STREAM_HEADER,
  16. SEQ_BLOCK_HEADER,
  17. SEQ_BLOCK_INIT,
  18. SEQ_BLOCK_RUN,
  19. SEQ_INDEX,
  20. SEQ_STREAM_FOOTER,
  21. SEQ_STREAM_PADDING,
  22. } sequence;
  23. /// Block decoder
  24. lzma_next_coder block_decoder;
  25. /// Block options decoded by the Block Header decoder and used by
  26. /// the Block decoder.
  27. lzma_block block_options;
  28. /// Stream Flags from Stream Header
  29. lzma_stream_flags stream_flags;
  30. /// Index is hashed so that it can be compared to the sizes of Blocks
  31. /// with O(1) memory usage.
  32. lzma_index_hash *index_hash;
  33. /// Memory usage limit
  34. uint64_t memlimit;
  35. /// Amount of memory actually needed (only an estimate)
  36. uint64_t memusage;
  37. /// If true, LZMA_NO_CHECK is returned if the Stream has
  38. /// no integrity check.
  39. bool tell_no_check;
  40. /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
  41. /// an integrity check that isn't supported by this liblzma build.
  42. bool tell_unsupported_check;
  43. /// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
  44. bool tell_any_check;
  45. /// If true, we will tell the Block decoder to skip calculating
  46. /// and verifying the integrity check.
  47. bool ignore_check;
  48. /// If true, we will decode concatenated Streams that possibly have
  49. /// Stream Padding between or after them. LZMA_STREAM_END is returned
  50. /// once the application isn't giving us any new input (LZMA_FINISH),
  51. /// and we aren't in the middle of a Stream, and possible
  52. /// Stream Padding is a multiple of four bytes.
  53. bool concatenated;
  54. /// When decoding concatenated Streams, this is true as long as we
  55. /// are decoding the first Stream. This is needed to avoid misleading
  56. /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
  57. /// bytes.
  58. bool first_stream;
  59. /// Write position in buffer[] and position in Stream Padding
  60. size_t pos;
  61. /// Buffer to hold Stream Header, Block Header, and Stream Footer.
  62. /// Block Header has biggest maximum size.
  63. uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
  64. } lzma_stream_coder;
  65. static lzma_ret
  66. stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator)
  67. {
  68. // Initialize the Index hash used to verify the Index.
  69. coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
  70. if (coder->index_hash == NULL)
  71. return LZMA_MEM_ERROR;
  72. // Reset the rest of the variables.
  73. coder->sequence = SEQ_STREAM_HEADER;
  74. coder->pos = 0;
  75. return LZMA_OK;
  76. }
  77. static lzma_ret
  78. stream_decode(void *coder_ptr, const lzma_allocator *allocator,
  79. const uint8_t *restrict in, size_t *restrict in_pos,
  80. size_t in_size, uint8_t *restrict out,
  81. size_t *restrict out_pos, size_t out_size, lzma_action action)
  82. {
  83. lzma_stream_coder *coder = coder_ptr;
  84. // When decoding the actual Block, it may be able to produce more
  85. // output even if we don't give it any new input.
  86. while (true)
  87. switch (coder->sequence) {
  88. case SEQ_STREAM_HEADER: {
  89. // Copy the Stream Header to the internal buffer.
  90. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  91. LZMA_STREAM_HEADER_SIZE);
  92. // Return if we didn't get the whole Stream Header yet.
  93. if (coder->pos < LZMA_STREAM_HEADER_SIZE)
  94. return LZMA_OK;
  95. coder->pos = 0;
  96. // Decode the Stream Header.
  97. const lzma_ret ret = lzma_stream_header_decode(
  98. &coder->stream_flags, coder->buffer);
  99. if (ret != LZMA_OK)
  100. return ret == LZMA_FORMAT_ERROR && !coder->first_stream
  101. ? LZMA_DATA_ERROR : ret;
  102. // If we are decoding concatenated Streams, and the later
  103. // Streams have invalid Header Magic Bytes, we give
  104. // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
  105. coder->first_stream = false;
  106. // Copy the type of the Check so that Block Header and Block
  107. // decoders see it.
  108. coder->block_options.check = coder->stream_flags.check;
  109. // Even if we return LZMA_*_CHECK below, we want
  110. // to continue from Block Header decoding.
  111. coder->sequence = SEQ_BLOCK_HEADER;
  112. // Detect if there's no integrity check or if it is
  113. // unsupported if those were requested by the application.
  114. if (coder->tell_no_check && coder->stream_flags.check
  115. == LZMA_CHECK_NONE)
  116. return LZMA_NO_CHECK;
  117. if (coder->tell_unsupported_check
  118. && !lzma_check_is_supported(
  119. coder->stream_flags.check))
  120. return LZMA_UNSUPPORTED_CHECK;
  121. if (coder->tell_any_check)
  122. return LZMA_GET_CHECK;
  123. }
  124. // Fall through
  125. case SEQ_BLOCK_HEADER: {
  126. if (*in_pos >= in_size)
  127. return LZMA_OK;
  128. if (coder->pos == 0) {
  129. // Detect if it's Index.
  130. if (in[*in_pos] == INDEX_INDICATOR) {
  131. coder->sequence = SEQ_INDEX;
  132. break;
  133. }
  134. // Calculate the size of the Block Header. Note that
  135. // Block Header decoder wants to see this byte too
  136. // so don't advance *in_pos.
  137. coder->block_options.header_size
  138. = lzma_block_header_size_decode(
  139. in[*in_pos]);
  140. }
  141. // Copy the Block Header to the internal buffer.
  142. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  143. coder->block_options.header_size);
  144. // Return if we didn't get the whole Block Header yet.
  145. if (coder->pos < coder->block_options.header_size)
  146. return LZMA_OK;
  147. coder->pos = 0;
  148. coder->sequence = SEQ_BLOCK_INIT;
  149. }
  150. // Fall through
  151. case SEQ_BLOCK_INIT: {
  152. // Checking memusage and doing the initialization needs
  153. // its own sequence point because we need to be able to
  154. // retry if we return LZMA_MEMLIMIT_ERROR.
  155. // Version 1 is needed to support the .ignore_check option.
  156. coder->block_options.version = 1;
  157. // Set up a buffer to hold the filter chain. Block Header
  158. // decoder will initialize all members of this array so
  159. // we don't need to do it here.
  160. lzma_filter filters[LZMA_FILTERS_MAX + 1];
  161. coder->block_options.filters = filters;
  162. // Decode the Block Header.
  163. return_if_error(lzma_block_header_decode(&coder->block_options,
  164. allocator, coder->buffer));
  165. // If LZMA_IGNORE_CHECK was used, this flag needs to be set.
  166. // It has to be set after lzma_block_header_decode() because
  167. // it always resets this to false.
  168. coder->block_options.ignore_check = coder->ignore_check;
  169. // Check the memory usage limit.
  170. const uint64_t memusage = lzma_raw_decoder_memusage(filters);
  171. lzma_ret ret;
  172. if (memusage == UINT64_MAX) {
  173. // One or more unknown Filter IDs.
  174. ret = LZMA_OPTIONS_ERROR;
  175. } else {
  176. // Now we can set coder->memusage since we know that
  177. // the filter chain is valid. We don't want
  178. // lzma_memusage() to return UINT64_MAX in case of
  179. // invalid filter chain.
  180. coder->memusage = memusage;
  181. if (memusage > coder->memlimit) {
  182. // The chain would need too much memory.
  183. ret = LZMA_MEMLIMIT_ERROR;
  184. } else {
  185. // Memory usage is OK.
  186. // Initialize the Block decoder.
  187. ret = lzma_block_decoder_init(
  188. &coder->block_decoder,
  189. allocator,
  190. &coder->block_options);
  191. }
  192. }
  193. // Free the allocated filter options since they are needed
  194. // only to initialize the Block decoder.
  195. lzma_filters_free(filters, allocator);
  196. coder->block_options.filters = NULL;
  197. // Check if memory usage calculation and Block decoder
  198. // initialization succeeded.
  199. if (ret != LZMA_OK)
  200. return ret;
  201. coder->sequence = SEQ_BLOCK_RUN;
  202. }
  203. // Fall through
  204. case SEQ_BLOCK_RUN: {
  205. const lzma_ret ret = coder->block_decoder.code(
  206. coder->block_decoder.coder, allocator,
  207. in, in_pos, in_size, out, out_pos, out_size,
  208. action);
  209. if (ret != LZMA_STREAM_END)
  210. return ret;
  211. // Block decoded successfully. Add the new size pair to
  212. // the Index hash.
  213. return_if_error(lzma_index_hash_append(coder->index_hash,
  214. lzma_block_unpadded_size(
  215. &coder->block_options),
  216. coder->block_options.uncompressed_size));
  217. coder->sequence = SEQ_BLOCK_HEADER;
  218. break;
  219. }
  220. case SEQ_INDEX: {
  221. // If we don't have any input, don't call
  222. // lzma_index_hash_decode() since it would return
  223. // LZMA_BUF_ERROR, which we must not do here.
  224. if (*in_pos >= in_size)
  225. return LZMA_OK;
  226. // Decode the Index and compare it to the hash calculated
  227. // from the sizes of the Blocks (if any).
  228. const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
  229. in, in_pos, in_size);
  230. if (ret != LZMA_STREAM_END)
  231. return ret;
  232. coder->sequence = SEQ_STREAM_FOOTER;
  233. }
  234. // Fall through
  235. case SEQ_STREAM_FOOTER: {
  236. // Copy the Stream Footer to the internal buffer.
  237. lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
  238. LZMA_STREAM_HEADER_SIZE);
  239. // Return if we didn't get the whole Stream Footer yet.
  240. if (coder->pos < LZMA_STREAM_HEADER_SIZE)
  241. return LZMA_OK;
  242. coder->pos = 0;
  243. // Decode the Stream Footer. The decoder gives
  244. // LZMA_FORMAT_ERROR if the magic bytes don't match,
  245. // so convert that return code to LZMA_DATA_ERROR.
  246. lzma_stream_flags footer_flags;
  247. const lzma_ret ret = lzma_stream_footer_decode(
  248. &footer_flags, coder->buffer);
  249. if (ret != LZMA_OK)
  250. return ret == LZMA_FORMAT_ERROR
  251. ? LZMA_DATA_ERROR : ret;
  252. // Check that Index Size stored in the Stream Footer matches
  253. // the real size of the Index field.
  254. if (lzma_index_hash_size(coder->index_hash)
  255. != footer_flags.backward_size)
  256. return LZMA_DATA_ERROR;
  257. // Compare that the Stream Flags fields are identical in
  258. // both Stream Header and Stream Footer.
  259. return_if_error(lzma_stream_flags_compare(
  260. &coder->stream_flags, &footer_flags));
  261. if (!coder->concatenated)
  262. return LZMA_STREAM_END;
  263. coder->sequence = SEQ_STREAM_PADDING;
  264. }
  265. // Fall through
  266. case SEQ_STREAM_PADDING:
  267. assert(coder->concatenated);
  268. // Skip over possible Stream Padding.
  269. while (true) {
  270. if (*in_pos >= in_size) {
  271. // Unless LZMA_FINISH was used, we cannot
  272. // know if there's more input coming later.
  273. if (action != LZMA_FINISH)
  274. return LZMA_OK;
  275. // Stream Padding must be a multiple of
  276. // four bytes.
  277. return coder->pos == 0
  278. ? LZMA_STREAM_END
  279. : LZMA_DATA_ERROR;
  280. }
  281. // If the byte is not zero, it probably indicates
  282. // beginning of a new Stream (or the file is corrupt).
  283. if (in[*in_pos] != 0x00)
  284. break;
  285. ++*in_pos;
  286. coder->pos = (coder->pos + 1) & 3;
  287. }
  288. // Stream Padding must be a multiple of four bytes (empty
  289. // Stream Padding is OK).
  290. if (coder->pos != 0) {
  291. ++*in_pos;
  292. return LZMA_DATA_ERROR;
  293. }
  294. // Prepare to decode the next Stream.
  295. return_if_error(stream_decoder_reset(coder, allocator));
  296. break;
  297. default:
  298. assert(0);
  299. return LZMA_PROG_ERROR;
  300. }
  301. // Never reached
  302. }
  303. static void
  304. stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
  305. {
  306. lzma_stream_coder *coder = coder_ptr;
  307. lzma_next_end(&coder->block_decoder, allocator);
  308. lzma_index_hash_end(coder->index_hash, allocator);
  309. lzma_free(coder, allocator);
  310. return;
  311. }
  312. static lzma_check
  313. stream_decoder_get_check(const void *coder_ptr)
  314. {
  315. const lzma_stream_coder *coder = coder_ptr;
  316. return coder->stream_flags.check;
  317. }
  318. static lzma_ret
  319. stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
  320. uint64_t *old_memlimit, uint64_t new_memlimit)
  321. {
  322. lzma_stream_coder *coder = coder_ptr;
  323. *memusage = coder->memusage;
  324. *old_memlimit = coder->memlimit;
  325. if (new_memlimit != 0) {
  326. if (new_memlimit < coder->memusage)
  327. return LZMA_MEMLIMIT_ERROR;
  328. coder->memlimit = new_memlimit;
  329. }
  330. return LZMA_OK;
  331. }
  332. extern lzma_ret
  333. lzma_stream_decoder_init(
  334. lzma_next_coder *next, const lzma_allocator *allocator,
  335. uint64_t memlimit, uint32_t flags)
  336. {
  337. lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
  338. if (flags & ~LZMA_SUPPORTED_FLAGS)
  339. return LZMA_OPTIONS_ERROR;
  340. lzma_stream_coder *coder = next->coder;
  341. if (coder == NULL) {
  342. coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
  343. if (coder == NULL)
  344. return LZMA_MEM_ERROR;
  345. next->coder = coder;
  346. next->code = &stream_decode;
  347. next->end = &stream_decoder_end;
  348. next->get_check = &stream_decoder_get_check;
  349. next->memconfig = &stream_decoder_memconfig;
  350. coder->block_decoder = LZMA_NEXT_CODER_INIT;
  351. coder->index_hash = NULL;
  352. }
  353. coder->memlimit = my_max(1, memlimit);
  354. coder->memusage = LZMA_MEMUSAGE_BASE;
  355. coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
  356. coder->tell_unsupported_check
  357. = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
  358. coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
  359. coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
  360. coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
  361. coder->first_stream = true;
  362. return stream_decoder_reset(coder, allocator);
  363. }
  364. extern LZMA_API(lzma_ret)
  365. lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
  366. {
  367. lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags);
  368. strm->internal->supported_actions[LZMA_RUN] = true;
  369. strm->internal->supported_actions[LZMA_FINISH] = true;
  370. return LZMA_OK;
  371. }