file_info.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_info.c
/// \brief      Decode .xz file information into a lzma_index structure
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
  12. #include "index_decoder.h"
  13. typedef struct {
  14. enum {
  15. SEQ_MAGIC_BYTES,
  16. SEQ_PADDING_SEEK,
  17. SEQ_PADDING_DECODE,
  18. SEQ_FOOTER,
  19. SEQ_INDEX_INIT,
  20. SEQ_INDEX_DECODE,
  21. SEQ_HEADER_DECODE,
  22. SEQ_HEADER_COMPARE,
  23. } sequence;
  24. /// Absolute position of in[*in_pos] in the file. All code that
  25. /// modifies *in_pos also updates this. seek_to_pos() needs this
  26. /// to determine if we need to request the application to seek for
  27. /// us or if we can do the seeking internally by adjusting *in_pos.
  28. uint64_t file_cur_pos;
  29. /// This refers to absolute positions of interesting parts of the
  30. /// input file. Sometimes it points to the *beginning* of a specific
  31. /// field and sometimes to the *end* of a field. The current target
  32. /// position at each moment is explained in the comments.
  33. uint64_t file_target_pos;
  34. /// Size of the .xz file (from the application).
  35. uint64_t file_size;
  36. /// Index decoder
  37. lzma_next_coder index_decoder;
  38. /// Number of bytes remaining in the Index field that is currently
  39. /// being decoded.
  40. lzma_vli index_remaining;
  41. /// The Index decoder will store the decoded Index in this pointer.
  42. lzma_index *this_index;
  43. /// Amount of Stream Padding in the current Stream.
  44. lzma_vli stream_padding;
  45. /// The final combined index is collected here.
  46. lzma_index *combined_index;
  47. /// Pointer from the application where to store the index information
  48. /// after successful decoding.
  49. lzma_index **dest_index;
  50. /// Pointer to lzma_stream.seek_pos to be used when returning
  51. /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
  52. uint64_t *external_seek_pos;
  53. /// Memory usage limit
  54. uint64_t memlimit;
  55. /// Stream Flags from the very beginning of the file.
  56. lzma_stream_flags first_header_flags;
  57. /// Stream Flags from Stream Header of the current Stream.
  58. lzma_stream_flags header_flags;
  59. /// Stream Flags from Stream Footer of the current Stream.
  60. lzma_stream_flags footer_flags;
  61. size_t temp_pos;
  62. size_t temp_size;
  63. uint8_t temp[8192];
  64. } lzma_file_info_coder;
  65. /// Copies data from in[*in_pos] into coder->temp until
  66. /// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
  67. /// in sync with *in_pos. Returns true if more input is needed.
  68. static bool
  69. fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
  70. size_t *restrict in_pos, size_t in_size)
  71. {
  72. coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
  73. coder->temp, &coder->temp_pos, coder->temp_size);
  74. return coder->temp_pos < coder->temp_size;
  75. }
  76. /// Seeks to the absolute file position specified by target_pos.
  77. /// This tries to do the seeking by only modifying *in_pos, if possible.
  78. /// The main benefit of this is that if one passes the whole file at once
  79. /// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
  80. /// as all the seeking can be done by adjusting *in_pos in this function.
  81. ///
  82. /// Returns true if an external seek is needed and the caller must return
  83. /// LZMA_SEEK_NEEDED.
  84. static bool
  85. seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
  86. size_t in_start, size_t *in_pos, size_t in_size)
  87. {
  88. // The input buffer doesn't extend beyond the end of the file.
  89. // This has been checked by file_info_decode() already.
  90. assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
  91. const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
  92. const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
  93. bool external_seek_needed;
  94. if (target_pos >= pos_min && target_pos <= pos_max) {
  95. // The requested position is available in the current input
  96. // buffer or right after it. That is, in a corner case we
  97. // end up setting *in_pos == in_size and thus will immediately
  98. // need new input bytes from the application.
  99. *in_pos += (size_t)(target_pos - coder->file_cur_pos);
  100. external_seek_needed = false;
  101. } else {
  102. // Ask the application to seek the input file.
  103. *coder->external_seek_pos = target_pos;
  104. external_seek_needed = true;
  105. // Mark the whole input buffer as used. This way
  106. // lzma_stream.total_in will have a better estimate
  107. // of the amount of data read. It still won't be perfect
  108. // as the value will depend on the input buffer size that
  109. // the application uses, but it should be good enough for
  110. // those few who want an estimate.
  111. *in_pos = in_size;
  112. }
  113. // After seeking (internal or external) the current position
  114. // will match the requested target position.
  115. coder->file_cur_pos = target_pos;
  116. return external_seek_needed;
  117. }
  118. /// The caller sets coder->file_target_pos so that it points to the *end*
  119. /// of the desired file position. This function then determines how far
  120. /// backwards from that position we can seek. After seeking fill_temp()
  121. /// can be used to read data into coder->temp. When fill_temp() has finished,
  122. /// coder->temp[coder->temp_size] will match coder->file_target_pos.
  123. ///
  124. /// This also validates that coder->target_file_pos is sane in sense that
  125. /// we aren't trying to seek too far backwards (too close or beyond the
  126. /// beginning of the file).
  127. static lzma_ret
  128. reverse_seek(lzma_file_info_coder *coder,
  129. size_t in_start, size_t *in_pos, size_t in_size)
  130. {
  131. // Check that there is enough data before the target position
  132. // to contain at least Stream Header and Stream Footer. If there
  133. // isn't, the file cannot be valid.
  134. if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
  135. return LZMA_DATA_ERROR;
  136. coder->temp_pos = 0;
  137. // The Stream Header at the very beginning of the file gets handled
  138. // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
  139. // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
  140. // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
  141. // application uses an extremely small input buffer and the input
  142. // file is very small.
  143. if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
  144. < sizeof(coder->temp))
  145. coder->temp_size = (size_t)(coder->file_target_pos
  146. - LZMA_STREAM_HEADER_SIZE);
  147. else
  148. coder->temp_size = sizeof(coder->temp);
  149. // The above if-statements guarantee this. This is important because
  150. // the Stream Header/Footer decoders assume that there's at least
  151. // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
  152. assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
  153. if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
  154. in_start, in_pos, in_size))
  155. return LZMA_SEEK_NEEDED;
  156. return LZMA_OK;
  157. }
  158. /// Gets the number of zero-bytes at the end of the buffer.
  159. static size_t
  160. get_padding_size(const uint8_t *buf, size_t buf_size)
  161. {
  162. size_t padding = 0;
  163. while (buf_size > 0 && buf[--buf_size] == 0x00)
  164. ++padding;
  165. return padding;
  166. }
  167. /// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
  168. /// is used to tell the application that Magic Bytes didn't match. In other
  169. /// Stream Header/Footer fields (in the middle/end of the file) it could be
  170. /// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
  171. /// is a valid Stream Header at the beginning of the file. For those cases
  172. /// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
  173. static lzma_ret
  174. hide_format_error(lzma_ret ret)
  175. {
  176. if (ret == LZMA_FORMAT_ERROR)
  177. ret = LZMA_DATA_ERROR;
  178. return ret;
  179. }
  180. /// Calls the Index decoder and updates coder->index_remaining.
  181. /// This is a separate function because the input can be either directly
  182. /// from the application or from coder->temp.
  183. static lzma_ret
  184. decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
  185. const uint8_t *restrict in, size_t *restrict in_pos,
  186. size_t in_size, bool update_file_cur_pos)
  187. {
  188. const size_t in_start = *in_pos;
  189. const lzma_ret ret = coder->index_decoder.code(
  190. coder->index_decoder.coder,
  191. allocator, in, in_pos, in_size,
  192. NULL, NULL, 0, LZMA_RUN);
  193. coder->index_remaining -= *in_pos - in_start;
  194. if (update_file_cur_pos)
  195. coder->file_cur_pos += *in_pos - in_start;
  196. return ret;
  197. }
  198. static lzma_ret
  199. file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
  200. const uint8_t *restrict in, size_t *restrict in_pos,
  201. size_t in_size,
  202. uint8_t *restrict out lzma_attribute((__unused__)),
  203. size_t *restrict out_pos lzma_attribute((__unused__)),
  204. size_t out_size lzma_attribute((__unused__)),
  205. lzma_action action lzma_attribute((__unused__)))
  206. {
  207. lzma_file_info_coder *coder = coder_ptr;
  208. const size_t in_start = *in_pos;
  209. // If the caller provides input past the end of the file, trim
  210. // the extra bytes from the buffer so that we won't read too far.
  211. assert(coder->file_size >= coder->file_cur_pos);
  212. if (coder->file_size - coder->file_cur_pos < in_size - in_start)
  213. in_size = in_start
  214. + (size_t)(coder->file_size - coder->file_cur_pos);
  215. while (true)
  216. switch (coder->sequence) {
  217. case SEQ_MAGIC_BYTES:
  218. // Decode the Stream Header at the beginning of the file
  219. // first to check if the Magic Bytes match. The flags
  220. // are stored in coder->first_header_flags so that we
  221. // don't need to seek to it again.
  222. //
  223. // Check that the file is big enough to contain at least
  224. // Stream Header.
  225. if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
  226. return LZMA_FORMAT_ERROR;
  227. // Read the Stream Header field into coder->temp.
  228. if (fill_temp(coder, in, in_pos, in_size))
  229. return LZMA_OK;
  230. // This is the only Stream Header/Footer decoding where we
  231. // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
  232. // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
  233. return_if_error(lzma_stream_header_decode(
  234. &coder->first_header_flags, coder->temp));
  235. // Now that we know that the Magic Bytes match, check the
  236. // file size. It's better to do this here after checking the
  237. // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
  238. // instead of LZMA_DATA_ERROR when the Magic Bytes don't
  239. // match in a file that is too big or isn't a multiple of
  240. // four bytes.
  241. if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
  242. return LZMA_DATA_ERROR;
  243. // Start looking for Stream Padding and Stream Footer
  244. // at the end of the file.
  245. coder->file_target_pos = coder->file_size;
  246. // Fall through
  247. case SEQ_PADDING_SEEK:
  248. coder->sequence = SEQ_PADDING_DECODE;
  249. return_if_error(reverse_seek(
  250. coder, in_start, in_pos, in_size));
  251. // Fall through
  252. case SEQ_PADDING_DECODE: {
  253. // Copy to coder->temp first. This keeps the code simpler if
  254. // the application only provides input a few bytes at a time.
  255. if (fill_temp(coder, in, in_pos, in_size))
  256. return LZMA_OK;
  257. // Scan the buffer backwards to get the size of the
  258. // Stream Padding field (if any).
  259. const size_t new_padding = get_padding_size(
  260. coder->temp, coder->temp_size);
  261. coder->stream_padding += new_padding;
  262. // Set the target position to the beginning of Stream Padding
  263. // that has been observed so far. If all Stream Padding has
  264. // been seen, then the target position will be at the end
  265. // of the Stream Footer field.
  266. coder->file_target_pos -= new_padding;
  267. if (new_padding == coder->temp_size) {
  268. // The whole buffer was padding. Seek backwards in
  269. // the file to get more input.
  270. coder->sequence = SEQ_PADDING_SEEK;
  271. break;
  272. }
  273. // Size of Stream Padding must be a multiple of 4 bytes.
  274. if (coder->stream_padding & 3)
  275. return LZMA_DATA_ERROR;
  276. coder->sequence = SEQ_FOOTER;
  277. // Calculate the amount of non-padding data in coder->temp.
  278. coder->temp_size -= new_padding;
  279. coder->temp_pos = coder->temp_size;
  280. // We can avoid an external seek if the whole Stream Footer
  281. // is already in coder->temp. In that case SEQ_FOOTER won't
  282. // read more input and will find the Stream Footer from
  283. // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
  284. //
  285. // Otherwise we will need to seek. The seeking is done so
  286. // that Stream Footer wil be at the end of coder->temp.
  287. // This way it's likely that we also get a complete Index
  288. // field into coder->temp without needing a separate seek
  289. // for that (unless the Index field is big).
  290. if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
  291. return_if_error(reverse_seek(
  292. coder, in_start, in_pos, in_size));
  293. }
  294. // Fall through
  295. case SEQ_FOOTER:
  296. // Copy the Stream Footer field into coder->temp.
  297. // If Stream Footer was already available in coder->temp
  298. // in SEQ_PADDING_DECODE, then this does nothing.
  299. if (fill_temp(coder, in, in_pos, in_size))
  300. return LZMA_OK;
  301. // Make coder->file_target_pos and coder->temp_size point
  302. // to the beginning of Stream Footer and thus to the end
  303. // of the Index field. coder->temp_pos will be updated
  304. // a bit later.
  305. coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
  306. coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
  307. // Decode Stream Footer.
  308. return_if_error(hide_format_error(lzma_stream_footer_decode(
  309. &coder->footer_flags,
  310. coder->temp + coder->temp_size)));
  311. // Check that we won't seek past the beginning of the file.
  312. //
  313. // LZMA_STREAM_HEADER_SIZE is added because there must be
  314. // space for Stream Header too even though we won't seek
  315. // there before decoding the Index field.
  316. //
  317. // There's no risk of integer overflow here because
  318. // Backward Size cannot be greater than 2^34.
  319. if (coder->file_target_pos < coder->footer_flags.backward_size
  320. + LZMA_STREAM_HEADER_SIZE)
  321. return LZMA_DATA_ERROR;
  322. // Set the target position to the beginning of the Index field.
  323. coder->file_target_pos -= coder->footer_flags.backward_size;
  324. coder->sequence = SEQ_INDEX_INIT;
  325. // We can avoid an external seek if the whole Index field is
  326. // already available in coder->temp.
  327. if (coder->temp_size >= coder->footer_flags.backward_size) {
  328. // Set coder->temp_pos to point to the beginning
  329. // of the Index.
  330. coder->temp_pos = coder->temp_size
  331. - coder->footer_flags.backward_size;
  332. } else {
  333. // These are set to zero to indicate that there's no
  334. // useful data (Index or anything else) in coder->temp.
  335. coder->temp_pos = 0;
  336. coder->temp_size = 0;
  337. // Seek to the beginning of the Index field.
  338. if (seek_to_pos(coder, coder->file_target_pos,
  339. in_start, in_pos, in_size))
  340. return LZMA_SEEK_NEEDED;
  341. }
  342. // Fall through
  343. case SEQ_INDEX_INIT: {
  344. // Calculate the amount of memory already used by the earlier
  345. // Indexes so that we know how big memory limit to pass to
  346. // the Index decoder.
  347. //
  348. // NOTE: When there are multiple Streams, the separate
  349. // lzma_index structures can use more RAM (as measured by
  350. // lzma_index_memused()) than the final combined lzma_index.
  351. // Thus memlimit may need to be slightly higher than the final
  352. // calculated memory usage will be. This is perhaps a bit
  353. // confusing to the application, but I think it shouldn't
  354. // cause problems in practice.
  355. uint64_t memused = 0;
  356. if (coder->combined_index != NULL) {
  357. memused = lzma_index_memused(coder->combined_index);
  358. assert(memused <= coder->memlimit);
  359. if (memused > coder->memlimit) // Extra sanity check
  360. return LZMA_PROG_ERROR;
  361. }
  362. // Initialize the Index decoder.
  363. return_if_error(lzma_index_decoder_init(
  364. &coder->index_decoder, allocator,
  365. &coder->this_index,
  366. coder->memlimit - memused));
  367. coder->index_remaining = coder->footer_flags.backward_size;
  368. coder->sequence = SEQ_INDEX_DECODE;
  369. }
  370. // Fall through
  371. case SEQ_INDEX_DECODE: {
  372. // Decode (a part of) the Index. If the whole Index is already
  373. // in coder->temp, read it from there. Otherwise read from
  374. // in[*in_pos] onwards. Note that index_decode() updates
  375. // coder->index_remaining and optionally coder->file_cur_pos.
  376. lzma_ret ret;
  377. if (coder->temp_size != 0) {
  378. assert(coder->temp_size - coder->temp_pos
  379. == coder->index_remaining);
  380. ret = decode_index(coder, allocator, coder->temp,
  381. &coder->temp_pos, coder->temp_size,
  382. false);
  383. } else {
  384. // Don't give the decoder more input than the known
  385. // remaining size of the Index field.
  386. size_t in_stop = in_size;
  387. if (in_size - *in_pos > coder->index_remaining)
  388. in_stop = *in_pos
  389. + (size_t)(coder->index_remaining);
  390. ret = decode_index(coder, allocator,
  391. in, in_pos, in_stop, true);
  392. }
  393. switch (ret) {
  394. case LZMA_OK:
  395. // If the Index docoder asks for more input when we
  396. // have already given it as much input as Backward Size
  397. // indicated, the file is invalid.
  398. if (coder->index_remaining == 0)
  399. return LZMA_DATA_ERROR;
  400. // We cannot get here if we were reading Index from
  401. // coder->temp because when reading from coder->temp
  402. // we give the Index decoder exactly
  403. // coder->index_remaining bytes of input.
  404. assert(coder->temp_size == 0);
  405. return LZMA_OK;
  406. case LZMA_STREAM_END:
  407. // If the decoding seems to be successful, check also
  408. // that the Index decoder consumed as much input as
  409. // indicated by the Backward Size field.
  410. if (coder->index_remaining != 0)
  411. return LZMA_DATA_ERROR;
  412. break;
  413. default:
  414. return ret;
  415. }
  416. // Calculate how much the Index tells us to seek backwards
  417. // (relative to the beginning of the Index): Total size of
  418. // all Blocks plus the size of the Stream Header field.
  419. // No integer overflow here because lzma_index_total_size()
  420. // cannot return a value greater than LZMA_VLI_MAX.
  421. const uint64_t seek_amount
  422. = lzma_index_total_size(coder->this_index)
  423. + LZMA_STREAM_HEADER_SIZE;
  424. // Check that Index is sane in sense that seek_amount won't
  425. // make us seek past the beginning of the file when locating
  426. // the Stream Header.
  427. //
  428. // coder->file_target_pos still points to the beginning of
  429. // the Index field.
  430. if (coder->file_target_pos < seek_amount)
  431. return LZMA_DATA_ERROR;
  432. // Set the target to the beginning of Stream Header.
  433. coder->file_target_pos -= seek_amount;
  434. if (coder->file_target_pos == 0) {
  435. // We would seek to the beginning of the file, but
  436. // since we already decoded that Stream Header in
  437. // SEQ_MAGIC_BYTES, we can use the cached value from
  438. // coder->first_header_flags to avoid the seek.
  439. coder->header_flags = coder->first_header_flags;
  440. coder->sequence = SEQ_HEADER_COMPARE;
  441. break;
  442. }
  443. coder->sequence = SEQ_HEADER_DECODE;
  444. // Make coder->file_target_pos point to the end of
  445. // the Stream Header field.
  446. coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
  447. // If coder->temp_size is non-zero, it points to the end
  448. // of the Index field. Then the beginning of the Index
  449. // field is at coder->temp[coder->temp_size
  450. // - coder->footer_flags.backward_size].
  451. assert(coder->temp_size == 0 || coder->temp_size
  452. >= coder->footer_flags.backward_size);
  453. // If coder->temp contained the whole Index, see if it has
  454. // enough data to contain also the Stream Header. If so,
  455. // we avoid an external seek.
  456. //
  457. // NOTE: This can happen only with small .xz files and only
  458. // for the non-first Stream as the Stream Flags of the first
  459. // Stream are cached and already handled a few lines above.
  460. // So this isn't as useful as the other seek-avoidance cases.
  461. if (coder->temp_size != 0 && coder->temp_size
  462. - coder->footer_flags.backward_size
  463. >= seek_amount) {
  464. // Make temp_pos and temp_size point to the *end* of
  465. // Stream Header so that SEQ_HEADER_DECODE will find
  466. // the start of Stream Header from coder->temp[
  467. // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
  468. coder->temp_pos = coder->temp_size
  469. - coder->footer_flags.backward_size
  470. - seek_amount
  471. + LZMA_STREAM_HEADER_SIZE;
  472. coder->temp_size = coder->temp_pos;
  473. } else {
  474. // Seek so that Stream Header will be at the end of
  475. // coder->temp. With typical multi-Stream files we
  476. // will usually also get the Stream Footer and Index
  477. // of the *previous* Stream in coder->temp and thus
  478. // won't need a separate seek for them.
  479. return_if_error(reverse_seek(coder,
  480. in_start, in_pos, in_size));
  481. }
  482. }
  483. // Fall through
  484. case SEQ_HEADER_DECODE:
  485. // Copy the Stream Header field into coder->temp.
  486. // If Stream Header was already available in coder->temp
  487. // in SEQ_INDEX_DECODE, then this does nothing.
  488. if (fill_temp(coder, in, in_pos, in_size))
  489. return LZMA_OK;
  490. // Make all these point to the beginning of Stream Header.
  491. coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
  492. coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
  493. coder->temp_pos = coder->temp_size;
  494. // Decode the Stream Header.
  495. return_if_error(hide_format_error(lzma_stream_header_decode(
  496. &coder->header_flags,
  497. coder->temp + coder->temp_size)));
  498. coder->sequence = SEQ_HEADER_COMPARE;
  499. // Fall through
  500. case SEQ_HEADER_COMPARE:
  501. // Compare Stream Header against Stream Footer. They must
  502. // match.
  503. return_if_error(lzma_stream_flags_compare(
  504. &coder->header_flags, &coder->footer_flags));
  505. // Store the decoded Stream Flags into the Index. Use the
  506. // Footer Flags because it contains Backward Size, although
  507. // it shouldn't matter in practice.
  508. if (lzma_index_stream_flags(coder->this_index,
  509. &coder->footer_flags) != LZMA_OK)
  510. return LZMA_PROG_ERROR;
  511. // Store also the size of the Stream Padding field. It is
  512. // needed to calculate the offsets of the Streams correctly.
  513. if (lzma_index_stream_padding(coder->this_index,
  514. coder->stream_padding) != LZMA_OK)
  515. return LZMA_PROG_ERROR;
  516. // Reset it so that it's ready for the next Stream.
  517. coder->stream_padding = 0;
  518. // Append the earlier decoded Indexes after this_index.
  519. if (coder->combined_index != NULL)
  520. return_if_error(lzma_index_cat(coder->this_index,
  521. coder->combined_index, allocator));
  522. coder->combined_index = coder->this_index;
  523. coder->this_index = NULL;
  524. // If the whole file was decoded, tell the caller that we
  525. // are finished.
  526. if (coder->file_target_pos == 0) {
  527. // The combined index must indicate the same file
  528. // size as was told to us at initialization.
  529. assert(lzma_index_file_size(coder->combined_index)
  530. == coder->file_size);
  531. // Make the combined index available to
  532. // the application.
  533. *coder->dest_index = coder->combined_index;
  534. coder->combined_index = NULL;
  535. // Mark the input buffer as used since we may have
  536. // done internal seeking and thus don't know how
  537. // many input bytes were actually used. This way
  538. // lzma_stream.total_in gets a slightly better
  539. // estimate of the amount of input used.
  540. *in_pos = in_size;
  541. return LZMA_STREAM_END;
  542. }
  543. // We didn't hit the beginning of the file yet, so continue
  544. // reading backwards in the file. If we have unprocessed
  545. // data in coder->temp, use it before requesting more data
  546. // from the application.
  547. //
  548. // coder->file_target_pos, coder->temp_size, and
  549. // coder->temp_pos all point to the beginning of Stream Header
  550. // and thus the end of the previous Stream in the file.
  551. coder->sequence = coder->temp_size > 0
  552. ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
  553. break;
  554. default:
  555. assert(0);
  556. return LZMA_PROG_ERROR;
  557. }
  558. }
  559. static lzma_ret
  560. file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
  561. uint64_t *old_memlimit, uint64_t new_memlimit)
  562. {
  563. lzma_file_info_coder *coder = coder_ptr;
  564. // The memory usage calculation comes from three things:
  565. //
  566. // (1) The Indexes that have already been decoded and processed into
  567. // coder->combined_index.
  568. //
  569. // (2) The latest Index in coder->this_index that has been decoded but
  570. // not yet put into coder->combined_index.
  571. //
  572. // (3) The latest Index that we have started decoding but haven't
  573. // finished and thus isn't available in coder->this_index yet.
  574. // Memory usage and limit information needs to be communicated
  575. // from/to coder->index_decoder.
  576. //
  577. // Care has to be taken to not do both (2) and (3) when calculating
  578. // the memory usage.
  579. uint64_t combined_index_memusage = 0;
  580. uint64_t this_index_memusage = 0;
  581. // (1) If we have already successfully decoded one or more Indexes,
  582. // get their memory usage.
  583. if (coder->combined_index != NULL)
  584. combined_index_memusage = lzma_index_memused(
  585. coder->combined_index);
  586. // Choose between (2), (3), or neither.
  587. if (coder->this_index != NULL) {
  588. // (2) The latest Index is available. Use its memory usage.
  589. this_index_memusage = lzma_index_memused(coder->this_index);
  590. } else if (coder->sequence == SEQ_INDEX_DECODE) {
  591. // (3) The Index decoder is activate and hasn't yet stored
  592. // the new index in coder->this_index. Get the memory usage
  593. // information from the Index decoder.
  594. //
  595. // NOTE: If the Index decoder doesn't yet know how much memory
  596. // it will eventually need, it will return a tiny value here.
  597. uint64_t dummy;
  598. if (coder->index_decoder.memconfig(coder->index_decoder.coder,
  599. &this_index_memusage, &dummy, 0)
  600. != LZMA_OK) {
  601. assert(0);
  602. return LZMA_PROG_ERROR;
  603. }
  604. }
  605. // Now we know the total memory usage/requirement. If we had neither
  606. // old Indexes nor a new Index, this will be zero which isn't
  607. // acceptable as lzma_memusage() has to return non-zero on success
  608. // and even with an empty .xz file we will end up with a lzma_index
  609. // that takes some memory.
  610. *memusage = combined_index_memusage + this_index_memusage;
  611. if (*memusage == 0)
  612. *memusage = lzma_index_memusage(1, 0);
  613. *old_memlimit = coder->memlimit;
  614. // If requested, set a new memory usage limit.
  615. if (new_memlimit != 0) {
  616. if (new_memlimit < *memusage)
  617. return LZMA_MEMLIMIT_ERROR;
  618. // In the condition (3) we need to tell the Index decoder
  619. // its new memory usage limit.
  620. if (coder->this_index == NULL
  621. && coder->sequence == SEQ_INDEX_DECODE) {
  622. const uint64_t idec_new_memlimit = new_memlimit
  623. - combined_index_memusage;
  624. assert(this_index_memusage > 0);
  625. assert(idec_new_memlimit > 0);
  626. uint64_t dummy1;
  627. uint64_t dummy2;
  628. if (coder->index_decoder.memconfig(
  629. coder->index_decoder.coder,
  630. &dummy1, &dummy2, idec_new_memlimit)
  631. != LZMA_OK) {
  632. assert(0);
  633. return LZMA_PROG_ERROR;
  634. }
  635. }
  636. coder->memlimit = new_memlimit;
  637. }
  638. return LZMA_OK;
  639. }
  640. static void
  641. file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
  642. {
  643. lzma_file_info_coder *coder = coder_ptr;
  644. lzma_next_end(&coder->index_decoder, allocator);
  645. lzma_index_end(coder->this_index, allocator);
  646. lzma_index_end(coder->combined_index, allocator);
  647. lzma_free(coder, allocator);
  648. return;
  649. }
  650. static lzma_ret
  651. lzma_file_info_decoder_init(lzma_next_coder *next,
  652. const lzma_allocator *allocator, uint64_t *seek_pos,
  653. lzma_index **dest_index,
  654. uint64_t memlimit, uint64_t file_size)
  655. {
  656. lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
  657. if (dest_index == NULL)
  658. return LZMA_PROG_ERROR;
  659. lzma_file_info_coder *coder = next->coder;
  660. if (coder == NULL) {
  661. coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
  662. if (coder == NULL)
  663. return LZMA_MEM_ERROR;
  664. next->coder = coder;
  665. next->code = &file_info_decode;
  666. next->end = &file_info_decoder_end;
  667. next->memconfig = &file_info_decoder_memconfig;
  668. coder->index_decoder = LZMA_NEXT_CODER_INIT;
  669. coder->this_index = NULL;
  670. coder->combined_index = NULL;
  671. }
  672. coder->sequence = SEQ_MAGIC_BYTES;
  673. coder->file_cur_pos = 0;
  674. coder->file_target_pos = 0;
  675. coder->file_size = file_size;
  676. lzma_index_end(coder->this_index, allocator);
  677. coder->this_index = NULL;
  678. lzma_index_end(coder->combined_index, allocator);
  679. coder->combined_index = NULL;
  680. coder->stream_padding = 0;
  681. coder->dest_index = dest_index;
  682. coder->external_seek_pos = seek_pos;
  683. // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
  684. // won't return 0 (which would indicate an error).
  685. coder->memlimit = my_max(1, memlimit);
  686. // Prepare these for reading the first Stream Header into coder->temp.
  687. coder->temp_pos = 0;
  688. coder->temp_size = LZMA_STREAM_HEADER_SIZE;
  689. return LZMA_OK;
  690. }
  691. extern LZMA_API(lzma_ret)
  692. lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
  693. uint64_t memlimit, uint64_t file_size)
  694. {
  695. lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
  696. dest_index, memlimit, file_size);
  697. // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
  698. // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
  699. // combination in a sane way. Applications still need to be careful
  700. // if they use LZMA_FINISH so that they remember to reset it back
  701. // to LZMA_RUN after seeking if needed.
  702. strm->internal->supported_actions[LZMA_RUN] = true;
  703. strm->internal->supported_actions[LZMA_FINISH] = true;
  704. return LZMA_OK;
  705. }