123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
- // SPDX-License-Identifier: 0BSD
- ///////////////////////////////////////////////////////////////////////////////
- //
- /// \file index_decoder.c
- /// \brief Decodes the Index field
- //
- // Author: Lasse Collin
- //
- ///////////////////////////////////////////////////////////////////////////////
- #include "index_decoder.h"
- #include "check.h"
- typedef struct {
- enum {
- SEQ_INDICATOR,
- SEQ_COUNT,
- SEQ_MEMUSAGE,
- SEQ_UNPADDED,
- SEQ_UNCOMPRESSED,
- SEQ_PADDING_INIT,
- SEQ_PADDING,
- SEQ_CRC32,
- } sequence;
- /// Memory usage limit
- uint64_t memlimit;
- /// Target Index
- lzma_index *index;
- /// Pointer give by the application, which is set after
- /// successful decoding.
- lzma_index **index_ptr;
- /// Number of Records left to decode.
- lzma_vli count;
- /// The most recent Unpadded Size field
- lzma_vli unpadded_size;
- /// The most recent Uncompressed Size field
- lzma_vli uncompressed_size;
- /// Position in integers
- size_t pos;
- /// CRC32 of the List of Records field
- uint32_t crc32;
- } lzma_index_coder;
- static lzma_ret
- index_decode(void *coder_ptr, const lzma_allocator *allocator,
- const uint8_t *restrict in, size_t *restrict in_pos,
- size_t in_size,
- uint8_t *restrict out lzma_attribute((__unused__)),
- size_t *restrict out_pos lzma_attribute((__unused__)),
- size_t out_size lzma_attribute((__unused__)),
- lzma_action action lzma_attribute((__unused__)))
- {
- lzma_index_coder *coder = coder_ptr;
- // Similar optimization as in index_encoder.c
- const size_t in_start = *in_pos;
- lzma_ret ret = LZMA_OK;
- while (*in_pos < in_size)
- switch (coder->sequence) {
- case SEQ_INDICATOR:
- // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
- // LZMA_FORMAT_ERROR, because a typical usage case for Index
- // decoder is when parsing the Stream backwards. If seeking
- // backward from the Stream Footer gives us something that
- // doesn't begin with Index Indicator, the file is considered
- // corrupt, not "programming error" or "unrecognized file
- // format". One could argue that the application should
- // verify the Index Indicator before trying to decode the
- // Index, but well, I suppose it is simpler this way.
- if (in[(*in_pos)++] != INDEX_INDICATOR)
- return LZMA_DATA_ERROR;
- coder->sequence = SEQ_COUNT;
- break;
- case SEQ_COUNT:
- ret = lzma_vli_decode(&coder->count, &coder->pos,
- in, in_pos, in_size);
- if (ret != LZMA_STREAM_END)
- goto out;
- coder->pos = 0;
- coder->sequence = SEQ_MEMUSAGE;
- // Fall through
- case SEQ_MEMUSAGE:
- if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
- ret = LZMA_MEMLIMIT_ERROR;
- goto out;
- }
- // Tell the Index handling code how many Records this
- // Index has to allow it to allocate memory more efficiently.
- lzma_index_prealloc(coder->index, coder->count);
- ret = LZMA_OK;
- coder->sequence = coder->count == 0
- ? SEQ_PADDING_INIT : SEQ_UNPADDED;
- break;
- case SEQ_UNPADDED:
- case SEQ_UNCOMPRESSED: {
- lzma_vli *size = coder->sequence == SEQ_UNPADDED
- ? &coder->unpadded_size
- : &coder->uncompressed_size;
- ret = lzma_vli_decode(size, &coder->pos,
- in, in_pos, in_size);
- if (ret != LZMA_STREAM_END)
- goto out;
- ret = LZMA_OK;
- coder->pos = 0;
- if (coder->sequence == SEQ_UNPADDED) {
- // Validate that encoded Unpadded Size isn't too small
- // or too big.
- if (coder->unpadded_size < UNPADDED_SIZE_MIN
- || coder->unpadded_size
- > UNPADDED_SIZE_MAX)
- return LZMA_DATA_ERROR;
- coder->sequence = SEQ_UNCOMPRESSED;
- } else {
- // Add the decoded Record to the Index.
- return_if_error(lzma_index_append(
- coder->index, allocator,
- coder->unpadded_size,
- coder->uncompressed_size));
- // Check if this was the last Record.
- coder->sequence = --coder->count == 0
- ? SEQ_PADDING_INIT
- : SEQ_UNPADDED;
- }
- break;
- }
- case SEQ_PADDING_INIT:
- coder->pos = lzma_index_padding_size(coder->index);
- coder->sequence = SEQ_PADDING;
- // Fall through
- case SEQ_PADDING:
- if (coder->pos > 0) {
- --coder->pos;
- if (in[(*in_pos)++] != 0x00)
- return LZMA_DATA_ERROR;
- break;
- }
- // Finish the CRC32 calculation.
- coder->crc32 = lzma_crc32(in + in_start,
- *in_pos - in_start, coder->crc32);
- coder->sequence = SEQ_CRC32;
- // Fall through
- case SEQ_CRC32:
- do {
- if (*in_pos == in_size)
- return LZMA_OK;
- if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
- != in[(*in_pos)++]) {
- #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
- return LZMA_DATA_ERROR;
- #endif
- }
- } while (++coder->pos < 4);
- // Decoding was successful, now we can let the application
- // see the decoded Index.
- *coder->index_ptr = coder->index;
- // Make index NULL so we don't free it unintentionally.
- coder->index = NULL;
- return LZMA_STREAM_END;
- default:
- assert(0);
- return LZMA_PROG_ERROR;
- }
- out:
- // Update the CRC32.
- //
- // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
- // In such a case we had no input and thus in_used == 0.
- {
- const size_t in_used = *in_pos - in_start;
- if (in_used > 0)
- coder->crc32 = lzma_crc32(in + in_start,
- in_used, coder->crc32);
- }
- return ret;
- }
- static void
- index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
- {
- lzma_index_coder *coder = coder_ptr;
- lzma_index_end(coder->index, allocator);
- lzma_free(coder, allocator);
- return;
- }
- static lzma_ret
- index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
- uint64_t *old_memlimit, uint64_t new_memlimit)
- {
- lzma_index_coder *coder = coder_ptr;
- *memusage = lzma_index_memusage(1, coder->count);
- *old_memlimit = coder->memlimit;
- if (new_memlimit != 0) {
- if (new_memlimit < *memusage)
- return LZMA_MEMLIMIT_ERROR;
- coder->memlimit = new_memlimit;
- }
- return LZMA_OK;
- }
- static lzma_ret
- index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
- lzma_index **i, uint64_t memlimit)
- {
- // Remember the pointer given by the application. We will set it
- // to point to the decoded Index only if decoding is successful.
- // Before that, keep it NULL so that applications can always safely
- // pass it to lzma_index_end() no matter did decoding succeed or not.
- coder->index_ptr = i;
- *i = NULL;
- // We always allocate a new lzma_index.
- coder->index = lzma_index_init(allocator);
- if (coder->index == NULL)
- return LZMA_MEM_ERROR;
- // Initialize the rest.
- coder->sequence = SEQ_INDICATOR;
- coder->memlimit = my_max(1, memlimit);
- coder->count = 0; // Needs to be initialized due to _memconfig().
- coder->pos = 0;
- coder->crc32 = 0;
- return LZMA_OK;
- }
- extern lzma_ret
- lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
- lzma_index **i, uint64_t memlimit)
- {
- lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
- if (i == NULL)
- return LZMA_PROG_ERROR;
- lzma_index_coder *coder = next->coder;
- if (coder == NULL) {
- coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
- if (coder == NULL)
- return LZMA_MEM_ERROR;
- next->coder = coder;
- next->code = &index_decode;
- next->end = &index_decoder_end;
- next->memconfig = &index_decoder_memconfig;
- coder->index = NULL;
- } else {
- lzma_index_end(coder->index, allocator);
- }
- return index_decoder_reset(coder, allocator, i, memlimit);
- }
- extern LZMA_API(lzma_ret)
- lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
- {
- // If i isn't NULL, *i must always be initialized due to
- // the wording in the API docs. This way it is initialized
- // if we return LZMA_PROG_ERROR due to strm == NULL.
- if (i != NULL)
- *i = NULL;
- lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
- strm->internal->supported_actions[LZMA_RUN] = true;
- strm->internal->supported_actions[LZMA_FINISH] = true;
- return LZMA_OK;
- }
- extern LZMA_API(lzma_ret)
- lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
- const lzma_allocator *allocator,
- const uint8_t *in, size_t *in_pos, size_t in_size)
- {
- // If i isn't NULL, *i must always be initialized due to
- // the wording in the API docs.
- if (i != NULL)
- *i = NULL;
- // Sanity checks
- if (i == NULL || memlimit == NULL
- || in == NULL || in_pos == NULL || *in_pos > in_size)
- return LZMA_PROG_ERROR;
- // Initialize the decoder.
- lzma_index_coder coder;
- return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
- // Store the input start position so that we can restore it in case
- // of an error.
- const size_t in_start = *in_pos;
- // Do the actual decoding.
- lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
- NULL, NULL, 0, LZMA_RUN);
- if (ret == LZMA_STREAM_END) {
- ret = LZMA_OK;
- } else {
- // Something went wrong, free the Index structure and restore
- // the input position.
- lzma_index_end(coder.index, allocator);
- *in_pos = in_start;
- if (ret == LZMA_OK) {
- // The input is truncated or otherwise corrupt.
- // Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
- // like lzma_vli_decode() does in single-call mode.
- ret = LZMA_DATA_ERROR;
- } else if (ret == LZMA_MEMLIMIT_ERROR) {
- // Tell the caller how much memory would have
- // been needed.
- *memlimit = lzma_index_memusage(1, coder.count);
- }
- }
- return ret;
- }
|