123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- // SPDX-License-Identifier: 0BSD
- ///////////////////////////////////////////////////////////////////////////////
- //
- /// \file lz_decoder.h
- /// \brief LZ out window
- ///
- // Authors: Igor Pavlov
- // Lasse Collin
- //
- ///////////////////////////////////////////////////////////////////////////////
- #ifndef LZMA_LZ_DECODER_H
- #define LZMA_LZ_DECODER_H
- #include "common.h"
- /// Maximum length of a match rounded up to a nice power of 2 which is
- /// a good size for aligned memcpy(). The allocated dictionary buffer will
- /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
- ///
- /// (1) Every time the decoder reaches the end of the dictionary buffer,
- /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
- /// This way dict_repeat() will only need to copy from one place,
- /// never from both the end and beginning of the buffer.
- ///
- /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
- /// the oldest byte still in the dictionary and the current write
- /// position. This way dict_repeat(dict, dict->size - 1, &len)
- /// won't need memmove() as the copying cannot overlap.
- ///
- /// Note that memcpy() still cannot be used if distance < len.
- ///
- /// LZMA's longest match length is 273 so pick a multiple of 16 above that.
- #define LZ_DICT_REPEAT_MAX 288
- typedef struct {
- /// Pointer to the dictionary buffer.
- uint8_t *buf;
- /// Write position in dictionary. The next byte will be written to
- /// buf[pos].
- size_t pos;
- /// Indicates how full the dictionary is. This is used by
- /// dict_is_distance_valid() to detect corrupt files that would
- /// read beyond the beginning of the dictionary.
- size_t full;
- /// Write limit
- size_t limit;
- /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
- /// larger than the actual dictionary size. This is enforced by
- /// how the value for "full" is set; it can be at most
- /// "size - 2 * LZ_DICT_REPEAT_MAX".
- size_t size;
- /// True once the dictionary has become full and the writing position
- /// has been wrapped in decode_buffer() in lz_decoder.c.
- bool has_wrapped;
- /// True when dictionary should be reset before decoding more data.
- bool need_reset;
- } lzma_dict;
- typedef struct {
- size_t dict_size;
- const uint8_t *preset_dict;
- size_t preset_dict_size;
- } lzma_lz_options;
- typedef struct {
- /// Data specific to the LZ-based decoder
- void *coder;
- /// Function to decode from in[] to *dict
- lzma_ret (*code)(void *coder,
- lzma_dict *restrict dict, const uint8_t *restrict in,
- size_t *restrict in_pos, size_t in_size);
- void (*reset)(void *coder, const void *options);
- /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
- /// then allow_eopm will always be true.
- void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
- bool allow_eopm);
- /// Free allocated resources
- void (*end)(void *coder, const lzma_allocator *allocator);
- } lzma_lz_decoder;
- #define LZMA_LZ_DECODER_INIT \
- (lzma_lz_decoder){ \
- .coder = NULL, \
- .code = NULL, \
- .reset = NULL, \
- .set_uncompressed = NULL, \
- .end = NULL, \
- }
- extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
- const lzma_allocator *allocator,
- const lzma_filter_info *filters,
- lzma_ret (*lz_init)(lzma_lz_decoder *lz,
- const lzma_allocator *allocator,
- lzma_vli id, const void *options,
- lzma_lz_options *lz_options));
- extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
- //////////////////////
- // Inline functions //
- //////////////////////
- /// Get a byte from the history buffer.
- static inline uint8_t
- dict_get(const lzma_dict *const dict, const uint32_t distance)
- {
- return dict->buf[dict->pos - distance - 1
- + (distance < dict->pos
- ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
- }
- /// Optimized version of dict_get(dict, 0)
- static inline uint8_t
- dict_get0(const lzma_dict *const dict)
- {
- return dict->buf[dict->pos - 1];
- }
- /// Test if dictionary is empty.
- static inline bool
- dict_is_empty(const lzma_dict *const dict)
- {
- return dict->full == 0;
- }
- /// Validate the match distance
- static inline bool
- dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
- {
- return dict->full > distance;
- }
- /// Repeat *len bytes at distance.
- static inline bool
- dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
- {
- // Don't write past the end of the dictionary.
- const size_t dict_avail = dict->limit - dict->pos;
- uint32_t left = my_min(dict_avail, *len);
- *len -= left;
- size_t back = dict->pos - distance - 1;
- if (distance >= dict->pos)
- back += dict->size - LZ_DICT_REPEAT_MAX;
- // Repeat a block of data from the history. Because memcpy() is faster
- // than copying byte by byte in a loop, the copying process gets split
- // into two cases.
- if (distance < left) {
- // Source and target areas overlap, thus we can't use
- // memcpy() nor even memmove() safely.
- do {
- dict->buf[dict->pos++] = dict->buf[back++];
- } while (--left > 0);
- } else {
- memcpy(dict->buf + dict->pos, dict->buf + back, left);
- dict->pos += left;
- }
- // Update how full the dictionary is.
- if (!dict->has_wrapped)
- dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
- return *len != 0;
- }
- static inline void
- dict_put(lzma_dict *dict, uint8_t byte)
- {
- dict->buf[dict->pos++] = byte;
- if (!dict->has_wrapped)
- dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
- }
- /// Puts one byte into the dictionary. Returns true if the dictionary was
- /// already full and the byte couldn't be added.
- static inline bool
- dict_put_safe(lzma_dict *dict, uint8_t byte)
- {
- if (unlikely(dict->pos == dict->limit))
- return true;
- dict_put(dict, byte);
- return false;
- }
- /// Copies arbitrary amount of data into the dictionary.
- static inline void
- dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
- size_t *restrict in_pos, size_t in_size,
- size_t *restrict left)
- {
- // NOTE: If we are being given more data than the size of the
- // dictionary, it could be possible to optimize the LZ decoder
- // so that not everything needs to go through the dictionary.
- // This shouldn't be very common thing in practice though, and
- // the slowdown of one extra memcpy() isn't bad compared to how
- // much time it would have taken if the data were compressed.
- if (in_size - *in_pos > *left)
- in_size = *in_pos + *left;
- *left -= lzma_bufcpy(in, in_pos, in_size,
- dict->buf, &dict->pos, dict->limit);
- if (!dict->has_wrapped)
- dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
- return;
- }
- static inline void
- dict_reset(lzma_dict *dict)
- {
- dict->need_reset = true;
- return;
- }
- #endif
|