lz_decoder.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. // SPDX-License-Identifier: 0BSD
  2. ///////////////////////////////////////////////////////////////////////////////
  3. //
  4. /// \file lz_decoder.h
  5. /// \brief LZ out window
  6. ///
  7. // Authors: Igor Pavlov
  8. // Lasse Collin
  9. //
  10. ///////////////////////////////////////////////////////////////////////////////
  11. #ifndef LZMA_LZ_DECODER_H
  12. #define LZMA_LZ_DECODER_H
  13. #include "common.h"
  14. /// Maximum length of a match rounded up to a nice power of 2 which is
  15. /// a good size for aligned memcpy(). The allocated dictionary buffer will
  16. /// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
  17. ///
  18. /// (1) Every time the decoder reaches the end of the dictionary buffer,
  19. /// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
  20. /// This way dict_repeat() will only need to copy from one place,
  21. /// never from both the end and beginning of the buffer.
  22. ///
  23. /// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
  24. /// the oldest byte still in the dictionary and the current write
  25. /// position. This way dict_repeat(dict, dict->size - 1, &len)
  26. /// won't need memmove() as the copying cannot overlap.
  27. ///
  28. /// Note that memcpy() still cannot be used if distance < len.
  29. ///
  30. /// LZMA's longest match length is 273 so pick a multiple of 16 above that.
  31. #define LZ_DICT_REPEAT_MAX 288
  32. typedef struct {
  33. /// Pointer to the dictionary buffer.
  34. uint8_t *buf;
  35. /// Write position in dictionary. The next byte will be written to
  36. /// buf[pos].
  37. size_t pos;
  38. /// Indicates how full the dictionary is. This is used by
  39. /// dict_is_distance_valid() to detect corrupt files that would
  40. /// read beyond the beginning of the dictionary.
  41. size_t full;
  42. /// Write limit
  43. size_t limit;
  44. /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
  45. /// larger than the actual dictionary size. This is enforced by
  46. /// how the value for "full" is set; it can be at most
  47. /// "size - 2 * LZ_DICT_REPEAT_MAX".
  48. size_t size;
  49. /// True once the dictionary has become full and the writing position
  50. /// has been wrapped in decode_buffer() in lz_decoder.c.
  51. bool has_wrapped;
  52. /// True when dictionary should be reset before decoding more data.
  53. bool need_reset;
  54. } lzma_dict;
  55. typedef struct {
  56. size_t dict_size;
  57. const uint8_t *preset_dict;
  58. size_t preset_dict_size;
  59. } lzma_lz_options;
  60. typedef struct {
  61. /// Data specific to the LZ-based decoder
  62. void *coder;
  63. /// Function to decode from in[] to *dict
  64. lzma_ret (*code)(void *coder,
  65. lzma_dict *restrict dict, const uint8_t *restrict in,
  66. size_t *restrict in_pos, size_t in_size);
  67. void (*reset)(void *coder, const void *options);
  68. /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
  69. /// then allow_eopm will always be true.
  70. void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
  71. bool allow_eopm);
  72. /// Free allocated resources
  73. void (*end)(void *coder, const lzma_allocator *allocator);
  74. } lzma_lz_decoder;
  75. #define LZMA_LZ_DECODER_INIT \
  76. (lzma_lz_decoder){ \
  77. .coder = NULL, \
  78. .code = NULL, \
  79. .reset = NULL, \
  80. .set_uncompressed = NULL, \
  81. .end = NULL, \
  82. }
  83. extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
  84. const lzma_allocator *allocator,
  85. const lzma_filter_info *filters,
  86. lzma_ret (*lz_init)(lzma_lz_decoder *lz,
  87. const lzma_allocator *allocator,
  88. lzma_vli id, const void *options,
  89. lzma_lz_options *lz_options));
  90. extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
  91. //////////////////////
  92. // Inline functions //
  93. //////////////////////
  94. /// Get a byte from the history buffer.
  95. static inline uint8_t
  96. dict_get(const lzma_dict *const dict, const uint32_t distance)
  97. {
  98. return dict->buf[dict->pos - distance - 1
  99. + (distance < dict->pos
  100. ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
  101. }
  102. /// Optimized version of dict_get(dict, 0)
  103. static inline uint8_t
  104. dict_get0(const lzma_dict *const dict)
  105. {
  106. return dict->buf[dict->pos - 1];
  107. }
  108. /// Test if dictionary is empty.
  109. static inline bool
  110. dict_is_empty(const lzma_dict *const dict)
  111. {
  112. return dict->full == 0;
  113. }
  114. /// Validate the match distance
  115. static inline bool
  116. dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
  117. {
  118. return dict->full > distance;
  119. }
  120. /// Repeat *len bytes at distance.
  121. static inline bool
  122. dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
  123. {
  124. // Don't write past the end of the dictionary.
  125. const size_t dict_avail = dict->limit - dict->pos;
  126. uint32_t left = my_min(dict_avail, *len);
  127. *len -= left;
  128. size_t back = dict->pos - distance - 1;
  129. if (distance >= dict->pos)
  130. back += dict->size - LZ_DICT_REPEAT_MAX;
  131. // Repeat a block of data from the history. Because memcpy() is faster
  132. // than copying byte by byte in a loop, the copying process gets split
  133. // into two cases.
  134. if (distance < left) {
  135. // Source and target areas overlap, thus we can't use
  136. // memcpy() nor even memmove() safely.
  137. do {
  138. dict->buf[dict->pos++] = dict->buf[back++];
  139. } while (--left > 0);
  140. } else {
  141. memcpy(dict->buf + dict->pos, dict->buf + back, left);
  142. dict->pos += left;
  143. }
  144. // Update how full the dictionary is.
  145. if (!dict->has_wrapped)
  146. dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
  147. return *len != 0;
  148. }
  149. static inline void
  150. dict_put(lzma_dict *dict, uint8_t byte)
  151. {
  152. dict->buf[dict->pos++] = byte;
  153. if (!dict->has_wrapped)
  154. dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
  155. }
  156. /// Puts one byte into the dictionary. Returns true if the dictionary was
  157. /// already full and the byte couldn't be added.
  158. static inline bool
  159. dict_put_safe(lzma_dict *dict, uint8_t byte)
  160. {
  161. if (unlikely(dict->pos == dict->limit))
  162. return true;
  163. dict_put(dict, byte);
  164. return false;
  165. }
  166. /// Copies arbitrary amount of data into the dictionary.
  167. static inline void
  168. dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
  169. size_t *restrict in_pos, size_t in_size,
  170. size_t *restrict left)
  171. {
  172. // NOTE: If we are being given more data than the size of the
  173. // dictionary, it could be possible to optimize the LZ decoder
  174. // so that not everything needs to go through the dictionary.
  175. // This shouldn't be very common thing in practice though, and
  176. // the slowdown of one extra memcpy() isn't bad compared to how
  177. // much time it would have taken if the data were compressed.
  178. if (in_size - *in_pos > *left)
  179. in_size = *in_pos + *left;
  180. *left -= lzma_bufcpy(in, in_pos, in_size,
  181. dict->buf, &dict->pos, dict->limit);
  182. if (!dict->has_wrapped)
  183. dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
  184. return;
  185. }
  186. static inline void
  187. dict_reset(lzma_dict *dict)
  188. {
  189. dict->need_reset = true;
  190. return;
  191. }
  192. #endif