lz_decoder.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file lz_decoder.h
  4. /// \brief LZ out window
  5. ///
  6. // Authors: Igor Pavlov
  7. // Lasse Collin
  8. //
  9. // This file has been put into the public domain.
  10. // You can do whatever you want with this file.
  11. //
  12. ///////////////////////////////////////////////////////////////////////////////
  13. #ifndef LZMA_LZ_DECODER_H
  14. #define LZMA_LZ_DECODER_H
  15. #include "common.h"
  16. typedef struct {
  17. /// Pointer to the dictionary buffer. It can be an allocated buffer
  18. /// internal to liblzma, or it can a be a buffer given by the
  19. /// application when in single-call mode (not implemented yet).
  20. uint8_t *buf;
  21. /// Write position in dictionary. The next byte will be written to
  22. /// buf[pos].
  23. size_t pos;
  24. /// Indicates how full the dictionary is. This is used by
  25. /// dict_is_distance_valid() to detect corrupt files that would
  26. /// read beyond the beginning of the dictionary.
  27. size_t full;
  28. /// Write limit
  29. size_t limit;
  30. /// Size of the dictionary
  31. size_t size;
  32. /// True when dictionary should be reset before decoding more data.
  33. bool need_reset;
  34. } lzma_dict;
  35. typedef struct {
  36. size_t dict_size;
  37. const uint8_t *preset_dict;
  38. size_t preset_dict_size;
  39. } lzma_lz_options;
  40. typedef struct {
  41. /// Data specific to the LZ-based decoder
  42. void *coder;
  43. /// Function to decode from in[] to *dict
  44. lzma_ret (*code)(void *coder,
  45. lzma_dict *restrict dict, const uint8_t *restrict in,
  46. size_t *restrict in_pos, size_t in_size);
  47. void (*reset)(void *coder, const void *options);
  48. /// Set the uncompressed size. If uncompressed_size == LZMA_VLI_UNKNOWN
  49. /// then allow_eopm will always be true.
  50. void (*set_uncompressed)(void *coder, lzma_vli uncompressed_size,
  51. bool allow_eopm);
  52. /// Free allocated resources
  53. void (*end)(void *coder, const lzma_allocator *allocator);
  54. } lzma_lz_decoder;
  55. #define LZMA_LZ_DECODER_INIT \
  56. (lzma_lz_decoder){ \
  57. .coder = NULL, \
  58. .code = NULL, \
  59. .reset = NULL, \
  60. .set_uncompressed = NULL, \
  61. .end = NULL, \
  62. }
  63. extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next,
  64. const lzma_allocator *allocator,
  65. const lzma_filter_info *filters,
  66. lzma_ret (*lz_init)(lzma_lz_decoder *lz,
  67. const lzma_allocator *allocator,
  68. lzma_vli id, const void *options,
  69. lzma_lz_options *lz_options));
  70. extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size);
  71. //////////////////////
  72. // Inline functions //
  73. //////////////////////
  74. /// Get a byte from the history buffer.
  75. static inline uint8_t
  76. dict_get(const lzma_dict *const dict, const uint32_t distance)
  77. {
  78. return dict->buf[dict->pos - distance - 1
  79. + (distance < dict->pos ? 0 : dict->size)];
  80. }
  81. /// Test if dictionary is empty.
  82. static inline bool
  83. dict_is_empty(const lzma_dict *const dict)
  84. {
  85. return dict->full == 0;
  86. }
  87. /// Validate the match distance
  88. static inline bool
  89. dict_is_distance_valid(const lzma_dict *const dict, const size_t distance)
  90. {
  91. return dict->full > distance;
  92. }
  93. /// Repeat *len bytes at distance.
  94. static inline bool
  95. dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
  96. {
  97. // Don't write past the end of the dictionary.
  98. const size_t dict_avail = dict->limit - dict->pos;
  99. uint32_t left = my_min(dict_avail, *len);
  100. *len -= left;
  101. // Repeat a block of data from the history. Because memcpy() is faster
  102. // than copying byte by byte in a loop, the copying process gets split
  103. // into three cases.
  104. if (distance < left) {
  105. // Source and target areas overlap, thus we can't use
  106. // memcpy() nor even memmove() safely.
  107. do {
  108. dict->buf[dict->pos] = dict_get(dict, distance);
  109. ++dict->pos;
  110. } while (--left > 0);
  111. } else if (distance < dict->pos) {
  112. // The easiest and fastest case
  113. memcpy(dict->buf + dict->pos,
  114. dict->buf + dict->pos - distance - 1,
  115. left);
  116. dict->pos += left;
  117. } else {
  118. // The bigger the dictionary, the more rare this
  119. // case occurs. We need to "wrap" the dict, thus
  120. // we might need two memcpy() to copy all the data.
  121. assert(dict->full == dict->size);
  122. const uint32_t copy_pos
  123. = dict->pos - distance - 1 + dict->size;
  124. uint32_t copy_size = dict->size - copy_pos;
  125. if (copy_size < left) {
  126. memmove(dict->buf + dict->pos, dict->buf + copy_pos,
  127. copy_size);
  128. dict->pos += copy_size;
  129. copy_size = left - copy_size;
  130. memcpy(dict->buf + dict->pos, dict->buf, copy_size);
  131. dict->pos += copy_size;
  132. } else {
  133. memmove(dict->buf + dict->pos, dict->buf + copy_pos,
  134. left);
  135. dict->pos += left;
  136. }
  137. }
  138. // Update how full the dictionary is.
  139. if (dict->full < dict->pos)
  140. dict->full = dict->pos;
  141. return unlikely(*len != 0);
  142. }
  143. /// Puts one byte into the dictionary. Returns true if the dictionary was
  144. /// already full and the byte couldn't be added.
  145. static inline bool
  146. dict_put(lzma_dict *dict, uint8_t byte)
  147. {
  148. if (unlikely(dict->pos == dict->limit))
  149. return true;
  150. dict->buf[dict->pos++] = byte;
  151. if (dict->pos > dict->full)
  152. dict->full = dict->pos;
  153. return false;
  154. }
  155. /// Copies arbitrary amount of data into the dictionary.
  156. static inline void
  157. dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
  158. size_t *restrict in_pos, size_t in_size,
  159. size_t *restrict left)
  160. {
  161. // NOTE: If we are being given more data than the size of the
  162. // dictionary, it could be possible to optimize the LZ decoder
  163. // so that not everything needs to go through the dictionary.
  164. // This shouldn't be very common thing in practice though, and
  165. // the slowdown of one extra memcpy() isn't bad compared to how
  166. // much time it would have taken if the data were compressed.
  167. if (in_size - *in_pos > *left)
  168. in_size = *in_pos + *left;
  169. *left -= lzma_bufcpy(in, in_pos, in_size,
  170. dict->buf, &dict->pos, dict->limit);
  171. if (dict->pos > dict->full)
  172. dict->full = dict->pos;
  173. return;
  174. }
  175. static inline void
  176. dict_reset(lzma_dict *dict)
  177. {
  178. dict->need_reset = true;
  179. return;
  180. }
  181. #endif