block_buffer_encoder.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. // SPDX-License-Identifier: 0BSD
  2. ///////////////////////////////////////////////////////////////////////////////
  3. //
  4. /// \file block_buffer_encoder.c
  5. /// \brief Single-call .xz Block encoder
  6. //
  7. // Author: Lasse Collin
  8. //
  9. ///////////////////////////////////////////////////////////////////////////////
  10. #include "block_buffer_encoder.h"
  11. #include "block_encoder.h"
  12. #include "filter_encoder.h"
  13. #include "lzma2_encoder.h"
  14. #include "check.h"
  15. /// Estimate the maximum size of the Block Header and Check fields for
  16. /// a Block that uses LZMA2 uncompressed chunks. We could use
  17. /// lzma_block_header_size() but this is simpler.
  18. ///
  19. /// Block Header Size + Block Flags + Compressed Size
  20. /// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
  21. /// and round up to the next multiple of four to take Header Padding
  22. /// into account.
  23. #define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
  24. + LZMA_CHECK_SIZE_MAX + 3) & ~3)
  25. static uint64_t
  26. lzma2_bound(uint64_t uncompressed_size)
  27. {
  28. // Prevent integer overflow in overhead calculation.
  29. if (uncompressed_size > COMPRESSED_SIZE_MAX)
  30. return 0;
  31. // Calculate the exact overhead of the LZMA2 headers: Round
  32. // uncompressed_size up to the next multiple of LZMA2_CHUNK_MAX,
  33. // multiply by the size of per-chunk header, and add one byte for
  34. // the end marker.
  35. const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
  36. / LZMA2_CHUNK_MAX)
  37. * LZMA2_HEADER_UNCOMPRESSED + 1;
  38. // Catch the possible integer overflow.
  39. if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
  40. return 0;
  41. return uncompressed_size + overhead;
  42. }
  43. extern uint64_t
  44. lzma_block_buffer_bound64(uint64_t uncompressed_size)
  45. {
  46. // If the data doesn't compress, we always use uncompressed
  47. // LZMA2 chunks.
  48. uint64_t lzma2_size = lzma2_bound(uncompressed_size);
  49. if (lzma2_size == 0)
  50. return 0;
  51. // Take Block Padding into account.
  52. lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
  53. // No risk of integer overflow because lzma2_bound() already takes
  54. // into account the size of the headers in the Block.
  55. return HEADERS_BOUND + lzma2_size;
  56. }
  57. extern LZMA_API(size_t)
  58. lzma_block_buffer_bound(size_t uncompressed_size)
  59. {
  60. uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
  61. #if SIZE_MAX < UINT64_MAX
  62. // Catch the possible integer overflow on 32-bit systems.
  63. if (ret > SIZE_MAX)
  64. return 0;
  65. #endif
  66. return ret;
  67. }
  68. static lzma_ret
  69. block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
  70. uint8_t *out, size_t *out_pos, size_t out_size)
  71. {
  72. // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
  73. // all, but LZMA2 always requires a dictionary, so use the minimum
  74. // value to minimize memory usage of the decoder.
  75. lzma_options_lzma lzma2 = {
  76. .dict_size = LZMA_DICT_SIZE_MIN,
  77. };
  78. lzma_filter filters[2];
  79. filters[0].id = LZMA_FILTER_LZMA2;
  80. filters[0].options = &lzma2;
  81. filters[1].id = LZMA_VLI_UNKNOWN;
  82. // Set the above filter options to *block temporarily so that we can
  83. // encode the Block Header.
  84. lzma_filter *filters_orig = block->filters;
  85. block->filters = filters;
  86. if (lzma_block_header_size(block) != LZMA_OK) {
  87. block->filters = filters_orig;
  88. return LZMA_PROG_ERROR;
  89. }
  90. // Check that there's enough output space. The caller has already
  91. // set block->compressed_size to what lzma2_bound() has returned,
  92. // so we can reuse that value. We know that compressed_size is a
  93. // known valid VLI and header_size is a small value so their sum
  94. // will never overflow.
  95. assert(block->compressed_size == lzma2_bound(in_size));
  96. if (out_size - *out_pos
  97. < block->header_size + block->compressed_size) {
  98. block->filters = filters_orig;
  99. return LZMA_BUF_ERROR;
  100. }
  101. if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
  102. block->filters = filters_orig;
  103. return LZMA_PROG_ERROR;
  104. }
  105. block->filters = filters_orig;
  106. *out_pos += block->header_size;
  107. // Encode the data using LZMA2 uncompressed chunks.
  108. size_t in_pos = 0;
  109. uint8_t control = 0x01; // Dictionary reset
  110. while (in_pos < in_size) {
  111. // Control byte: Indicate uncompressed chunk, of which
  112. // the first resets the dictionary.
  113. out[(*out_pos)++] = control;
  114. control = 0x02; // No dictionary reset
  115. // Size of the uncompressed chunk
  116. const size_t copy_size
  117. = my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
  118. out[(*out_pos)++] = (copy_size - 1) >> 8;
  119. out[(*out_pos)++] = (copy_size - 1) & 0xFF;
  120. // The actual data
  121. assert(*out_pos + copy_size <= out_size);
  122. memcpy(out + *out_pos, in + in_pos, copy_size);
  123. in_pos += copy_size;
  124. *out_pos += copy_size;
  125. }
  126. // End marker
  127. out[(*out_pos)++] = 0x00;
  128. assert(*out_pos <= out_size);
  129. return LZMA_OK;
  130. }
  131. static lzma_ret
  132. block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
  133. const uint8_t *in, size_t in_size,
  134. uint8_t *out, size_t *out_pos, size_t out_size)
  135. {
  136. // Find out the size of the Block Header.
  137. return_if_error(lzma_block_header_size(block));
  138. // Reserve space for the Block Header and skip it for now.
  139. if (out_size - *out_pos <= block->header_size)
  140. return LZMA_BUF_ERROR;
  141. const size_t out_start = *out_pos;
  142. *out_pos += block->header_size;
  143. // Limit out_size so that we stop encoding if the output would grow
  144. // bigger than what uncompressed Block would be.
  145. if (out_size - *out_pos > block->compressed_size)
  146. out_size = *out_pos + block->compressed_size;
  147. // TODO: In many common cases this could be optimized to use
  148. // significantly less memory.
  149. lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
  150. lzma_ret ret = lzma_raw_encoder_init(
  151. &raw_encoder, allocator, block->filters);
  152. if (ret == LZMA_OK) {
  153. size_t in_pos = 0;
  154. ret = raw_encoder.code(raw_encoder.coder, allocator,
  155. in, &in_pos, in_size, out, out_pos, out_size,
  156. LZMA_FINISH);
  157. }
  158. // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
  159. lzma_next_end(&raw_encoder, allocator);
  160. if (ret == LZMA_STREAM_END) {
  161. // Compression was successful. Write the Block Header.
  162. block->compressed_size
  163. = *out_pos - (out_start + block->header_size);
  164. ret = lzma_block_header_encode(block, out + out_start);
  165. if (ret != LZMA_OK)
  166. ret = LZMA_PROG_ERROR;
  167. } else if (ret == LZMA_OK) {
  168. // Output buffer became full.
  169. ret = LZMA_BUF_ERROR;
  170. }
  171. // Reset *out_pos if something went wrong.
  172. if (ret != LZMA_OK)
  173. *out_pos = out_start;
  174. return ret;
  175. }
  176. static lzma_ret
  177. block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
  178. const uint8_t *in, size_t in_size,
  179. uint8_t *out, size_t *out_pos, size_t out_size,
  180. bool try_to_compress)
  181. {
  182. // Validate the arguments.
  183. if (block == NULL || (in == NULL && in_size != 0) || out == NULL
  184. || out_pos == NULL || *out_pos > out_size)
  185. return LZMA_PROG_ERROR;
  186. // The contents of the structure may depend on the version so
  187. // check the version before validating the contents of *block.
  188. if (block->version > 1)
  189. return LZMA_OPTIONS_ERROR;
  190. if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
  191. || (try_to_compress && block->filters == NULL))
  192. return LZMA_PROG_ERROR;
  193. if (!lzma_check_is_supported(block->check))
  194. return LZMA_UNSUPPORTED_CHECK;
  195. // Size of a Block has to be a multiple of four, so limit the size
  196. // here already. This way we don't need to check it again when adding
  197. // Block Padding.
  198. out_size -= (out_size - *out_pos) & 3;
  199. // Get the size of the Check field.
  200. const size_t check_size = lzma_check_size(block->check);
  201. assert(check_size != UINT32_MAX);
  202. // Reserve space for the Check field.
  203. if (out_size - *out_pos <= check_size)
  204. return LZMA_BUF_ERROR;
  205. out_size -= check_size;
  206. // Initialize block->uncompressed_size and calculate the worst-case
  207. // value for block->compressed_size.
  208. block->uncompressed_size = in_size;
  209. block->compressed_size = lzma2_bound(in_size);
  210. if (block->compressed_size == 0)
  211. return LZMA_DATA_ERROR;
  212. // Do the actual compression.
  213. lzma_ret ret = LZMA_BUF_ERROR;
  214. if (try_to_compress)
  215. ret = block_encode_normal(block, allocator,
  216. in, in_size, out, out_pos, out_size);
  217. if (ret != LZMA_OK) {
  218. // If the error was something else than output buffer
  219. // becoming full, return the error now.
  220. if (ret != LZMA_BUF_ERROR)
  221. return ret;
  222. // The data was incompressible (at least with the options
  223. // given to us) or the output buffer was too small. Use the
  224. // uncompressed chunks of LZMA2 to wrap the data into a valid
  225. // Block. If we haven't been given enough output space, even
  226. // this may fail.
  227. return_if_error(block_encode_uncompressed(block, in, in_size,
  228. out, out_pos, out_size));
  229. }
  230. assert(*out_pos <= out_size);
  231. // Block Padding. No buffer overflow here, because we already adjusted
  232. // out_size so that (out_size - out_start) is a multiple of four.
  233. // Thus, if the buffer is full, the loop body can never run.
  234. for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
  235. assert(*out_pos < out_size);
  236. out[(*out_pos)++] = 0x00;
  237. }
  238. // If there's no Check field, we are done now.
  239. if (check_size > 0) {
  240. // Calculate the integrity check. We reserved space for
  241. // the Check field earlier so we don't need to check for
  242. // available output space here.
  243. lzma_check_state check;
  244. lzma_check_init(&check, block->check);
  245. lzma_check_update(&check, block->check, in, in_size);
  246. lzma_check_finish(&check, block->check);
  247. memcpy(block->raw_check, check.buffer.u8, check_size);
  248. memcpy(out + *out_pos, check.buffer.u8, check_size);
  249. *out_pos += check_size;
  250. }
  251. return LZMA_OK;
  252. }
  253. extern LZMA_API(lzma_ret)
  254. lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
  255. const uint8_t *in, size_t in_size,
  256. uint8_t *out, size_t *out_pos, size_t out_size)
  257. {
  258. return block_buffer_encode(block, allocator,
  259. in, in_size, out, out_pos, out_size, true);
  260. }
  261. #ifdef HAVE_SYMBOL_VERSIONS_LINUX
  262. // This is for compatibility with binaries linked against liblzma that
  263. // has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
  264. LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
  265. lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
  266. const uint8_t *in, size_t in_size,
  267. uint8_t *out, size_t *out_pos, size_t out_size)
  268. lzma_nothrow lzma_attr_warn_unused_result
  269. __attribute__((__alias__("lzma_block_uncomp_encode_52")));
  270. LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
  271. lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
  272. const uint8_t *in, size_t in_size,
  273. uint8_t *out, size_t *out_pos, size_t out_size)
  274. lzma_nothrow lzma_attr_warn_unused_result;
  275. #define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
  276. #endif
  277. extern LZMA_API(lzma_ret)
  278. lzma_block_uncomp_encode(lzma_block *block,
  279. const uint8_t *in, size_t in_size,
  280. uint8_t *out, size_t *out_pos, size_t out_size)
  281. {
  282. // It won't allocate any memory from heap so no need
  283. // for lzma_allocator.
  284. return block_buffer_encode(block, NULL,
  285. in, in_size, out, out_pos, out_size, false);
  286. }