huffman.c

/**
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0.
 */

#include <aws/compression/huffman.h>

#define BITSIZEOF(val) (sizeof(val) * 8)

static uint8_t MAX_PATTERN_BITS = BITSIZEOF(((struct aws_huffman_code *)0)->pattern);
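
/* Note: MAX_PATTERN_BITS is the bit-width of the aws_huffman_code pattern field
 * (32 if that field is a uint32_t); code patterns are stored right-aligned in it. */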

void aws_huffman_encoder_init(struct aws_huffman_encoder *encoder, struct aws_huffman_symbol_coder *coder) {
    AWS_ASSERT(encoder);
    AWS_ASSERT(coder);

    AWS_ZERO_STRUCT(*encoder);
    encoder->coder = coder;
    encoder->eos_padding = UINT8_MAX;
}

void aws_huffman_encoder_reset(struct aws_huffman_encoder *encoder) {
    AWS_ASSERT(encoder);

    AWS_ZERO_STRUCT(encoder->overflow_bits);
}

void aws_huffman_decoder_init(struct aws_huffman_decoder *decoder, struct aws_huffman_symbol_coder *coder) {
    AWS_ASSERT(decoder);
    AWS_ASSERT(coder);

    AWS_ZERO_STRUCT(*decoder);
    decoder->coder = coder;
}

void aws_huffman_decoder_reset(struct aws_huffman_decoder *decoder) {
    decoder->working_bits = 0;
    decoder->num_bits = 0;
}

void aws_huffman_decoder_allow_growth(struct aws_huffman_decoder *decoder, bool allow_growth) {
    decoder->allow_growth = allow_growth;
}

/* Much of encode is written in a helper function,
 * so this struct helps avoid passing all the parameters through by hand */
struct encoder_state {
    struct aws_huffman_encoder *encoder;
    struct aws_byte_buf *output_buf;
    uint8_t working;
    uint8_t bit_pos;
};
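
/* `working` holds the byte currently being assembled and `bit_pos` counts how many
 * of its bits are still free. Bits are packed most-significant-bit first; once
 * bit_pos reaches 0 the completed byte is flushed to output_buf and reset. */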

/* Helper function to write a single bit pattern to the output buffer
 * (or to the encoder's overflow_bits if the buffer runs out of space) */
static int encode_write_bit_pattern(struct encoder_state *state, struct aws_huffman_code bit_pattern) {
    AWS_PRECONDITION(state->output_buf->len < state->output_buf->capacity);

    if (bit_pattern.num_bits == 0) {
        return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL);
    }

    uint8_t bits_to_write = bit_pattern.num_bits;
    while (bits_to_write > 0) {
        uint8_t bits_for_current = bits_to_write > state->bit_pos ? state->bit_pos : bits_to_write;
        /* Chop off the top zeros and the bits that have already been written */
        uint8_t bits_to_cut =
            (BITSIZEOF(bit_pattern.pattern) - bit_pattern.num_bits) + (bit_pattern.num_bits - bits_to_write);

        /* Write the appropriate number of bits to this byte:
           shift left to cut any unneeded bits,
           then shift right to position the bits correctly */
        state->working |= (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - state->bit_pos);

        bits_to_write -= bits_for_current;
        state->bit_pos -= bits_for_current;

        if (state->bit_pos == 0) {
            /* Save the whole byte */
            aws_byte_buf_write_u8(state->output_buf, state->working);
            state->bit_pos = 8;
            state->working = 0;

            if (state->output_buf->len == state->output_buf->capacity) {
                state->encoder->overflow_bits.num_bits = bits_to_write;

                if (bits_to_write) {
                    /* If the buffer is full and there are remaining bits, save them to overflow and return */
                    bits_to_cut += bits_for_current;
                    state->encoder->overflow_bits.pattern =
                        (bit_pattern.pattern << bits_to_cut) >> (MAX_PATTERN_BITS - bits_to_write);

                    return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
                }
            }
        }
    }

    return AWS_OP_SUCCESS;
}
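
/*
 * Worked example (assuming the pattern field is 32 bits wide): writing the 5-bit
 * code 0b10110 into an empty byte (bit_pos == 8) computes bits_to_cut = 32 - 5,
 * so (pattern << 27) >> (32 - 8) yields 0b10110000. `working` becomes 0b10110000
 * and bit_pos drops to 3, leaving room for the top three bits of the next code.
 */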

size_t aws_huffman_get_encoded_length(struct aws_huffman_encoder *encoder, struct aws_byte_cursor to_encode) {
    AWS_PRECONDITION(encoder);
    AWS_PRECONDITION(aws_byte_cursor_is_valid(&to_encode));

    size_t num_bits = 0;
    while (to_encode.len) {
        uint8_t new_byte = 0;
        aws_byte_cursor_read_u8(&to_encode, &new_byte);
        struct aws_huffman_code code_point = encoder->coder->encode(new_byte, encoder->coder->userdata);
        num_bits += code_point.num_bits;
    }

    size_t length = num_bits / 8;
    /* Round up */
    if (num_bits % 8) {
        ++length;
    }

    return length;
}

int aws_huffman_encode(
    struct aws_huffman_encoder *encoder,
    struct aws_byte_cursor *to_encode,
    struct aws_byte_buf *output) {

    AWS_ASSERT(encoder);
    AWS_ASSERT(encoder->coder);
    AWS_ASSERT(to_encode);
    AWS_ASSERT(output);

    struct encoder_state state = {
        .working = 0,
        .bit_pos = 8,
    };
    state.encoder = encoder;
    state.output_buf = output;

    /* Write any bits left over from the previous invocation */
    if (encoder->overflow_bits.num_bits) {
        if (output->len == output->capacity) {
            return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
        }

        if (encode_write_bit_pattern(&state, encoder->overflow_bits)) {
            return AWS_OP_ERR;
        }
        encoder->overflow_bits.num_bits = 0;
    }

    while (to_encode->len) {
        if (output->len == output->capacity) {
            return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
        }

        uint8_t new_byte = 0;
        aws_byte_cursor_read_u8(to_encode, &new_byte);
        struct aws_huffman_code code_point = encoder->coder->encode(new_byte, encoder->coder->userdata);

        if (encode_write_bit_pattern(&state, code_point)) {
            return AWS_OP_ERR;
        }
    }

    /* The following only runs once the whole input has been written successfully. */
    /* If the final byte is only partially filled, pad it with the EOS pattern */
    if (state.bit_pos != 8) {
        struct aws_huffman_code eos_cp;
        eos_cp.pattern = encoder->eos_padding;
        eos_cp.num_bits = state.bit_pos;
        encode_write_bit_pattern(&state, eos_cp);

        AWS_ASSERT(state.bit_pos == 8);
    }

    return AWS_OP_SUCCESS;
}
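
/*
 * Usage sketch (illustrative only; `coder` and `allocator` are supplied by the caller):
 *
 *   struct aws_huffman_encoder encoder;
 *   aws_huffman_encoder_init(&encoder, coder);
 *
 *   struct aws_byte_cursor input = aws_byte_cursor_from_c_str("hello");
 *   size_t needed = aws_huffman_get_encoded_length(&encoder, input);
 *
 *   struct aws_byte_buf output;
 *   aws_byte_buf_init(&output, allocator, needed);
 *   int err = aws_huffman_encode(&encoder, &input, &output);
 *
 * On AWS_ERROR_SHORT_BUFFER the unwritten bits are kept in encoder.overflow_bits and
 * the cursor is left at the first unconsumed byte, so the call can be resumed once
 * more buffer space is available.
 */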

/* Decode's reading is written in a helper function,
 * so this struct helps avoid passing all the parameters through by hand */
struct huffman_decoder_state {
    struct aws_huffman_decoder *decoder;
    struct aws_byte_cursor *input_cursor;
};

static void decode_fill_working_bits(struct huffman_decoder_state *state) {
    /* Read bytes from the input until there are enough bits to process */
    while (state->decoder->num_bits < MAX_PATTERN_BITS && state->input_cursor->len) {
        /* Read the next byte and position its bits below the bits already buffered */
        uint8_t new_byte = 0;
        aws_byte_cursor_read_u8(state->input_cursor, &new_byte);

        uint64_t positioned = ((uint64_t)new_byte)
                              << (BITSIZEOF(state->decoder->working_bits) - 8 - state->decoder->num_bits);
        state->decoder->working_bits |= positioned;
        state->decoder->num_bits += 8;
    }
}
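
/* The decoder keeps its input window in working_bits: the num_bits most significant
 * bits are valid, the next symbol's code sits at the very top, and freshly read bytes
 * are merged in just below the valid bits. Consuming a symbol shifts the window left. */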

int aws_huffman_decode(
    struct aws_huffman_decoder *decoder,
    struct aws_byte_cursor *to_decode,
    struct aws_byte_buf *output) {

    AWS_ASSERT(decoder);
    AWS_ASSERT(decoder->coder);
    AWS_ASSERT(to_decode);
    AWS_ASSERT(output);

    struct huffman_decoder_state state;
    state.decoder = decoder;
    state.input_cursor = to_decode;

    /* Tracks how many bits of input remain (already-buffered bits plus unread bytes) */
    size_t bits_left = decoder->num_bits + to_decode->len * 8;

    while (1) {
        decode_fill_working_bits(&state);

        uint8_t symbol;
        uint8_t bits_read = decoder->coder->decode(
            (uint32_t)(decoder->working_bits >> (BITSIZEOF(decoder->working_bits) - MAX_PATTERN_BITS)),
            &symbol,
            decoder->coder->userdata);

        if (bits_read == 0) {
            if (bits_left < MAX_PATTERN_BITS) {
                /* More input is needed to continue */
                return AWS_OP_SUCCESS;
            }
            /* Unknown symbol found */
            return aws_raise_error(AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL);
        }

        if (bits_read > bits_left) {
            /* Check if the input has been overrun.
               Note: because of the check in decode_fill_working_bits,
               the buffer won't actually overrun; instead there will
               be zeros in the bottom of working_bits. */
            return AWS_OP_SUCCESS;
        }

        if (output->len == output->capacity) {
            /* We've hit the end of the output buffer:
             * grow it or raise an error, depending on settings */
            if (decoder->allow_growth) {
                /* Double the capacity */
                if (aws_byte_buf_reserve_relative(output, output->capacity)) {
                    return AWS_OP_ERR;
                }
            } else {
                return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
            }
        }

        bits_left -= bits_read;
        decoder->working_bits <<= bits_read;
        decoder->num_bits -= bits_read;

        /* Store the decoded symbol */
        aws_byte_buf_write_u8(output, symbol);

        /* Successfully decoded the whole input */
        if (bits_left == 0) {
            return AWS_OP_SUCCESS;
        }
    }

    /* This point is unreachable */
    AWS_ASSERT(0);
}
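
/*
 * Decoding sketch (illustrative only; `coder`, `allocator`, and `encoded` are caller-supplied):
 *
 *   struct aws_huffman_decoder decoder;
 *   aws_huffman_decoder_init(&decoder, coder);
 *   aws_huffman_decoder_allow_growth(&decoder, true); // let the output buffer grow as needed
 *
 *   struct aws_byte_buf output;
 *   aws_byte_buf_init(&output, allocator, 64);
 *   if (aws_huffman_decode(&decoder, &encoded, &output)) {
 *       // e.g. AWS_ERROR_COMPRESSION_UNKNOWN_SYMBOL on malformed input
 *   }
 */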