123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498 |
- /* Copyright 2010 Google Inc. All Rights Reserved.
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
- /* A (forgetful) hash table to the data seen by the compressor, to
- help create backward references to previous data. */
- #ifndef BROTLI_ENC_HASH_H_
- #define BROTLI_ENC_HASH_H_
- #include <string.h> /* memcmp, memset */
- #include "../common/constants.h"
- #include "../common/dictionary.h"
- #include "../common/platform.h"
- #include <brotli/types.h>
- #include "./encoder_dict.h"
- #include "./fast_log.h"
- #include "./find_match_length.h"
- #include "./memory.h"
- #include "./quality.h"
- #include "./static_dict.h"
- #if defined(__cplusplus) || defined(c_plusplus)
- extern "C" {
- #endif
- /* Pointer to hasher data.
- *
- * Excluding initialization and destruction, hasher can be passed as
- * HasherHandle by value.
- *
- * Typically hasher data consists of 3 sections:
- * * HasherCommon structure
- * * private structured hasher data, depending on hasher type
- * * private dynamic hasher data, depending on hasher type and parameters
- *
- * Using "define" instead of "typedef", because on MSVC __restrict does not work
- * on typedef pointer types. */
- #define HasherHandle uint8_t*
- typedef struct {
- BrotliHasherParams params;
- /* False if hasher needs to be "prepared" before use. */
- BROTLI_BOOL is_prepared_;
- size_t dict_num_lookups;
- size_t dict_num_matches;
- } HasherCommon;
- static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
- return (HasherCommon*)handle;
- }
- #define score_t size_t
- static const uint32_t kCutoffTransformsCount = 10;
- /* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
- /* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
- static const uint64_t kCutoffTransforms =
- BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
- typedef struct HasherSearchResult {
- size_t len;
- size_t distance;
- score_t score;
- int len_code_delta; /* == len_code - len */
- } HasherSearchResult;
- /* kHashMul32 multiplier has these properties:
- * The multiplier must be odd. Otherwise we may lose the highest bit.
- * No long streaks of ones or zeros.
- * There is no effort to ensure that it is a prime, the oddity is enough
- for this use.
- * The number has been tuned heuristically against compression benchmarks. */
- static const uint32_t kHashMul32 = 0x1E35A7BD;
- static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
- static const uint64_t kHashMul64Long =
- BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
- static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
- uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
- /* The higher bits contain more mixture from the multiplication,
- so we take our results from there. */
- return h >> (32 - 14);
- }
- static BROTLI_INLINE void PrepareDistanceCache(
- int* BROTLI_RESTRICT distance_cache, const int num_distances) {
- if (num_distances > 4) {
- int last_distance = distance_cache[0];
- distance_cache[4] = last_distance - 1;
- distance_cache[5] = last_distance + 1;
- distance_cache[6] = last_distance - 2;
- distance_cache[7] = last_distance + 2;
- distance_cache[8] = last_distance - 3;
- distance_cache[9] = last_distance + 3;
- if (num_distances > 10) {
- int next_last_distance = distance_cache[1];
- distance_cache[10] = next_last_distance - 1;
- distance_cache[11] = next_last_distance + 1;
- distance_cache[12] = next_last_distance - 2;
- distance_cache[13] = next_last_distance + 2;
- distance_cache[14] = next_last_distance - 3;
- distance_cache[15] = next_last_distance + 3;
- }
- }
- }
- #define BROTLI_LITERAL_BYTE_SCORE 135
- #define BROTLI_DISTANCE_BIT_PENALTY 30
- /* Score must be positive after applying maximal penalty. */
- #define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
- /* Usually, we always choose the longest backward reference. This function
- allows for the exception of that rule.
- If we choose a backward reference that is further away, it will
- usually be coded with more bits. We approximate this by assuming
- log2(distance). If the distance can be expressed in terms of the
- last four distances, we use some heuristic constants to estimate
- the bits cost. For the first up to four literals we use the bit
- cost of the literals from the literal cost model, after that we
- use the average bit cost of the cost model.
- This function is used to sometimes discard a longer backward reference
- when it is not much longer and the bit cost for encoding it is more
- than the saved literals.
- backward_reference_offset MUST be positive. */
- static BROTLI_INLINE score_t BackwardReferenceScore(
- size_t copy_length, size_t backward_reference_offset) {
- return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
- BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
- }
- static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
- size_t copy_length) {
- return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
- BROTLI_SCORE_BASE + 15;
- }
- static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
- size_t distance_short_code) {
- return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
- }
- static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
- const BrotliEncoderDictionary* dictionary, size_t item,
- const uint8_t* data, size_t max_length, size_t max_backward,
- size_t max_distance, HasherSearchResult* out) {
- size_t len;
- size_t word_idx;
- size_t offset;
- size_t matchlen;
- size_t backward;
- score_t score;
- len = item & 0x1F;
- word_idx = item >> 5;
- offset = dictionary->words->offsets_by_length[len] + len * word_idx;
- if (len > max_length) {
- return BROTLI_FALSE;
- }
- matchlen =
- FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
- if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
- return BROTLI_FALSE;
- }
- {
- size_t cut = len - matchlen;
- size_t transform_id = (cut << 2) +
- (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
- backward = max_backward + 1 + word_idx +
- (transform_id << dictionary->words->size_bits_by_length[len]);
- }
- if (backward > max_distance) {
- return BROTLI_FALSE;
- }
- score = BackwardReferenceScore(matchlen, backward);
- if (score < out->score) {
- return BROTLI_FALSE;
- }
- out->len = matchlen;
- out->len_code_delta = (int)len - (int)matchlen;
- out->distance = backward;
- out->score = score;
- return BROTLI_TRUE;
- }
- static BROTLI_INLINE void SearchInStaticDictionary(
- const BrotliEncoderDictionary* dictionary,
- HasherHandle handle, const uint8_t* data, size_t max_length,
- size_t max_backward, size_t max_distance,
- HasherSearchResult* out, BROTLI_BOOL shallow) {
- size_t key;
- size_t i;
- HasherCommon* self = GetHasherCommon(handle);
- if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
- return;
- }
- key = Hash14(data) << 1;
- for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
- size_t item = dictionary->hash_table[key];
- self->dict_num_lookups++;
- if (item != 0) {
- BROTLI_BOOL item_matches = TestStaticDictionaryItem(
- dictionary, item, data,
- max_length, max_backward, max_distance, out);
- if (item_matches) {
- self->dict_num_matches++;
- }
- }
- }
- }
- typedef struct BackwardMatch {
- uint32_t distance;
- uint32_t length_and_code;
- } BackwardMatch;
- static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
- size_t dist, size_t len) {
- self->distance = (uint32_t)dist;
- self->length_and_code = (uint32_t)(len << 5);
- }
- static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
- size_t dist, size_t len, size_t len_code) {
- self->distance = (uint32_t)dist;
- self->length_and_code =
- (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
- }
- static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
- return self->length_and_code >> 5;
- }
- static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
- size_t code = self->length_and_code & 31;
- return code ? code : BackwardMatchLength(self);
- }
- #define EXPAND_CAT(a, b) CAT(a, b)
- #define CAT(a, b) a ## b
- #define FN(X) EXPAND_CAT(X, HASHER())
- #define HASHER() H10
- #define BUCKET_BITS 17
- #define MAX_TREE_SEARCH_DEPTH 64
- #define MAX_TREE_COMP_LENGTH 128
- #include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
- #undef MAX_TREE_SEARCH_DEPTH
- #undef MAX_TREE_COMP_LENGTH
- #undef BUCKET_BITS
- #undef HASHER
- /* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
- #define MAX_NUM_MATCHES_H10 128
- /* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
- a little faster (0.5% - 1%) and it compresses 0.15% better on small text
- and HTML inputs. */
- #define HASHER() H2
- #define BUCKET_BITS 16
- #define BUCKET_SWEEP 1
- #define HASH_LEN 5
- #define USE_DICTIONARY 1
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
- #undef BUCKET_SWEEP
- #undef USE_DICTIONARY
- #undef HASHER
- #define HASHER() H3
- #define BUCKET_SWEEP 2
- #define USE_DICTIONARY 0
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
- #undef USE_DICTIONARY
- #undef BUCKET_SWEEP
- #undef BUCKET_BITS
- #undef HASHER
- #define HASHER() H4
- #define BUCKET_BITS 17
- #define BUCKET_SWEEP 4
- #define USE_DICTIONARY 1
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
- #undef USE_DICTIONARY
- #undef HASH_LEN
- #undef BUCKET_SWEEP
- #undef BUCKET_BITS
- #undef HASHER
- #define HASHER() H5
- #include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
- #undef HASHER
- #define HASHER() H6
- #include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
- #undef HASHER
- #define BUCKET_BITS 15
- #define NUM_LAST_DISTANCES_TO_CHECK 4
- #define NUM_BANKS 1
- #define BANK_BITS 16
- #define HASHER() H40
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
- #undef HASHER
- #undef NUM_LAST_DISTANCES_TO_CHECK
- #define NUM_LAST_DISTANCES_TO_CHECK 10
- #define HASHER() H41
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
- #undef HASHER
- #undef NUM_LAST_DISTANCES_TO_CHECK
- #undef NUM_BANKS
- #undef BANK_BITS
- #define NUM_LAST_DISTANCES_TO_CHECK 16
- #define NUM_BANKS 512
- #define BANK_BITS 9
- #define HASHER() H42
- #include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
- #undef HASHER
- #undef NUM_LAST_DISTANCES_TO_CHECK
- #undef NUM_BANKS
- #undef BANK_BITS
- #undef BUCKET_BITS
- #define HASHER() H54
- #define BUCKET_BITS 20
- #define BUCKET_SWEEP 4
- #define HASH_LEN 7
- #define USE_DICTIONARY 0
- #include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
- #undef USE_DICTIONARY
- #undef HASH_LEN
- #undef BUCKET_SWEEP
- #undef BUCKET_BITS
- #undef HASHER
- /* fast large window hashers */
- #define HASHER() HROLLING_FAST
- #define CHUNKLEN 32
- #define JUMP 4
- #define NUMBUCKETS 16777216
- #define MASK ((NUMBUCKETS * 64) - 1)
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
- #undef JUMP
- #undef HASHER
- #define HASHER() HROLLING
- #define JUMP 1
- #include "./hash_rolling_inc.h" /* NOLINT(build/include) */
- #undef MASK
- #undef NUMBUCKETS
- #undef JUMP
- #undef CHUNKLEN
- #undef HASHER
- #define HASHER() H35
- #define HASHER_A H3
- #define HASHER_B HROLLING_FAST
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
- #undef HASHER_A
- #undef HASHER_B
- #undef HASHER
- #define HASHER() H55
- #define HASHER_A H54
- #define HASHER_B HROLLING_FAST
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
- #undef HASHER_A
- #undef HASHER_B
- #undef HASHER
- #define HASHER() H65
- #define HASHER_A H6
- #define HASHER_B HROLLING
- #include "./hash_composite_inc.h" /* NOLINT(build/include) */
- #undef HASHER_A
- #undef HASHER_B
- #undef HASHER
- #undef FN
- #undef CAT
- #undef EXPAND_CAT
- #define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\
- H(35) H(55) H(65)
- #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
- static BROTLI_INLINE void DestroyHasher(
- MemoryManager* m, HasherHandle* handle) {
- if (*handle == NULL) return;
- BROTLI_FREE(m, *handle);
- }
- static BROTLI_INLINE void HasherReset(HasherHandle handle) {
- if (handle == NULL) return;
- GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
- }
- static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
- BROTLI_BOOL one_shot, const size_t input_size) {
- size_t result = sizeof(HasherCommon);
- switch (params->hasher.type) {
- #define SIZE_(N) \
- case N: \
- result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
- break;
- FOR_ALL_HASHERS(SIZE_)
- #undef SIZE_
- default:
- break;
- }
- return result;
- }
- static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
- BrotliEncoderParams* params, const uint8_t* data, size_t position,
- size_t input_size, BROTLI_BOOL is_last) {
- HasherHandle self = NULL;
- HasherCommon* common = NULL;
- BROTLI_BOOL one_shot = (position == 0 && is_last);
- if (*handle == NULL) {
- size_t alloc_size;
- ChooseHasher(params, ¶ms->hasher);
- alloc_size = HasherSize(params, one_shot, input_size);
- self = BROTLI_ALLOC(m, uint8_t, alloc_size);
- if (BROTLI_IS_OOM(m)) return;
- *handle = self;
- common = GetHasherCommon(self);
- common->params = params->hasher;
- switch (common->params.type) {
- #define INITIALIZE_(N) \
- case N: \
- InitializeH ## N(*handle, params); \
- break;
- FOR_ALL_HASHERS(INITIALIZE_);
- #undef INITIALIZE_
- default:
- break;
- }
- HasherReset(*handle);
- }
- self = *handle;
- common = GetHasherCommon(self);
- if (!common->is_prepared_) {
- switch (common->params.type) {
- #define PREPARE_(N) \
- case N: \
- PrepareH ## N(self, one_shot, input_size, data); \
- break;
- FOR_ALL_HASHERS(PREPARE_)
- #undef PREPARE_
- default: break;
- }
- if (position == 0) {
- common->dict_num_lookups = 0;
- common->dict_num_matches = 0;
- }
- common->is_prepared_ = BROTLI_TRUE;
- }
- }
- static BROTLI_INLINE void InitOrStitchToPreviousBlock(
- MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
- BrotliEncoderParams* params, size_t position, size_t input_size,
- BROTLI_BOOL is_last) {
- HasherHandle self;
- HasherSetup(m, handle, params, data, position, input_size, is_last);
- if (BROTLI_IS_OOM(m)) return;
- self = *handle;
- switch (GetHasherCommon(self)->params.type) {
- #define INIT_(N) \
- case N: \
- StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
- break;
- FOR_ALL_HASHERS(INIT_)
- #undef INIT_
- default: break;
- }
- }
- #if defined(__cplusplus) || defined(c_plusplus)
- } /* extern "C" */
- #endif
- #endif /* BROTLI_ENC_HASH_H_ */
|