123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- /* NOLINT(build/header_guard) */
- /* Copyright 2018 Google Inc. All Rights Reserved.
- Distributed under MIT license.
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
- */
- /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
- /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
- /* JUMP = skip bytes for speedup */
- /* Rolling hash for long distance long string matches. Stores one position
- per bucket, bucket key is computed over a long region. */
- #define HashRolling HASHER()
- static const uint32_t FN(kRollingHashMul32) = 69069;
- static const uint32_t FN(kInvalidPos) = 0xffffffff;
- /* This hasher uses a longer forward length, but returning a higher value here
- will hurt compression by the main hasher when combined with a composite
- hasher. The hasher tests for forward itself instead. */
- static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
- static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
- /* Computes a code from a single byte. A lookup table of 256 values could be
- used, but simply adding 1 works about as good. */
- static uint32_t FN(HashByte)(uint8_t byte) {
- return (uint32_t)byte + 1u;
- }
- static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
- uint32_t factor) {
- return (uint32_t)(factor * state + FN(HashByte)(add));
- }
- static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
- uint8_t rem, uint32_t factor,
- uint32_t factor_remove) {
- return (uint32_t)(factor * state +
- FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
- }
- typedef struct HashRolling {
- uint32_t state;
- uint32_t* table;
- size_t next_ix;
- uint32_t chunk_len;
- uint32_t factor;
- uint32_t factor_remove;
- } HashRolling;
- static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
- return (HashRolling*)&(GetHasherCommon(handle)[1]);
- }
- static void FN(Initialize)(
- HasherHandle handle, const BrotliEncoderParams* params) {
- HashRolling* self = FN(Self)(handle);
- size_t i;
- self->state = 0;
- self->next_ix = 0;
- self->factor = FN(kRollingHashMul32);
- /* Compute the factor of the oldest byte to remove: factor**steps modulo
- 0xffffffff (the multiplications rely on 32-bit overflow) */
- self->factor_remove = 1;
- for (i = 0; i < CHUNKLEN; i += JUMP) {
- self->factor_remove *= self->factor;
- }
- self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
- for (i = 0; i < NUMBUCKETS; i++) {
- self->table[i] = FN(kInvalidPos);
- }
- BROTLI_UNUSED(params);
- }
- static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
- size_t input_size, const uint8_t* data) {
- HashRolling* self = FN(Self)(handle);
- size_t i;
- /* Too small size, cannot use this hasher. */
- if (input_size < CHUNKLEN) return;
- self->state = 0;
- for (i = 0; i < CHUNKLEN; i += JUMP) {
- self->state = FN(HashRollingFunctionInitial)(
- self->state, data[i], self->factor);
- }
- BROTLI_UNUSED(one_shot);
- }
- static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
- const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
- size_t input_size) {
- return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
- BROTLI_UNUSED(params);
- BROTLI_UNUSED(one_shot);
- BROTLI_UNUSED(input_size);
- }
- static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
- const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
- BROTLI_UNUSED(handle);
- BROTLI_UNUSED(data);
- BROTLI_UNUSED(mask);
- BROTLI_UNUSED(ix);
- }
- static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
- const uint8_t* data, const size_t mask, const size_t ix_start,
- const size_t ix_end) {
- BROTLI_UNUSED(handle);
- BROTLI_UNUSED(data);
- BROTLI_UNUSED(mask);
- BROTLI_UNUSED(ix_start);
- BROTLI_UNUSED(ix_end);
- }
- static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
- size_t num_bytes, size_t position, const uint8_t* ringbuffer,
- size_t ring_buffer_mask) {
- /* In this case we must re-initialize the hasher from scratch from the
- current position. */
- HashRolling* self = FN(Self)(handle);
- size_t position_masked;
- size_t available = num_bytes;
- if ((position & (JUMP - 1)) != 0) {
- size_t diff = JUMP - (position & (JUMP - 1));
- available = (diff > available) ? 0 : (available - diff);
- position += diff;
- }
- position_masked = position & ring_buffer_mask;
- /* wrapping around ringbuffer not handled. */
- if (available > ring_buffer_mask - position_masked) {
- available = ring_buffer_mask - position_masked;
- }
- FN(Prepare)(handle, BROTLI_FALSE, available,
- ringbuffer + (position & ring_buffer_mask));
- self->next_ix = position;
- BROTLI_UNUSED(num_bytes);
- }
- static BROTLI_INLINE void FN(PrepareDistanceCache)(
- HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
- BROTLI_UNUSED(handle);
- BROTLI_UNUSED(distance_cache);
- }
- static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
- const BrotliEncoderDictionary* dictionary,
- const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
- const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
- const size_t max_length, const size_t max_backward,
- const size_t gap, const size_t max_distance,
- HasherSearchResult* BROTLI_RESTRICT out) {
- HashRolling* self = FN(Self)(handle);
- const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
- size_t pos = self->next_ix;
- if ((cur_ix & (JUMP - 1)) != 0) return;
- /* Not enough lookahead */
- if (max_length < CHUNKLEN) return;
- for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
- uint32_t code = self->state & MASK;
- uint8_t rem = data[pos & ring_buffer_mask];
- uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
- size_t found_ix = FN(kInvalidPos);
- self->state = FN(HashRollingFunction)(
- self->state, add, rem, self->factor, self->factor_remove);
- if (code < NUMBUCKETS) {
- found_ix = self->table[code];
- self->table[code] = (uint32_t)pos;
- if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
- /* The cast to 32-bit makes backward distances up to 4GB work even
- if cur_ix is above 4GB, despite using 32-bit values in the table. */
- size_t backward = (uint32_t)(cur_ix - found_ix);
- if (backward <= max_backward) {
- const size_t found_ix_masked = found_ix & ring_buffer_mask;
- const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
- &data[cur_ix_masked],
- max_length);
- if (len >= 4 && len > out->len) {
- score_t score = BackwardReferenceScore(len, backward);
- if (score > out->score) {
- out->len = len;
- out->distance = backward;
- out->score = score;
- out->len_code_delta = 0;
- }
- }
- }
- }
- }
- }
- self->next_ix = cur_ix + JUMP;
- /* NOTE: this hasher does not search in the dictionary. It is used as
- backup-hasher, the main hasher already searches in it. */
- BROTLI_UNUSED(dictionary);
- BROTLI_UNUSED(distance_cache);
- BROTLI_UNUSED(gap);
- BROTLI_UNUSED(max_distance);
- }
- #undef HashRolling
|