123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
- #define HistogramType FN(Histogram)
- double FN(BrotliPopulationCost)(const HistogramType* histogram) {
- static const double kOneSymbolHistogramCost = 12;
- static const double kTwoSymbolHistogramCost = 20;
- static const double kThreeSymbolHistogramCost = 28;
- static const double kFourSymbolHistogramCost = 37;
- const size_t data_size = FN(HistogramDataSize)();
- int count = 0;
- size_t s[5];
- double bits = 0.0;
- size_t i;
- if (histogram->total_count_ == 0) {
- return kOneSymbolHistogramCost;
- }
- for (i = 0; i < data_size; ++i) {
- if (histogram->data_[i] > 0) {
- s[count] = i;
- ++count;
- if (count > 4) break;
- }
- }
- if (count == 1) {
- return kOneSymbolHistogramCost;
- }
- if (count == 2) {
- return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
- }
- if (count == 3) {
- const uint32_t histo0 = histogram->data_[s[0]];
- const uint32_t histo1 = histogram->data_[s[1]];
- const uint32_t histo2 = histogram->data_[s[2]];
- const uint32_t histomax =
- BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
- return (kThreeSymbolHistogramCost +
- 2 * (histo0 + histo1 + histo2) - histomax);
- }
- if (count == 4) {
- uint32_t histo[4];
- uint32_t h23;
- uint32_t histomax;
- for (i = 0; i < 4; ++i) {
- histo[i] = histogram->data_[s[i]];
- }
- /* Sort */
- for (i = 0; i < 4; ++i) {
- size_t j;
- for (j = i + 1; j < 4; ++j) {
- if (histo[j] > histo[i]) {
- BROTLI_SWAP(uint32_t, histo, j, i);
- }
- }
- }
- h23 = histo[2] + histo[3];
- histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
- return (kFourSymbolHistogramCost +
- 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
- }
- {
- /* In this loop we compute the entropy of the histogram and simultaneously
- build a simplified histogram of the code length codes where we use the
- zero repeat code 17, but we don't use the non-zero repeat code 16. */
- size_t max_depth = 1;
- uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
- const double log2total = FastLog2(histogram->total_count_);
- for (i = 0; i < data_size;) {
- if (histogram->data_[i] > 0) {
- /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
- = log2(total_count) - log2(count(symbol)) */
- double log2p = log2total - FastLog2(histogram->data_[i]);
- /* Approximate the bit depth by round(-log2(P(symbol))) */
- size_t depth = (size_t)(log2p + 0.5);
- bits += histogram->data_[i] * log2p;
- if (depth > 15) {
- depth = 15;
- }
- if (depth > max_depth) {
- max_depth = depth;
- }
- ++depth_histo[depth];
- ++i;
- } else {
- /* Compute the run length of zeros and add the appropriate number of 0
- and 17 code length codes to the code length code histogram. */
- uint32_t reps = 1;
- size_t k;
- for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
- ++reps;
- }
- i += reps;
- if (i == data_size) {
- /* Don't add any cost for the last zero run, since these are encoded
- only implicitly. */
- break;
- }
- if (reps < 3) {
- depth_histo[0] += reps;
- } else {
- reps -= 2;
- while (reps > 0) {
- ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
- /* Add the 3 extra bits for the 17 code length code. */
- bits += 3;
- reps >>= 3;
- }
- }
- }
- }
- /* Add the estimated encoding cost of the code length code histogram. */
- bits += (double)(18 + 2 * max_depth);
- /* Add the entropy of the code length code histogram. */
- bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
- }
- return bits;
- }
- #undef HistogramType
|