crc_internal.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. // Copyright 2022 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
  15. #define ABSL_CRC_INTERNAL_CRC_INTERNAL_H_
  16. #include <cstdint>
  17. #include <memory>
  18. #include <vector>
  19. #include "absl/base/internal/raw_logging.h"
  20. #include "absl/crc/internal/crc.h"
  21. namespace absl {
  22. ABSL_NAMESPACE_BEGIN
  23. namespace crc_internal {
  24. // Prefetch constants used in some Extend() implementations
  25. constexpr int kPrefetchHorizon = ABSL_CACHELINE_SIZE * 4; // Prefetch this far
  26. // Shorter prefetch distance for smaller buffers
  27. constexpr int kPrefetchHorizonMedium = ABSL_CACHELINE_SIZE * 1;
  28. static_assert(kPrefetchHorizon >= 64, "CRCPrefetchHorizon less than loop len");
  29. // We require the Scramble() function:
  30. // - to be reversible (Unscramble() must exist)
  31. // - to be non-linear in the polynomial's Galois field (so the CRC of a
  32. // scrambled CRC is not linearly affected by the scrambled CRC, even if
  33. // using the same polynomial)
  34. // - not to be its own inverse. Preferably, if X=Scramble^N(X) and N!=0, then
  35. // N is large.
  36. // - to be fast.
  37. // - not to change once defined.
  38. // We introduce non-linearity in two ways:
  39. // Addition of a constant.
  40. // - The carries introduce non-linearity; we use bits of an irrational
  41. // (phi) to make it unlikely that we introduce no carries.
  42. // Rotate by a constant number of bits.
  43. // - We use floor(degree/2)+1, which does not divide the degree, and
  44. // splits the bits nearly evenly, which makes it less likely the
  45. // halves will be the same or one will be all zeroes.
  46. // We do both things to improve the chances of non-linearity in the face of
  47. // bit patterns with low numbers of bits set, while still being fast.
  48. // Below is the constant that we add. The bits are the first 128 bits of the
  49. // fractional part of phi, with a 1 ored into the bottom bit to maximize the
  50. // cycle length of repeated adds.
  51. constexpr uint64_t kScrambleHi = (static_cast<uint64_t>(0x4f1bbcdcU) << 32) |
  52. static_cast<uint64_t>(0xbfa53e0aU);
  53. constexpr uint64_t kScrambleLo = (static_cast<uint64_t>(0xf9ce6030U) << 32) |
  54. static_cast<uint64_t>(0x2e76e41bU);
  55. class CRCImpl : public CRC { // Implementation of the abstract class CRC
  56. public:
  57. using Uint32By256 = uint32_t[256];
  58. CRCImpl() = default;
  59. ~CRCImpl() override = default;
  60. // The internal version of CRC::New().
  61. static CRCImpl* NewInternal();
  62. // Fill in a table for updating a CRC by one word of 'word_size' bytes
  63. // [last_lo, last_hi] contains the answer if the last bit in the word
  64. // is set.
  65. static void FillWordTable(uint32_t poly, uint32_t last, int word_size,
  66. Uint32By256* t);
  67. // Build the table for extending by zeroes, returning the number of entries.
  68. // For a in {1, 2, ..., ZEROES_BASE-1}, b in {0, 1, 2, 3, ...},
  69. // entry j=a-1+(ZEROES_BASE-1)*b
  70. // contains a polynomial Pi such that multiplying
  71. // a CRC by Pi mod P, where P is the CRC polynomial, is equivalent to
  72. // appending a*2**(ZEROES_BASE_LG*b) zero bytes to the original string.
  73. static int FillZeroesTable(uint32_t poly, Uint32By256* t);
  74. virtual void InitTables() = 0;
  75. private:
  76. CRCImpl(const CRCImpl&) = delete;
  77. CRCImpl& operator=(const CRCImpl&) = delete;
  78. };
  79. // This is the 32-bit implementation. It handles all sizes from 8 to 32.
  80. class CRC32 : public CRCImpl {
  81. public:
  82. CRC32() = default;
  83. ~CRC32() override = default;
  84. void Extend(uint32_t* crc, const void* bytes, size_t length) const override;
  85. void ExtendByZeroes(uint32_t* crc, size_t length) const override;
  86. void Scramble(uint32_t* crc) const override;
  87. void Unscramble(uint32_t* crc) const override;
  88. void UnextendByZeroes(uint32_t* crc, size_t length) const override;
  89. void InitTables() override;
  90. private:
  91. // Common implementation guts for ExtendByZeroes and UnextendByZeroes().
  92. //
  93. // zeroes_table is a table as returned by FillZeroesTable(), containing
  94. // polynomials representing CRCs of strings-of-zeros of various lengths,
  95. // and which can be combined by polynomial multiplication. poly_table is
  96. // a table of CRC byte extension values. These tables are determined by
  97. // the generator polynomial.
  98. //
  99. // These will be set to reverse_zeroes_ and reverse_table0_ for Unextend, and
  100. // CRC32::zeroes_ and CRC32::table0_ for Extend.
  101. static void ExtendByZeroesImpl(uint32_t* crc, size_t length,
  102. const uint32_t zeroes_table[256],
  103. const uint32_t poly_table[256]);
  104. uint32_t table0_[256]; // table of byte extensions
  105. uint32_t zeroes_[256]; // table of zero extensions
  106. // table of 4-byte extensions shifted by 12 bytes of zeroes
  107. uint32_t table_[4][256];
  108. // Reverse lookup tables, using the alternate polynomial used by
  109. // UnextendByZeroes().
  110. uint32_t reverse_table0_[256]; // table of reverse byte extensions
  111. uint32_t reverse_zeroes_[256]; // table of reverse zero extensions
  112. CRC32(const CRC32&) = delete;
  113. CRC32& operator=(const CRC32&) = delete;
  114. };
  115. // Helpers
  116. // Return a bit mask containing len 1-bits.
  117. // Requires 0 < len <= sizeof(T)
  118. template <typename T>
  119. T MaskOfLength(int len) {
  120. // shift 2 by len-1 rather than 1 by len because shifts of wordsize
  121. // are undefined.
  122. return (T(2) << (len - 1)) - 1;
  123. }
  124. // Rotate low-order "width" bits of "in" right by "r" bits,
  125. // setting other bits in word to arbitrary values.
  126. template <typename T>
  127. T RotateRight(T in, int width, int r) {
  128. return (in << (width - r)) | ((in >> r) & MaskOfLength<T>(width - r));
  129. }
  130. // RoundUp<N>(p) returns the lowest address >= p aligned to an N-byte
  131. // boundary. Requires that N is a power of 2.
  132. template <int alignment>
  133. const uint8_t* RoundUp(const uint8_t* p) {
  134. static_assert((alignment & (alignment - 1)) == 0, "alignment is not 2^n");
  135. constexpr uintptr_t mask = alignment - 1;
  136. const uintptr_t as_uintptr = reinterpret_cast<uintptr_t>(p);
  137. return reinterpret_cast<const uint8_t*>((as_uintptr + mask) & ~mask);
  138. }
  139. // Return a newly created CRC32AcceleratedX86ARMCombined if we can use Intel's
  140. // or ARM's CRC acceleration for a given polynomial. Return nullptr otherwise.
  141. CRCImpl* TryNewCRC32AcceleratedX86ARMCombined();
  142. // Return all possible hardware accelerated implementations. For testing only.
  143. std::vector<std::unique_ptr<CRCImpl>> NewCRC32AcceleratedX86ARMCombinedAll();
  144. } // namespace crc_internal
  145. ABSL_NAMESPACE_END
  146. } // namespace absl
  147. #endif // ABSL_CRC_INTERNAL_CRC_INTERNAL_H_