crc32c_sse4.h 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. // Copyright 2010 Google Inc. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Implements CRC32C using Intel's SSE4 crc32 instruction.
  15. // Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
  16. // emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
  17. #ifndef CRCUTIL_CRC32C_SSE4_H_
  18. #define CRCUTIL_CRC32C_SSE4_H_
  19. #include "gf_util.h" // base types, gf_util class, etc.
  20. #include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
  21. #if HAVE_I386 || HAVE_AMD64
  22. #if CRCUTIL_USE_MM_CRC32
  23. #if HAVE_I386
  24. #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
  25. #else
  26. #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
  27. #endif // HAVE_I386
  28. #define CRC_UPDATE_BYTE(crc, value) \
  29. (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
  30. #else
  31. #include "generic_crc.h"
  32. #define CRC_UPDATE_WORD(crc, value) do { \
  33. size_t buf = (value); \
  34. CRC_WORD(this, crc, buf); \
  35. } while (0)
  36. #define CRC_UPDATE_BYTE(crc, value) do { \
  37. CRC_BYTE(this, crc, (value)); \
  38. } while (0)
  39. #endif // CRCUTIL_USE_MM_CRC32
  40. namespace crcutil {
  41. #pragma pack(push, 16)
  42. // Since the same pieces should be parameterized in many different places
  43. // and we do not want to introduce a mistake which is rather hard to find,
  44. // use a macro to enumerate all block sizes.
  45. //
  46. // Block sizes and number of stripes were tuned for best performance.
  47. //
  48. // All constants should be literal constants (too lazy to fix the macro).
  49. //
  50. // The use of different "macro_first", "macro", and "macro_last"
  51. // allows generation of different code for smallest, in between,
  52. // and largest block sizes.
  53. //
  54. // This macro shall be kept in sync with
  55. // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
  56. // Failure to do so will cause compile-time error.
  57. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
  58. macro_smallest, macro, macro_largest) \
  59. macro_smallest(512, 3); \
  60. macro(1024, 3); \
  61. macro(4096, 3); \
  62. macro_largest(32768, 3)
  63. // This macro shall be kept in sync with
  64. // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
  65. // Failure to do so will cause compile-time error.
  66. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
  67. macro_smallest, macro, macro_largest) \
  68. macro_largest(32768, 3); \
  69. macro(4096, 3); \
  70. macro(1024, 3); \
  71. macro_smallest(512, 3)
  72. // Enumerates all block sizes.
  73. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
  74. CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
  75. #define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
  76. (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
  77. #define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
  78. (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
  79. #define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
  80. mul_table_##block_size##_##num_blocks##_
  81. class RollingCrc32cSSE4;
  82. class Crc32cSSE4 {
  83. public:
  84. // Exports Crc, TableEntry, and Word (needed by RollingCrc).
  85. typedef size_t Crc;
  86. typedef Crc Word;
  87. typedef Crc TableEntry;
  88. Crc32cSSE4() {}
  89. // Initializes the tables given generating polynomial of degree (degree).
  90. // If "canonical" is true, crc value will be XOR'ed with (-1) before and
  91. // after actual CRC computation.
  92. explicit Crc32cSSE4(bool canonical) {
  93. Init(canonical);
  94. }
  95. void Init(bool canonical);
  96. // Initializes the tables given generating polynomial of degree.
  97. // If "canonical" is true, crc value will be XOR'ed with (-1) before and
  98. // after actual CRC computation.
  99. // Provided for compatibility with GenericCrc.
  100. Crc32cSSE4(const Crc &generating_polynomial,
  101. size_t degree,
  102. bool canonical) {
  103. Init(generating_polynomial, degree, canonical);
  104. }
  105. void Init(const Crc &generating_polynomial,
  106. size_t degree,
  107. bool canonical) {
  108. if (generating_polynomial == FixedGeneratingPolynomial() &&
  109. degree == FixedDegree()) {
  110. Init(canonical);
  111. }
  112. }
  113. // Returns fixed generating polymonial the class implements.
  114. static Crc FixedGeneratingPolynomial() {
  115. return 0x82f63b78;
  116. }
  117. // Returns degree of fixed generating polymonial the class implements.
  118. static Crc FixedDegree() {
  119. return 32;
  120. }
  121. // Returns base class.
  122. const GfUtil<Crc> &Base() const { return base_; }
  123. // Computes CRC32.
  124. size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
  125. return Crc32c(data, bytes, crc);
  126. }
  127. // Returns true iff crc32 instruction is available.
  128. static bool IsSSE42Available();
  129. protected:
  130. // Actual implementation.
  131. size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
  132. enum {
  133. kTableEntryBits = 8,
  134. kTableEntries = 1 << kTableEntryBits,
  135. kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
  136. kNumTablesHalfLo = kNumTables / 2,
  137. kNumTablesHalfHi = (kNumTables + 1) / 2,
  138. kUnrolledLoopCount = 8,
  139. kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
  140. };
  141. // May be set to size_t or uint32, whichever is faster.
  142. typedef uint32 Entry;
  143. #define DECLARE_MUL_TABLE(block_size, num_stripes) \
  144. Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
  145. [kNumTables][kTableEntries]
  146. CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
  147. #undef DECLARE_MUL_TABLE
  148. GfUtil<Crc> base_;
  149. #if !CRCUTIL_USE_MM_CRC32
  150. TableEntry crc_word_[sizeof(Word)][256];
  151. friend class RollingCrc32cSSE4;
  152. #endif // !CRCUTIL_USE_MM_CRC32
  153. } GCC_ALIGN_ATTRIBUTE(16);
  154. class RollingCrc32cSSE4 {
  155. public:
  156. typedef Crc32cSSE4::Crc Crc;
  157. typedef Crc32cSSE4::TableEntry TableEntry;
  158. typedef Crc32cSSE4::Word Word;
  159. RollingCrc32cSSE4() {}
  160. // Initializes internal data structures.
  161. // Retains reference to "crc" instance -- it is used by Start().
  162. RollingCrc32cSSE4(const Crc32cSSE4 &crc,
  163. size_t roll_window_bytes,
  164. const Crc &start_value) {
  165. Init(crc, roll_window_bytes, start_value);
  166. }
  167. void Init(const Crc32cSSE4 &crc,
  168. size_t roll_window_bytes,
  169. const Crc &start_value);
  170. // Computes crc of "roll_window_bytes" using
  171. // "start_value" of "crc" (see Init()).
  172. Crc Start(const void *data) const {
  173. return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
  174. }
  175. // Computes CRC of "roll_window_bytes" starting in next position.
  176. Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
  177. Crc crc = old_crc;
  178. CRC_UPDATE_BYTE(crc, byte_in);
  179. crc ^= out_[byte_out];
  180. return crc;
  181. }
  182. // Returns start value.
  183. Crc StartValue() const { return start_value_; }
  184. // Returns length of roll window.
  185. size_t WindowBytes() const { return roll_window_bytes_; }
  186. protected:
  187. typedef Crc Entry;
  188. Entry out_[256];
  189. // Used only by Start().
  190. Crc start_value_;
  191. const Crc32cSSE4 *crc_;
  192. size_t roll_window_bytes_;
  193. #if !CRCUTIL_USE_MM_CRC32
  194. TableEntry crc_word_[sizeof(Word)][256];
  195. #endif // !CRCUTIL_USE_MM_CRC32
  196. } GCC_ALIGN_ATTRIBUTE(16);
  197. #pragma pack(pop)
  198. } // namespace crcutil
  199. #endif // HAVE_I386 || HAVE_AMD64
  200. #endif // CRCUTIL_CRC32C_SSE4_H_