// Copyright 2010 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Implements CRC32C using Intel's SSE4 crc32 instruction. // Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, // emilates intrinsics via CRC_WORD/CRC_BYTE otherwise. #ifndef CRCUTIL_CRC32C_SSE4_H_ #define CRCUTIL_CRC32C_SSE4_H_ #include "gf_util.h" // base types, gf_util class, etc. #include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics #if HAVE_I386 || HAVE_AMD64 #if CRCUTIL_USE_MM_CRC32 #if HAVE_I386 #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) #else #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) #endif // HAVE_I386 #define CRC_UPDATE_BYTE(crc, value) \ (crc = _mm_crc32_u8(static_cast(crc), static_cast(value))) #else #include "generic_crc.h" #define CRC_UPDATE_WORD(crc, value) do { \ size_t buf = (value); \ CRC_WORD(this, crc, buf); \ } while (0) #define CRC_UPDATE_BYTE(crc, value) do { \ CRC_BYTE(this, crc, (value)); \ } while (0) #endif // CRCUTIL_USE_MM_CRC32 namespace crcutil { #pragma pack(push, 16) // Since the same pieces should be parameterized in many different places // and we do not want to introduce a mistake which is rather hard to find, // use a macro to enumerate all block sizes. // // Block sizes and number of stripes were tuned for best performance. // // All constants should be literal constants (too lazy to fix the macro). // // The use of different "macro_first", "macro", and "macro_last" // allows generation of different code for smallest, in between, // and largest block sizes. // // This macro shall be kept in sync with // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. // Failure to do so will cause compile-time error. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ macro_smallest, macro, macro_largest) \ macro_smallest(512, 3); \ macro(1024, 3); \ macro(4096, 3); \ macro_largest(32768, 3) // This macro shall be kept in sync with // CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. // Failure to do so will cause compile-time error. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ macro_smallest, macro, macro_largest) \ macro_largest(32768, 3); \ macro(4096, 3); \ macro(1024, 3); \ macro_smallest(512, 3) // Enumerates all block sizes. #define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) #define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) #define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) #define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ mul_table_##block_size##_##num_blocks##_ class RollingCrc32cSSE4; class Crc32cSSE4 { public: // Exports Crc, TableEntry, and Word (needed by RollingCrc). typedef size_t Crc; typedef Crc Word; typedef Crc TableEntry; Crc32cSSE4() {} // Initializes the tables given generating polynomial of degree (degree). // If "canonical" is true, crc value will be XOR'ed with (-1) before and // after actual CRC computation. explicit Crc32cSSE4(bool canonical) { Init(canonical); } void Init(bool canonical); // Initializes the tables given generating polynomial of degree. // If "canonical" is true, crc value will be XOR'ed with (-1) before and // after actual CRC computation. // Provided for compatibility with GenericCrc. Crc32cSSE4(const Crc &generating_polynomial, size_t degree, bool canonical) { Init(generating_polynomial, degree, canonical); } void Init(const Crc &generating_polynomial, size_t degree, bool canonical) { if (generating_polynomial == FixedGeneratingPolynomial() && degree == FixedDegree()) { Init(canonical); } } // Returns fixed generating polymonial the class implements. static Crc FixedGeneratingPolynomial() { return 0x82f63b78; } // Returns degree of fixed generating polymonial the class implements. static Crc FixedDegree() { return 32; } // Returns base class. const GfUtil &Base() const { return base_; } // Computes CRC32. size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { return Crc32c(data, bytes, crc); } // Returns true iff crc32 instruction is available. static bool IsSSE42Available(); protected: // Actual implementation. size_t Crc32c(const void *data, size_t bytes, Crc crc) const; enum { kTableEntryBits = 8, kTableEntries = 1 << kTableEntryBits, kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, kNumTablesHalfLo = kNumTables / 2, kNumTablesHalfHi = (kNumTables + 1) / 2, kUnrolledLoopCount = 8, kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), }; // May be set to size_t or uint32, whichever is faster. typedef uint32 Entry; #define DECLARE_MUL_TABLE(block_size, num_stripes) \ Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ [kNumTables][kTableEntries] CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); #undef DECLARE_MUL_TABLE GfUtil base_; #if !CRCUTIL_USE_MM_CRC32 TableEntry crc_word_[sizeof(Word)][256]; friend class RollingCrc32cSSE4; #endif // !CRCUTIL_USE_MM_CRC32 } GCC_ALIGN_ATTRIBUTE(16); class RollingCrc32cSSE4 { public: typedef Crc32cSSE4::Crc Crc; typedef Crc32cSSE4::TableEntry TableEntry; typedef Crc32cSSE4::Word Word; RollingCrc32cSSE4() {} // Initializes internal data structures. // Retains reference to "crc" instance -- it is used by Start(). RollingCrc32cSSE4(const Crc32cSSE4 &crc, size_t roll_window_bytes, const Crc &start_value) { Init(crc, roll_window_bytes, start_value); } void Init(const Crc32cSSE4 &crc, size_t roll_window_bytes, const Crc &start_value); // Computes crc of "roll_window_bytes" using // "start_value" of "crc" (see Init()). Crc Start(const void *data) const { return crc_->CrcDefault(data, roll_window_bytes_, start_value_); } // Computes CRC of "roll_window_bytes" starting in next position. Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { Crc crc = old_crc; CRC_UPDATE_BYTE(crc, byte_in); crc ^= out_[byte_out]; return crc; } // Returns start value. Crc StartValue() const { return start_value_; } // Returns length of roll window. size_t WindowBytes() const { return roll_window_bytes_; } protected: typedef Crc Entry; Entry out_[256]; // Used only by Start(). Crc start_value_; const Crc32cSSE4 *crc_; size_t roll_window_bytes_; #if !CRCUTIL_USE_MM_CRC32 TableEntry crc_word_[sizeof(Word)][256]; #endif // !CRCUTIL_USE_MM_CRC32 } GCC_ALIGN_ATTRIBUTE(16); #pragma pack(pop) } // namespace crcutil #endif // HAVE_I386 || HAVE_AMD64 #endif // CRCUTIL_CRC32C_SSE4_H_