123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- // Copyright 2010 Google Inc. All rights reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Implements CRC32C using Intel's SSE4 crc32 instruction.
- // Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
- // emulates intrinsics via CRC_WORD/CRC_BYTE otherwise.
- #ifndef CRCUTIL_CRC32C_SSE4_H_
- #define CRCUTIL_CRC32C_SSE4_H_
- #include "gf_util.h" // base types, gf_util class, etc.
- #include "crc32c_sse4_intrin.h" // _mm_crc32_u* intrinsics
- #if HAVE_I386 || HAVE_AMD64
- #if CRCUTIL_USE_MM_CRC32
- #if HAVE_I386
- #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
- #else
- #define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
- #endif // HAVE_I386
- #define CRC_UPDATE_BYTE(crc, value) \
- (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
- #else
- #include "generic_crc.h"
- #define CRC_UPDATE_WORD(crc, value) do { \
- size_t buf = (value); \
- CRC_WORD(this, crc, buf); \
- } while (0)
- #define CRC_UPDATE_BYTE(crc, value) do { \
- CRC_BYTE(this, crc, (value)); \
- } while (0)
- #endif // CRCUTIL_USE_MM_CRC32
- namespace crcutil {
- #pragma pack(push, 16)
// Enumerates every supported (block size, number of stripes) pair, from the
// smallest block to the largest.
//
// The list lives in a macro because the same set of sizes has to be
// spelled out in many different places, and a mismatch between them would
// be a mistake that is rather hard to find.
//
// Block sizes and number of stripes were tuned for best performance.
//
// All constants in the list should be literal constants.
//
// Three separate callbacks -- "for_smallest", "for_middle", and
// "for_largest" -- allow generation of different code for the smallest,
// the in-between, and the largest block sizes.
//
// This macro shall be kept in sync with
// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
// Failure to do so will cause compile-time error.
#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
    for_smallest, for_middle, for_largest) \
  for_smallest(512, 3); \
  for_middle(1024, 3); \
  for_middle(4096, 3); \
  for_largest(32768, 3)
// Same list as CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING, visited from the
// largest block size down to the smallest. The callback parameters keep
// their positions: smallest first, in-between second, largest third.
//
// This macro shall be kept in sync with
// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
// Failure to do so will cause compile-time error.
#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
    for_smallest, for_middle, for_largest) \
  for_largest(32768, 3); \
  for_middle(4096, 3); \
  for_middle(1024, 3); \
  for_smallest(512, 3)
// Applies one and the same "visitor" macro to every block size, in
// ascending order, with no special treatment of the smallest or the
// largest block.
#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(visitor) \
    CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(visitor, visitor, visitor)
// Size in bytes of a single stripe: block_size / num_stripes, rounded down
// to a multiple of sizeof(size_t) by masking off the low bits.
#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
    (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))

// Effective block size: num_stripes whole stripes. Because each stripe is
// rounded down, this may be smaller than the nominal block_size.
#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
    (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))

// Identifier of the multiplication table that belongs to the given
// (block_size, num_stripes) pair: mul_table_<block_size>_<num_stripes>_.
// Both arguments are token-pasted into the name, so they must be literal
// constants. Both take part in the name so that two entries with the same
// block size but different stripe counts cannot collide.
#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
    mul_table_##block_size##_##num_stripes##_
- class RollingCrc32cSSE4;
- class Crc32cSSE4 {
- public:
- // Exports Crc, TableEntry, and Word (needed by RollingCrc).
- typedef size_t Crc;
- typedef Crc Word;
- typedef Crc TableEntry;
- Crc32cSSE4() {}
- // Initializes the tables given generating polynomial of degree (degree).
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- explicit Crc32cSSE4(bool canonical) {
- Init(canonical);
- }
- void Init(bool canonical);
- // Initializes the tables given generating polynomial of degree.
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
- // after actual CRC computation.
- // Provided for compatibility with GenericCrc.
- Crc32cSSE4(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- Init(generating_polynomial, degree, canonical);
- }
- void Init(const Crc &generating_polynomial,
- size_t degree,
- bool canonical) {
- if (generating_polynomial == FixedGeneratingPolynomial() &&
- degree == FixedDegree()) {
- Init(canonical);
- }
- }
- // Returns fixed generating polymonial the class implements.
- static Crc FixedGeneratingPolynomial() {
- return 0x82f63b78;
- }
- // Returns degree of fixed generating polymonial the class implements.
- static Crc FixedDegree() {
- return 32;
- }
- // Returns base class.
- const GfUtil<Crc> &Base() const { return base_; }
- // Computes CRC32.
- size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
- return Crc32c(data, bytes, crc);
- }
- // Returns true iff crc32 instruction is available.
- static bool IsSSE42Available();
- protected:
- // Actual implementation.
- size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
- enum {
- kTableEntryBits = 8,
- kTableEntries = 1 << kTableEntryBits,
- kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
- kNumTablesHalfLo = kNumTables / 2,
- kNumTablesHalfHi = (kNumTables + 1) / 2,
- kUnrolledLoopCount = 8,
- kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
- };
- // May be set to size_t or uint32, whichever is faster.
- typedef uint32 Entry;
- #define DECLARE_MUL_TABLE(block_size, num_stripes) \
- Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
- [kNumTables][kTableEntries]
- CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
- #undef DECLARE_MUL_TABLE
- GfUtil<Crc> base_;
- #if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
- friend class RollingCrc32cSSE4;
- #endif // !CRCUTIL_USE_MM_CRC32
- } GCC_ALIGN_ATTRIBUTE(16);
- class RollingCrc32cSSE4 {
- public:
- typedef Crc32cSSE4::Crc Crc;
- typedef Crc32cSSE4::TableEntry TableEntry;
- typedef Crc32cSSE4::Word Word;
- RollingCrc32cSSE4() {}
- // Initializes internal data structures.
- // Retains reference to "crc" instance -- it is used by Start().
- RollingCrc32cSSE4(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value) {
- Init(crc, roll_window_bytes, start_value);
- }
- void Init(const Crc32cSSE4 &crc,
- size_t roll_window_bytes,
- const Crc &start_value);
- // Computes crc of "roll_window_bytes" using
- // "start_value" of "crc" (see Init()).
- Crc Start(const void *data) const {
- return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
- }
- // Computes CRC of "roll_window_bytes" starting in next position.
- Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
- Crc crc = old_crc;
- CRC_UPDATE_BYTE(crc, byte_in);
- crc ^= out_[byte_out];
- return crc;
- }
- // Returns start value.
- Crc StartValue() const { return start_value_; }
- // Returns length of roll window.
- size_t WindowBytes() const { return roll_window_bytes_; }
- protected:
- typedef Crc Entry;
- Entry out_[256];
- // Used only by Start().
- Crc start_value_;
- const Crc32cSSE4 *crc_;
- size_t roll_window_bytes_;
- #if !CRCUTIL_USE_MM_CRC32
- TableEntry crc_word_[sizeof(Word)][256];
- #endif // !CRCUTIL_USE_MM_CRC32
- } GCC_ALIGN_ATTRIBUTE(16);
- #pragma pack(pop)
- } // namespace crcutil
- #endif // HAVE_I386 || HAVE_AMD64
- #endif // CRCUTIL_CRC32C_SSE4_H_
|