|
@@ -1,687 +1,687 @@
|
|
-// Copyright 2010 Google Inc. All rights reserved.
|
|
|
|
-//
|
|
|
|
-// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
-// you may not use this file except in compliance with the License.
|
|
|
|
-// You may obtain a copy of the License at
|
|
|
|
-//
|
|
|
|
-// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
-//
|
|
|
|
-// Unless required by applicable law or agreed to in writing, software
|
|
|
|
-// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
-// See the License for the specific language governing permissions and
|
|
|
|
-// limitations under the License.
|
|
|
|
-
|
|
|
|
-// Defines GenericCrc class which implements arbitrary CRCs.
|
|
|
|
-//
|
|
|
|
-// Please read crc.pdf to understand how it all works.
|
|
|
|
-
|
|
|
|
-#ifndef CRCUTIL_GENERIC_CRC_H_
|
|
|
|
-#define CRCUTIL_GENERIC_CRC_H_
|
|
|
|
-
|
|
|
|
-#include "base_types.h" // uint8
|
|
|
|
-#include "crc_casts.h" // TO_BYTE(), Downcast<>.
|
|
|
|
-#include "gf_util.h" // GfUtil<Crc> class.
|
|
|
|
-#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16)
|
|
|
|
-#include "uint128_sse2.h" // uint128_sse2 type (if necessary)
|
|
|
|
-
|
|
|
|
-namespace crcutil {
|
|
|
|
-
|
|
|
|
-#pragma pack(push, 16)
|
|
|
|
-
|
|
|
|
-// Extends CRC by one byte.
|
|
|
|
-// Technically, if degree of a polynomial does not exceed 8,
|
|
|
|
-// right shift by 8 bits is not required, but who cares about CRC-8?
|
|
|
|
-#define CRC_BYTE(table, crc, byte) do { \
|
|
|
|
- crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \
|
|
|
|
- table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \
|
|
|
|
-} while (0)
|
|
|
|
-
|
|
|
|
-#define TABLE_ENTRY(table, byte, buf) \
|
|
|
|
- table[byte][Downcast<Word, uint8>(buf)]
|
|
|
|
-
|
|
|
|
-#define TABLE_ENTRY_LAST(table, buf) \
|
|
|
|
- table[sizeof(Word) - 1][buf]
|
|
|
|
-
|
|
|
|
-// Extends CRC by one word.
|
|
|
|
-#define CRC_WORD(table, crc, buf) do { \
|
|
|
|
- buf ^= Downcast<Crc, Word>(crc); \
|
|
|
|
- if (sizeof(crc) > sizeof(buf)) { \
|
|
|
|
- crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \
|
|
|
|
- crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \
|
|
|
|
- } else { \
|
|
|
|
- crc = TABLE_ENTRY(table->crc_word_, 0, buf); \
|
|
|
|
- } \
|
|
|
|
- buf >>= 8; \
|
|
|
|
- for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \
|
|
|
|
- crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \
|
|
|
|
- buf >>= 8; \
|
|
|
|
- } \
|
|
|
|
- crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \
|
|
|
|
-} while (0)
|
|
|
|
-
|
|
|
|
-// Process beginning of data block byte by byte until source pointer
|
|
|
|
-// becomes perfectly aligned on Word boundary.
|
|
|
|
-#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \
|
|
|
|
- while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \
|
|
|
|
- if (src >= end) { \
|
|
|
|
- return (crc ^ table->Base().Canonize()); \
|
|
|
|
- } \
|
|
|
|
- CRC_BYTE(table, crc, *src); \
|
|
|
|
- src += 1; \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks
|
|
|
|
-// but 6-10% faster on larger blocks (>= 2KB).
|
|
|
|
-// Break-even point (+-1%) is around 1KB (Q9650, E6600).
|
|
|
|
-//
|
|
|
|
-#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \
|
|
|
|
-do { \
|
|
|
|
- if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \
|
|
|
|
- ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
-
|
|
|
|
-#if defined(_MSC_VER)
|
|
|
|
-#pragma warning(push)
|
|
|
|
-#pragma warning(disable: 4127) // conditional expression is constant
|
|
|
|
-#endif // defined(_MSC_VER)
|
|
|
|
-
|
|
|
|
-// Forward declarations.
|
|
|
|
-template<typename CrcImplementation> class RollingCrc;
|
|
|
|
-
|
|
|
|
-// Crc is the type used internally and to return values of N-bit CRC.
|
|
|
|
-// It should be at least as large as "TableEntry" and "Word" but
|
|
|
|
-// may be larger (e.g. for 16-bit CRC, TableEntry and Word may be
|
|
|
|
-// set to uint16 but Crc may be set to uint32).
|
|
|
|
-//
|
|
|
|
-// TableEntry is the type of values stored in the tables.
|
|
|
|
-// To implement N-bit CRC, TableEntry should be large enough
|
|
|
|
-// to store N bits.
|
|
|
|
-//
|
|
|
|
-// Word is the type used to read data sizeof(Word) at a time.
|
|
|
|
-// Ideally, it shoulde be "most suitable for given architecture"
|
|
|
|
-// integer type -- typically "size_t".
|
|
|
|
-//
|
|
|
|
-// kStride is the number of words processed in interleaved manner by
|
|
|
|
-// CrcMultiword() and CrcWordblock(). Shall be either 3 or 4.
|
|
|
|
-// Optimal value depends on hardware architecture (AMD64, ARM, etc).
|
|
|
|
-//
|
|
|
|
-template<typename _Crc, typename _TableEntry, typename _Word, int kStride>
|
|
|
|
- class GenericCrc {
|
|
|
|
- public:
|
|
|
|
- // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.)
|
|
|
|
- typedef _Crc Crc;
|
|
|
|
- typedef _TableEntry TableEntry;
|
|
|
|
- typedef _Word Word;
|
|
|
|
-
|
|
|
|
- GenericCrc() {}
|
|
|
|
-
|
|
|
|
- // Initializes the tables given generating polynomial of degree.
|
|
|
|
- // If "canonical" is true, crc value will be XOR'ed with (-1) before and
|
|
|
|
- // after actual CRC computation.
|
|
|
|
- GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) {
|
|
|
|
- Init(generating_polynomial, degree, canonical);
|
|
|
|
- }
|
|
|
|
- void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
|
|
|
|
- base_.Init(generating_polynomial, degree, canonical);
|
|
|
|
-
|
|
|
|
- // Instead of computing
|
|
|
|
- // table[j][i] = MultiplyUnnormalized(i, 8, k),
|
|
|
|
- // for all i = 0...255, we may notice that
|
|
|
|
- // if i = 2**n then for all m = 1...(i-1)
|
|
|
|
- // MultiplyUnnormalized(i + m, 8, k) =
|
|
|
|
- // MultiplyUnnormalized(i ^ m, 8, k) =
|
|
|
|
- // MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) =
|
|
|
|
- // MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] =
|
|
|
|
- // table[i] ^ table[m].
|
|
|
|
-#if 0
|
|
|
|
- for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
- Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
|
|
|
|
- for (size_t i = 0; i < 256; ++i) {
|
|
|
|
- Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
|
|
|
|
- this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-#else
|
|
|
|
- for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
- Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
|
|
|
|
- TableEntry *table = this->crc_word_interleaved_[j];
|
|
|
|
- table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
|
|
|
|
- for (size_t i = 1; i < 256; i <<= 1) {
|
|
|
|
- TableEntry value = Downcast<Crc, TableEntry>(
|
|
|
|
- Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
|
|
|
|
- table[i] = value;
|
|
|
|
- for (size_t m = 1; m < i; ++m) {
|
|
|
|
- table[i + m] = value ^ table[m];
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-#endif
|
|
|
|
-
|
|
|
|
-#if 0
|
|
|
|
- for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
- Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
|
|
|
|
- for (size_t i = 0; i < 256; ++i) {
|
|
|
|
- Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
|
|
|
|
- this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-#else
|
|
|
|
- for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
- Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
|
|
|
|
- TableEntry *table = this->crc_word_[j];
|
|
|
|
- table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
|
|
|
|
- for (size_t i = 1; i < 256; i <<= 1) {
|
|
|
|
- TableEntry value = Downcast<Crc, TableEntry>(
|
|
|
|
- Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
|
|
|
|
- table[i] = value;
|
|
|
|
- for (size_t m = 1; m < i; ++m) {
|
|
|
|
- table[i + m] = value ^ table[m];
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-#endif
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Default CRC implementation
|
|
|
|
- Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
-#if HAVE_AMD64 || HAVE_I386
|
|
|
|
- return CrcMultiword(data, bytes, start);
|
|
|
|
-#else
|
|
|
|
- // Very few CPUs have multiple ALUs and speculative execution
|
|
|
|
- // (Itanium is an exception) so sophisticated algorithms will
|
|
|
|
- // not perform better than good old Sarwate algorithm.
|
|
|
|
- return CrcByteUnrolled(data, bytes, start);
|
|
|
|
-#endif // HAVE_AMD64 || HAVE_I386
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Returns base class.
|
|
|
|
- const GfUtil<Crc> &Base() const { return base_; }
|
|
|
|
-
|
|
|
|
- protected:
|
|
|
|
- // Canonical, byte-by-byte CRC computation.
|
|
|
|
- Crc CrcByte(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- Crc crc = start ^ Base().Canonize();
|
|
|
|
- for (const uint8 *end = src + bytes; src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc, *src);
|
|
|
|
- }
|
|
|
|
- return (crc ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Byte-by-byte CRC with main loop unrolled.
|
|
|
|
- Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- if (bytes == 0) {
|
|
|
|
- return start;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- const uint8 *end = src + bytes;
|
|
|
|
- Crc crc = start ^ Base().Canonize();
|
|
|
|
-
|
|
|
|
- // Unroll loop 4 times.
|
|
|
|
- end -= 3;
|
|
|
|
- for (; src < end; src += 4) {
|
|
|
|
- PREFETCH(src);
|
|
|
|
- CRC_BYTE(this, crc, src[0]);
|
|
|
|
- CRC_BYTE(this, crc, src[1]);
|
|
|
|
- CRC_BYTE(this, crc, src[2]);
|
|
|
|
- CRC_BYTE(this, crc, src[3]);
|
|
|
|
- }
|
|
|
|
- end += 3;
|
|
|
|
-
|
|
|
|
- // Compute CRC of remaining bytes.
|
|
|
|
- for (; src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc, *src);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (crc ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Canonical, byte-by-byte CRC computation.
|
|
|
|
- Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- const uint8 *end = src + bytes;
|
|
|
|
- Crc crc0 = start ^ Base().Canonize();
|
|
|
|
-
|
|
|
|
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc);
|
|
|
|
- if (src >= end) {
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Process 4*sizeof(Crc) bytes at a time.
|
|
|
|
- end -= 4 * sizeof(Crc) - 1;
|
|
|
|
- for (; src < end; src += 4 * sizeof(Crc)) {
|
|
|
|
- for (size_t i = 0; i < 4; ++i) {
|
|
|
|
- crc0 ^= reinterpret_cast<const Crc *>(src)[i];
|
|
|
|
- if (i == 0) {
|
|
|
|
- PREFETCH(src);
|
|
|
|
- }
|
|
|
|
- for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
|
|
|
|
- CRC_BYTE(this, crc0, 0);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- end += 4 * sizeof(Crc) - 1;
|
|
|
|
-
|
|
|
|
- // Process sizeof(Crc) bytes at a time.
|
|
|
|
- end -= sizeof(Crc) - 1;
|
|
|
|
- for (; src < end; src += sizeof(Crc)) {
|
|
|
|
- crc0 ^= reinterpret_cast<const Crc *>(src)[0];
|
|
|
|
- for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
|
|
|
|
- CRC_BYTE(this, crc0, 0);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- end += sizeof(Crc) - 1;
|
|
|
|
-
|
|
|
|
- // Compute CRC of remaining bytes.
|
|
|
|
- for (;src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc0, *src);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Faster, word-by-word CRC.
|
|
|
|
- Crc CrcWord(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- const uint8 *end = src + bytes;
|
|
|
|
- Crc crc0 = start ^ Base().Canonize();
|
|
|
|
-
|
|
|
|
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
- if (src >= end) {
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Process 4 sizeof(Word) bytes at once.
|
|
|
|
- end -= 4 * sizeof(Word) - 1;
|
|
|
|
- for (; src < end; src += 4 * sizeof(Word)) {
|
|
|
|
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
- PREFETCH(src);
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- buf0 = reinterpret_cast<const Word *>(src)[1];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- buf0 = reinterpret_cast<const Word *>(src)[2];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- buf0 = reinterpret_cast<const Word *>(src)[3];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- }
|
|
|
|
- end += 4 * sizeof(Word) - 1;
|
|
|
|
-
|
|
|
|
- // Process sizeof(Word) bytes at a time.
|
|
|
|
- end -= sizeof(Word) - 1;
|
|
|
|
- for (; src < end; src += sizeof(Word)) {
|
|
|
|
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- }
|
|
|
|
- end += sizeof(Word) - 1;
|
|
|
|
-
|
|
|
|
- // Compute CRC of remaining bytes.
|
|
|
|
- for (;src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc0, *src);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-#define REPEAT_FROM_1(macro) \
|
|
|
|
- macro(1); \
|
|
|
|
- macro(2); \
|
|
|
|
- macro(3); \
|
|
|
|
- macro(4); \
|
|
|
|
- macro(5); \
|
|
|
|
- macro(6); \
|
|
|
|
- macro(7);
|
|
|
|
-
|
|
|
|
-#define REPEAT_FROM_0(macro) \
|
|
|
|
- macro(0); \
|
|
|
|
- REPEAT_FROM_1(macro)
|
|
|
|
-
|
|
|
|
- // Faster, process adjusent blocks in parallel and concatenate CRCs.
|
|
|
|
- Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- if (kStride < 2 || kStride > 8) {
|
|
|
|
- // Unsupported configuration;
|
|
|
|
- // fall back to something sensible.
|
|
|
|
- return CrcWord(data, bytes, start);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- const uint8 *end = src + bytes;
|
|
|
|
- Crc crc0 = start ^ Base().Canonize();
|
|
|
|
- enum {
|
|
|
|
- // Add 16 to avoid false L1 cache collisions.
|
|
|
|
- kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1),
|
|
|
|
- };
|
|
|
|
-
|
|
|
|
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
- if (src >= end) {
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- end -= kStride * kStripe - 1;
|
|
|
|
- if (src < end) {
|
|
|
|
- Crc x_pow_8kStripe = Base().Xpow8N(kStripe);
|
|
|
|
- do {
|
|
|
|
- const uint8 *stripe_end = src + kStripe;
|
|
|
|
-
|
|
|
|
-#define INIT_CRC(reg) \
|
|
|
|
- Crc crc##reg; \
|
|
|
|
- if (kStride >= reg) { \
|
|
|
|
- crc##reg = 0; \
|
|
|
|
- }
|
|
|
|
- REPEAT_FROM_1(INIT_CRC);
|
|
|
|
-#undef INIT_CRC
|
|
|
|
-
|
|
|
|
- do {
|
|
|
|
-#define FIRST(reg) \
|
|
|
|
- Word buf##reg; \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \
|
|
|
|
- buf##reg ^= Downcast<Crc, Word>(crc##reg); \
|
|
|
|
- if (sizeof(crc##reg) > sizeof(buf##reg)) { \
|
|
|
|
- crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \
|
|
|
|
- crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
|
|
|
|
- } else { \
|
|
|
|
- crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
|
|
|
|
- } \
|
|
|
|
- buf##reg >>= 8; \
|
|
|
|
- }
|
|
|
|
- REPEAT_FROM_0(FIRST);
|
|
|
|
-#undef FIRST
|
|
|
|
-
|
|
|
|
- for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) {
|
|
|
|
-#define NEXT(reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \
|
|
|
|
- buf##reg >>= 8; \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
- REPEAT_FROM_0(NEXT);
|
|
|
|
-#undef NEXT
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-#define LAST(reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
- REPEAT_FROM_0(LAST);
|
|
|
|
-#undef LAST
|
|
|
|
-
|
|
|
|
- src += sizeof(Word);
|
|
|
|
- } while (src < stripe_end);
|
|
|
|
-
|
|
|
|
-#if 0
|
|
|
|
-// The code is left for illustrational purposes only.
|
|
|
|
-#define COMBINE(reg) do { \
|
|
|
|
- if (reg > 0 && kStride > reg) { \
|
|
|
|
- crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
-#else
|
|
|
|
-#define COMBINE(reg) do { \
|
|
|
|
- if (reg > 0 && kStride > reg) { \
|
|
|
|
- crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
-#endif
|
|
|
|
- REPEAT_FROM_0(COMBINE);
|
|
|
|
-#undef COMBINE
|
|
|
|
-
|
|
|
|
- src += (kStride - 1) * kStripe;
|
|
|
|
- }
|
|
|
|
- while (src < end);
|
|
|
|
- }
|
|
|
|
- end += kStride * kStripe - 1;
|
|
|
|
-
|
|
|
|
- // Process sizeof(Word) bytes at a time.
|
|
|
|
- end -= sizeof(Word) - 1;
|
|
|
|
- for (; src < end; src += sizeof(Word)) {
|
|
|
|
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- }
|
|
|
|
- end += sizeof(Word) - 1;
|
|
|
|
-
|
|
|
|
- // Compute CRC of remaining bytes.
|
|
|
|
- for (;src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc0, *src);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Fastest, interleaved multi-byte CRC.
|
|
|
|
- Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
- if (kStride < 2 || kStride > 8) {
|
|
|
|
- // Unsupported configuration;
|
|
|
|
- // fall back to something sensible.
|
|
|
|
- return CrcWord(data, bytes, start);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- const uint8 *end = src + bytes;
|
|
|
|
- Crc crc0 = start ^ Base().Canonize();
|
|
|
|
-
|
|
|
|
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
- if (src >= end) {
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Process kStride Word registers at once;
|
|
|
|
- // should have have at least 2*kInterleaveBytes of data to start.
|
|
|
|
- end -= 2*kInterleaveBytes - 1;
|
|
|
|
- if (src < end) {
|
|
|
|
- Crc crc_carryover;
|
|
|
|
- if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
- // crc_carryover is used if and only if Crc is wider than Word.
|
|
|
|
- crc_carryover = 0;
|
|
|
|
- }
|
|
|
|
-#define INIT_CRC(reg) \
|
|
|
|
- Crc crc##reg; \
|
|
|
|
- if (reg > 0 && kStride > reg) { \
|
|
|
|
- crc##reg = 0; \
|
|
|
|
- }
|
|
|
|
- REPEAT_FROM_1(INIT_CRC);
|
|
|
|
-#undef INIT_CRC
|
|
|
|
-
|
|
|
|
-#define INIT_BUF(reg) \
|
|
|
|
- Word buf##reg; \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
|
|
|
|
- }
|
|
|
|
- REPEAT_FROM_0(INIT_BUF);
|
|
|
|
-#undef INIT_BUF
|
|
|
|
-
|
|
|
|
- do {
|
|
|
|
- PREFETCH(src);
|
|
|
|
- src += kInterleaveBytes;
|
|
|
|
-
|
|
|
|
- if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
- crc0 ^= crc_carryover;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-#define FIRST(reg, next_reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- buf##reg ^= Downcast<Crc, Word>(crc##reg); \
|
|
|
|
- if (sizeof(Crc) > sizeof(Word)) { \
|
|
|
|
- if (reg < kStride - 1) { \
|
|
|
|
- crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
|
|
|
|
- } else { \
|
|
|
|
- crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
|
|
|
|
- } \
|
|
|
|
- } \
|
|
|
|
- crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \
|
|
|
|
- buf##reg >>= 8; \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
- FIRST(0, 1);
|
|
|
|
- FIRST(1, 2);
|
|
|
|
- FIRST(2, 3);
|
|
|
|
- FIRST(3, 4);
|
|
|
|
- FIRST(4, 5);
|
|
|
|
- FIRST(5, 6);
|
|
|
|
- FIRST(6, 7);
|
|
|
|
- FIRST(7, 0);
|
|
|
|
-#undef FIRST
|
|
|
|
-
|
|
|
|
- for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) {
|
|
|
|
-#define NEXT(reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- crc##reg ^= \
|
|
|
|
- TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \
|
|
|
|
- buf##reg >>= 8; \
|
|
|
|
- } \
|
|
|
|
-} while(0)
|
|
|
|
- REPEAT_FROM_0(NEXT);
|
|
|
|
-#undef NEXT
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-#define LAST(reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \
|
|
|
|
- buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
|
|
|
|
- } \
|
|
|
|
-} while(0)
|
|
|
|
- REPEAT_FROM_0(LAST);
|
|
|
|
-#undef LAST
|
|
|
|
- }
|
|
|
|
- while (src < end);
|
|
|
|
-
|
|
|
|
- if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
- crc0 ^= crc_carryover;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
-#define COMBINE(reg) do { \
|
|
|
|
- if (kStride > reg) { \
|
|
|
|
- if (reg != 0) { \
|
|
|
|
- crc0 ^= crc##reg; \
|
|
|
|
- } \
|
|
|
|
- CRC_WORD(this, crc0, buf##reg); \
|
|
|
|
- } \
|
|
|
|
-} while (0)
|
|
|
|
- REPEAT_FROM_0(COMBINE);
|
|
|
|
-#undef COMBINE
|
|
|
|
-
|
|
|
|
- src += kInterleaveBytes;
|
|
|
|
- }
|
|
|
|
- end += 2*kInterleaveBytes - 1;
|
|
|
|
-
|
|
|
|
- // Process sizeof(Word) bytes at once.
|
|
|
|
- end -= sizeof(Word) - 1;
|
|
|
|
- for (; src < end; src += sizeof(Word)) {
|
|
|
|
- Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
- CRC_WORD(this, crc0, buf0);
|
|
|
|
- }
|
|
|
|
- end += sizeof(Word) - 1;
|
|
|
|
-
|
|
|
|
- // Compute CRC of remaining bytes.
|
|
|
|
- for (;src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc0, *src);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return (crc0 ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- protected:
|
|
|
|
- enum {
|
|
|
|
- kInterleaveBytes = sizeof(Word) * kStride,
|
|
|
|
- };
|
|
|
|
-
|
|
|
|
- // Multiplication tables used by CRCs.
|
|
|
|
- TableEntry crc_word_interleaved_[sizeof(Word)][256];
|
|
|
|
- TableEntry crc_word_[sizeof(Word)][256];
|
|
|
|
-
|
|
|
|
- // Base class stored after CRC tables so that the most frequently
|
|
|
|
- // used table is at offset 0 and may be accessed faster.
|
|
|
|
- GfUtil<Crc> base_;
|
|
|
|
-
|
|
|
|
- friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >;
|
|
|
|
-
|
|
|
|
- private:
|
|
|
|
- // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is
|
|
|
|
- // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on
|
|
|
|
- // hardware and compiler). However, there are problems with compilers.
|
|
|
|
- //
|
|
|
|
- // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory.
|
|
|
|
- //
|
|
|
|
- // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads:
|
|
|
|
- // CL 15.00.307291.1 C++ >1.2< CPU cycles/byte
|
|
|
|
- // ICL 11.1.051 -O3 C++ 1.5 CPU cycles/byte
|
|
|
|
- // GCC 4.5 -O3 C++ 2.0 CPU cycles/byte
|
|
|
|
- // GCC 4.x -O3 ASM >1.2< CPU cycles/byte
|
|
|
|
- //
|
|
|
|
- // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads
|
|
|
|
- // CL 15.00.307291.1 C++ 2.0 CPU cycles/byte
|
|
|
|
- // GCC 4.5 -O3 C++ 1.9 CPU cycles/byte
|
|
|
|
- // ICL 11.1.051 -S C++ 1.6 CPU cycles/byte
|
|
|
|
- // GCC 4.x -O3 ASM >1.3< CPU cycles/byte
|
|
|
|
- //
|
|
|
|
- // So, use inline ASM code for GCC for both i386 and amd64.
|
|
|
|
-
|
|
|
|
- Crc CrcMultiwordI386Mmx(
|
|
|
|
- const void *data, size_t bytes, const Crc &start) const;
|
|
|
|
- Crc CrcMultiwordGccAmd64(
|
|
|
|
- const void *data, size_t bytes, const Crc &start) const;
|
|
|
|
- Crc CrcMultiwordGccAmd64Sse2(
|
|
|
|
- const uint8 *src, const uint8 *end, const Crc &start) const;
|
|
|
|
-} GCC_ALIGN_ATTRIBUTE(16);
|
|
|
|
-
|
|
|
|
-#undef REPEAT_FROM_0
|
|
|
|
-#undef REPEAT_FROM_1
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-// Specialized variants.
|
|
|
|
-#if CRCUTIL_USE_ASM
|
|
|
|
-
|
|
|
|
-#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
|
|
|
|
-
|
|
|
|
-// Declare specialized functions.
|
|
|
|
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
|
|
|
|
- const void *data, size_t bytes, const uint64 &start) const;
|
|
|
|
-
|
|
|
|
-#if HAVE_AMD64 && HAVE_SSE2
|
|
|
|
-template<>
|
|
|
|
-uint128_sse2
|
|
|
|
-GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
|
|
|
|
- const void *data, size_t bytes, const uint128_sse2 &start) const;
|
|
|
|
-#endif // HAVE_AMD64 && HAVE_SSE2
|
|
|
|
-
|
|
|
|
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \
|
|
|
|
- (HAVE_I386 && HAVE_MMX)
|
|
|
|
-
|
|
|
|
-// Work around bug in MSC (present at least in v. 15.00.30729.1)
|
|
|
|
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
|
|
|
|
- const void *data,
|
|
|
|
- size_t bytes,
|
|
|
|
- const uint64 &start) const;
|
|
|
|
-template<> __forceinline
|
|
|
|
-uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
|
|
|
|
- const void *data,
|
|
|
|
- size_t bytes,
|
|
|
|
- const uint64 &start) const {
|
|
|
|
- typedef uint64 Word;
|
|
|
|
- typedef uint64 Crc;
|
|
|
|
- if (bytes <= 12) {
|
|
|
|
- const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
- uint64 crc = start ^ Base().Canonize();
|
|
|
|
- for (const uint8 *end = src + bytes; src < end; ++src) {
|
|
|
|
- CRC_BYTE(this, crc, *src);
|
|
|
|
- }
|
|
|
|
- return (crc ^ Base().Canonize());
|
|
|
|
- }
|
|
|
|
- return CrcMultiwordI386Mmx(data, bytes, start);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-#endif // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
|
|
|
|
-
|
|
|
|
-#endif // CRCUTIL_USE_ASM
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-#pragma pack(pop)
|
|
|
|
-
|
|
|
|
-} // namespace crcutil
|
|
|
|
-
|
|
|
|
-#endif // CRCUTIL_GENERIC_CRC_H_
|
|
|
|
|
|
+// Copyright 2010 Google Inc. All rights reserved.
|
|
|
|
+//
|
|
|
|
+// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
+// you may not use this file except in compliance with the License.
|
|
|
|
+// You may obtain a copy of the License at
|
|
|
|
+//
|
|
|
|
+// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
+//
|
|
|
|
+// Unless required by applicable law or agreed to in writing, software
|
|
|
|
+// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
+// See the License for the specific language governing permissions and
|
|
|
|
+// limitations under the License.
|
|
|
|
+
|
|
|
|
+// Defines GenericCrc class which implements arbitrary CRCs.
|
|
|
|
+//
|
|
|
|
+// Please read crc.pdf to understand how it all works.
|
|
|
|
+
|
|
|
|
+#ifndef CRCUTIL_GENERIC_CRC_H_
|
|
|
|
+#define CRCUTIL_GENERIC_CRC_H_
|
|
|
|
+
|
|
|
|
+#include "base_types.h" // uint8
|
|
|
|
+#include "crc_casts.h" // TO_BYTE(), Downcast<>.
|
|
|
|
+#include "gf_util.h" // GfUtil<Crc> class.
|
|
|
|
+#include "platform.h" // GCC_ALIGN_ATTRIBUTE(16)
|
|
|
|
+#include "uint128_sse2.h" // uint128_sse2 type (if necessary)
|
|
|
|
+
|
|
|
|
+namespace crcutil {
|
|
|
|
+
|
|
|
|
+#pragma pack(push, 16)
|
|
|
|
+
|
|
|
|
+// Extends CRC by one byte.
|
|
|
|
+// Technically, if degree of a polynomial does not exceed 8,
|
|
|
|
+// right shift by 8 bits is not required, but who cares about CRC-8?
|
|
|
|
+#define CRC_BYTE(table, crc, byte) do { \
|
|
|
|
+ crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \
|
|
|
|
+ table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \
|
|
|
|
+} while (0)
|
|
|
|
+
|
|
|
|
+#define TABLE_ENTRY(table, byte, buf) \
|
|
|
|
+ table[byte][Downcast<Word, uint8>(buf)]
|
|
|
|
+
|
|
|
|
+#define TABLE_ENTRY_LAST(table, buf) \
|
|
|
|
+ table[sizeof(Word) - 1][buf]
|
|
|
|
+
|
|
|
|
+// Extends CRC by one word.
|
|
|
|
+#define CRC_WORD(table, crc, buf) do { \
|
|
|
|
+ buf ^= Downcast<Crc, Word>(crc); \
|
|
|
|
+ if (sizeof(crc) > sizeof(buf)) { \
|
|
|
|
+ crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \
|
|
|
|
+ crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \
|
|
|
|
+ } else { \
|
|
|
|
+ crc = TABLE_ENTRY(table->crc_word_, 0, buf); \
|
|
|
|
+ } \
|
|
|
|
+ buf >>= 8; \
|
|
|
|
+ for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \
|
|
|
|
+ crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \
|
|
|
|
+ buf >>= 8; \
|
|
|
|
+ } \
|
|
|
|
+ crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \
|
|
|
|
+} while (0)
|
|
|
|
+
|
|
|
|
+// Process beginning of data block byte by byte until source pointer
|
|
|
|
+// becomes perfectly aligned on Word boundary.
|
|
|
|
+#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \
|
|
|
|
+ while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \
|
|
|
|
+ if (src >= end) { \
|
|
|
|
+ return (crc ^ table->Base().Canonize()); \
|
|
|
|
+ } \
|
|
|
|
+ CRC_BYTE(table, crc, *src); \
|
|
|
|
+ src += 1; \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks
|
|
|
|
+// but 6-10% faster on larger blocks (>= 2KB).
|
|
|
|
+// Break-even point (+-1%) is around 1KB (Q9650, E6600).
|
|
|
|
+//
|
|
|
|
+#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \
|
|
|
|
+do { \
|
|
|
|
+ if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \
|
|
|
|
+ ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+
|
|
|
|
+#if defined(_MSC_VER)
|
|
|
|
+#pragma warning(push)
|
|
|
|
+#pragma warning(disable: 4127) // conditional expression is constant
|
|
|
|
+#endif // defined(_MSC_VER)
|
|
|
|
+
|
|
|
|
+// Forward declarations.
|
|
|
|
+template<typename CrcImplementation> class RollingCrc;
|
|
|
|
+
|
|
|
|
+// Crc is the type used internally and to return values of N-bit CRC.
|
|
|
|
+// It should be at least as large as "TableEntry" and "Word" but
|
|
|
|
+// may be larger (e.g. for 16-bit CRC, TableEntry and Word may be
|
|
|
|
+// set to uint16 but Crc may be set to uint32).
|
|
|
|
+//
|
|
|
|
+// TableEntry is the type of values stored in the tables.
|
|
|
|
+// To implement N-bit CRC, TableEntry should be large enough
|
|
|
|
+// to store N bits.
|
|
|
|
+//
|
|
|
|
+// Word is the type used to read data sizeof(Word) at a time.
|
|
|
|
+// Ideally, it shoulde be "most suitable for given architecture"
|
|
|
|
+// integer type -- typically "size_t".
|
|
|
|
+//
|
|
|
|
+// kStride is the number of words processed in interleaved manner by
|
|
|
|
+// CrcMultiword() and CrcWordblock(). Shall be either 3 or 4.
|
|
|
|
+// Optimal value depends on hardware architecture (AMD64, ARM, etc).
|
|
|
|
+//
|
|
|
|
+template<typename _Crc, typename _TableEntry, typename _Word, int kStride>
|
|
|
|
+ class GenericCrc {
|
|
|
|
+ public:
|
|
|
|
+ // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.)
|
|
|
|
+ typedef _Crc Crc;
|
|
|
|
+ typedef _TableEntry TableEntry;
|
|
|
|
+ typedef _Word Word;
|
|
|
|
+
|
|
|
|
+ GenericCrc() {}
|
|
|
|
+
|
|
|
|
+ // Initializes the tables given generating polynomial of degree.
|
|
|
|
+ // If "canonical" is true, crc value will be XOR'ed with (-1) before and
|
|
|
|
+ // after actual CRC computation.
|
|
|
|
+ GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) {
|
|
|
|
+ Init(generating_polynomial, degree, canonical);
|
|
|
|
+ }
|
|
|
|
+ void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
|
|
|
|
+ base_.Init(generating_polynomial, degree, canonical);
|
|
|
|
+
|
|
|
|
+ // Instead of computing
|
|
|
|
+ // table[j][i] = MultiplyUnnormalized(i, 8, k),
|
|
|
|
+ // for all i = 0...255, we may notice that
|
|
|
|
+ // if i = 2**n then for all m = 1...(i-1)
|
|
|
|
+ // MultiplyUnnormalized(i + m, 8, k) =
|
|
|
|
+ // MultiplyUnnormalized(i ^ m, 8, k) =
|
|
|
|
+ // MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) =
|
|
|
|
+ // MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] =
|
|
|
|
+ // table[i] ^ table[m].
|
|
|
|
+#if 0
|
|
|
|
+ for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
+ Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
|
|
|
|
+ for (size_t i = 0; i < 256; ++i) {
|
|
|
|
+ Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
|
|
|
|
+ this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+#else
|
|
|
|
+ for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
+ Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
|
|
|
|
+ TableEntry *table = this->crc_word_interleaved_[j];
|
|
|
|
+ table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
|
|
|
|
+ for (size_t i = 1; i < 256; i <<= 1) {
|
|
|
|
+ TableEntry value = Downcast<Crc, TableEntry>(
|
|
|
|
+ Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
|
|
|
|
+ table[i] = value;
|
|
|
|
+ for (size_t m = 1; m < i; ++m) {
|
|
|
|
+ table[i + m] = value ^ table[m];
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+
|
|
|
|
+#if 0
|
|
|
|
+ for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
+ Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
|
|
|
|
+ for (size_t i = 0; i < 256; ++i) {
|
|
|
|
+ Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
|
|
|
|
+ this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+#else
|
|
|
|
+ for (size_t j = 0; j < sizeof(Word); ++j) {
|
|
|
|
+ Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
|
|
|
|
+ TableEntry *table = this->crc_word_[j];
|
|
|
|
+ table[0] = 0; // Init 0s entry -- multiply 0 by anything yields 0.
|
|
|
|
+ for (size_t i = 1; i < 256; i <<= 1) {
|
|
|
|
+ TableEntry value = Downcast<Crc, TableEntry>(
|
|
|
|
+ Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
|
|
|
|
+ table[i] = value;
|
|
|
|
+ for (size_t m = 1; m < i; ++m) {
|
|
|
|
+ table[i + m] = value ^ table[m];
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Default CRC implementation
|
|
|
|
+ Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+#if HAVE_AMD64 || HAVE_I386
|
|
|
|
+ return CrcMultiword(data, bytes, start);
|
|
|
|
+#else
|
|
|
|
+ // Very few CPUs have multiple ALUs and speculative execution
|
|
|
|
+ // (Itanium is an exception) so sophisticated algorithms will
|
|
|
|
+ // not perform better than good old Sarwate algorithm.
|
|
|
|
+ return CrcByteUnrolled(data, bytes, start);
|
|
|
|
+#endif // HAVE_AMD64 || HAVE_I386
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Returns base class.
|
|
|
|
+ const GfUtil<Crc> &Base() const { return base_; }
|
|
|
|
+
|
|
|
|
+ protected:
|
|
|
|
+ // Canonical, byte-by-byte CRC computation.
|
|
|
|
+ Crc CrcByte(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ Crc crc = start ^ Base().Canonize();
|
|
|
|
+ for (const uint8 *end = src + bytes; src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc, *src);
|
|
|
|
+ }
|
|
|
|
+ return (crc ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Byte-by-byte CRC with main loop unrolled.
|
|
|
|
+ Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ if (bytes == 0) {
|
|
|
|
+ return start;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ const uint8 *end = src + bytes;
|
|
|
|
+ Crc crc = start ^ Base().Canonize();
|
|
|
|
+
|
|
|
|
+ // Unroll loop 4 times.
|
|
|
|
+ end -= 3;
|
|
|
|
+ for (; src < end; src += 4) {
|
|
|
|
+ PREFETCH(src);
|
|
|
|
+ CRC_BYTE(this, crc, src[0]);
|
|
|
|
+ CRC_BYTE(this, crc, src[1]);
|
|
|
|
+ CRC_BYTE(this, crc, src[2]);
|
|
|
|
+ CRC_BYTE(this, crc, src[3]);
|
|
|
|
+ }
|
|
|
|
+ end += 3;
|
|
|
|
+
|
|
|
|
+ // Compute CRC of remaining bytes.
|
|
|
|
+ for (; src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc, *src);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return (crc ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Canonical, byte-by-byte CRC computation.
|
|
|
|
+ Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ const uint8 *end = src + bytes;
|
|
|
|
+ Crc crc0 = start ^ Base().Canonize();
|
|
|
|
+
|
|
|
|
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc);
|
|
|
|
+ if (src >= end) {
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Process 4*sizeof(Crc) bytes at a time.
|
|
|
|
+ end -= 4 * sizeof(Crc) - 1;
|
|
|
|
+ for (; src < end; src += 4 * sizeof(Crc)) {
|
|
|
|
+ for (size_t i = 0; i < 4; ++i) {
|
|
|
|
+ crc0 ^= reinterpret_cast<const Crc *>(src)[i];
|
|
|
|
+ if (i == 0) {
|
|
|
|
+ PREFETCH(src);
|
|
|
|
+ }
|
|
|
|
+ for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
|
|
|
|
+ CRC_BYTE(this, crc0, 0);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ end += 4 * sizeof(Crc) - 1;
|
|
|
|
+
|
|
|
|
+ // Process sizeof(Crc) bytes at a time.
|
|
|
|
+ end -= sizeof(Crc) - 1;
|
|
|
|
+ for (; src < end; src += sizeof(Crc)) {
|
|
|
|
+ crc0 ^= reinterpret_cast<const Crc *>(src)[0];
|
|
|
|
+ for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
|
|
|
|
+ CRC_BYTE(this, crc0, 0);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ end += sizeof(Crc) - 1;
|
|
|
|
+
|
|
|
|
+ // Compute CRC of remaining bytes.
|
|
|
|
+ for (;src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc0, *src);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Faster, word-by-word CRC.
|
|
|
|
+ Crc CrcWord(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ const uint8 *end = src + bytes;
|
|
|
|
+ Crc crc0 = start ^ Base().Canonize();
|
|
|
|
+
|
|
|
|
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
+ if (src >= end) {
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Process 4 sizeof(Word) bytes at once.
|
|
|
|
+ end -= 4 * sizeof(Word) - 1;
|
|
|
|
+ for (; src < end; src += 4 * sizeof(Word)) {
|
|
|
|
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
+ PREFETCH(src);
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ buf0 = reinterpret_cast<const Word *>(src)[1];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ buf0 = reinterpret_cast<const Word *>(src)[2];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ buf0 = reinterpret_cast<const Word *>(src)[3];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ }
|
|
|
|
+ end += 4 * sizeof(Word) - 1;
|
|
|
|
+
|
|
|
|
+ // Process sizeof(Word) bytes at a time.
|
|
|
|
+ end -= sizeof(Word) - 1;
|
|
|
|
+ for (; src < end; src += sizeof(Word)) {
|
|
|
|
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ }
|
|
|
|
+ end += sizeof(Word) - 1;
|
|
|
|
+
|
|
|
|
+ // Compute CRC of remaining bytes.
|
|
|
|
+ for (;src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc0, *src);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+#define REPEAT_FROM_1(macro) \
|
|
|
|
+ macro(1); \
|
|
|
|
+ macro(2); \
|
|
|
|
+ macro(3); \
|
|
|
|
+ macro(4); \
|
|
|
|
+ macro(5); \
|
|
|
|
+ macro(6); \
|
|
|
|
+ macro(7);
|
|
|
|
+
|
|
|
|
+#define REPEAT_FROM_0(macro) \
|
|
|
|
+ macro(0); \
|
|
|
|
+ REPEAT_FROM_1(macro)
|
|
|
|
+
|
|
|
|
+ // Faster, process adjusent blocks in parallel and concatenate CRCs.
|
|
|
|
+ Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ if (kStride < 2 || kStride > 8) {
|
|
|
|
+ // Unsupported configuration;
|
|
|
|
+ // fall back to something sensible.
|
|
|
|
+ return CrcWord(data, bytes, start);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ const uint8 *end = src + bytes;
|
|
|
|
+ Crc crc0 = start ^ Base().Canonize();
|
|
|
|
+ enum {
|
|
|
|
+ // Add 16 to avoid false L1 cache collisions.
|
|
|
|
+ kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1),
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
+ if (src >= end) {
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ end -= kStride * kStripe - 1;
|
|
|
|
+ if (src < end) {
|
|
|
|
+ Crc x_pow_8kStripe = Base().Xpow8N(kStripe);
|
|
|
|
+ do {
|
|
|
|
+ const uint8 *stripe_end = src + kStripe;
|
|
|
|
+
|
|
|
|
+#define INIT_CRC(reg) \
|
|
|
|
+ Crc crc##reg; \
|
|
|
|
+ if (kStride >= reg) { \
|
|
|
|
+ crc##reg = 0; \
|
|
|
|
+ }
|
|
|
|
+ REPEAT_FROM_1(INIT_CRC);
|
|
|
|
+#undef INIT_CRC
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+#define FIRST(reg) \
|
|
|
|
+ Word buf##reg; \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \
|
|
|
|
+ buf##reg ^= Downcast<Crc, Word>(crc##reg); \
|
|
|
|
+ if (sizeof(crc##reg) > sizeof(buf##reg)) { \
|
|
|
|
+ crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \
|
|
|
|
+ crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
|
|
|
|
+ } else { \
|
|
|
|
+ crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
|
|
|
|
+ } \
|
|
|
|
+ buf##reg >>= 8; \
|
|
|
|
+ }
|
|
|
|
+ REPEAT_FROM_0(FIRST);
|
|
|
|
+#undef FIRST
|
|
|
|
+
|
|
|
|
+ for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) {
|
|
|
|
+#define NEXT(reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \
|
|
|
|
+ buf##reg >>= 8; \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+ REPEAT_FROM_0(NEXT);
|
|
|
|
+#undef NEXT
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+#define LAST(reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+ REPEAT_FROM_0(LAST);
|
|
|
|
+#undef LAST
|
|
|
|
+
|
|
|
|
+ src += sizeof(Word);
|
|
|
|
+ } while (src < stripe_end);
|
|
|
|
+
|
|
|
|
+#if 0
|
|
|
|
+// The code is left for illustrational purposes only.
|
|
|
|
+#define COMBINE(reg) do { \
|
|
|
|
+ if (reg > 0 && kStride > reg) { \
|
|
|
|
+ crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+#else
|
|
|
|
+#define COMBINE(reg) do { \
|
|
|
|
+ if (reg > 0 && kStride > reg) { \
|
|
|
|
+ crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+#endif
|
|
|
|
+ REPEAT_FROM_0(COMBINE);
|
|
|
|
+#undef COMBINE
|
|
|
|
+
|
|
|
|
+ src += (kStride - 1) * kStripe;
|
|
|
|
+ }
|
|
|
|
+ while (src < end);
|
|
|
|
+ }
|
|
|
|
+ end += kStride * kStripe - 1;
|
|
|
|
+
|
|
|
|
+ // Process sizeof(Word) bytes at a time.
|
|
|
|
+ end -= sizeof(Word) - 1;
|
|
|
|
+ for (; src < end; src += sizeof(Word)) {
|
|
|
|
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ }
|
|
|
|
+ end += sizeof(Word) - 1;
|
|
|
|
+
|
|
|
|
+ // Compute CRC of remaining bytes.
|
|
|
|
+ for (;src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc0, *src);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Fastest, interleaved multi-byte CRC.
|
|
|
|
+ Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const {
|
|
|
|
+ if (kStride < 2 || kStride > 8) {
|
|
|
|
+ // Unsupported configuration;
|
|
|
|
+ // fall back to something sensible.
|
|
|
|
+ return CrcWord(data, bytes, start);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ const uint8 *end = src + bytes;
|
|
|
|
+ Crc crc0 = start ^ Base().Canonize();
|
|
|
|
+
|
|
|
|
+ ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
|
|
|
|
+ if (src >= end) {
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Process kStride Word registers at once;
|
|
|
|
+ // should have have at least 2*kInterleaveBytes of data to start.
|
|
|
|
+ end -= 2*kInterleaveBytes - 1;
|
|
|
|
+ if (src < end) {
|
|
|
|
+ Crc crc_carryover;
|
|
|
|
+ if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
+ // crc_carryover is used if and only if Crc is wider than Word.
|
|
|
|
+ crc_carryover = 0;
|
|
|
|
+ }
|
|
|
|
+#define INIT_CRC(reg) \
|
|
|
|
+ Crc crc##reg; \
|
|
|
|
+ if (reg > 0 && kStride > reg) { \
|
|
|
|
+ crc##reg = 0; \
|
|
|
|
+ }
|
|
|
|
+ REPEAT_FROM_1(INIT_CRC);
|
|
|
|
+#undef INIT_CRC
|
|
|
|
+
|
|
|
|
+#define INIT_BUF(reg) \
|
|
|
|
+ Word buf##reg; \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
|
|
|
|
+ }
|
|
|
|
+ REPEAT_FROM_0(INIT_BUF);
|
|
|
|
+#undef INIT_BUF
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ PREFETCH(src);
|
|
|
|
+ src += kInterleaveBytes;
|
|
|
|
+
|
|
|
|
+ if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
+ crc0 ^= crc_carryover;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+#define FIRST(reg, next_reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ buf##reg ^= Downcast<Crc, Word>(crc##reg); \
|
|
|
|
+ if (sizeof(Crc) > sizeof(Word)) { \
|
|
|
|
+ if (reg < kStride - 1) { \
|
|
|
|
+ crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
|
|
|
|
+ } else { \
|
|
|
|
+ crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
|
|
|
|
+ } \
|
|
|
|
+ } \
|
|
|
|
+ crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \
|
|
|
|
+ buf##reg >>= 8; \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+ FIRST(0, 1);
|
|
|
|
+ FIRST(1, 2);
|
|
|
|
+ FIRST(2, 3);
|
|
|
|
+ FIRST(3, 4);
|
|
|
|
+ FIRST(4, 5);
|
|
|
|
+ FIRST(5, 6);
|
|
|
|
+ FIRST(6, 7);
|
|
|
|
+ FIRST(7, 0);
|
|
|
|
+#undef FIRST
|
|
|
|
+
|
|
|
|
+ for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) {
|
|
|
|
+#define NEXT(reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ crc##reg ^= \
|
|
|
|
+ TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \
|
|
|
|
+ buf##reg >>= 8; \
|
|
|
|
+ } \
|
|
|
|
+} while(0)
|
|
|
|
+ REPEAT_FROM_0(NEXT);
|
|
|
|
+#undef NEXT
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+#define LAST(reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \
|
|
|
|
+ buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
|
|
|
|
+ } \
|
|
|
|
+} while(0)
|
|
|
|
+ REPEAT_FROM_0(LAST);
|
|
|
|
+#undef LAST
|
|
|
|
+ }
|
|
|
|
+ while (src < end);
|
|
|
|
+
|
|
|
|
+ if (sizeof(Crc) > sizeof(Word)) {
|
|
|
|
+ crc0 ^= crc_carryover;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+#define COMBINE(reg) do { \
|
|
|
|
+ if (kStride > reg) { \
|
|
|
|
+ if (reg != 0) { \
|
|
|
|
+ crc0 ^= crc##reg; \
|
|
|
|
+ } \
|
|
|
|
+ CRC_WORD(this, crc0, buf##reg); \
|
|
|
|
+ } \
|
|
|
|
+} while (0)
|
|
|
|
+ REPEAT_FROM_0(COMBINE);
|
|
|
|
+#undef COMBINE
|
|
|
|
+
|
|
|
|
+ src += kInterleaveBytes;
|
|
|
|
+ }
|
|
|
|
+ end += 2*kInterleaveBytes - 1;
|
|
|
|
+
|
|
|
|
+ // Process sizeof(Word) bytes at once.
|
|
|
|
+ end -= sizeof(Word) - 1;
|
|
|
|
+ for (; src < end; src += sizeof(Word)) {
|
|
|
|
+ Word buf0 = reinterpret_cast<const Word *>(src)[0];
|
|
|
|
+ CRC_WORD(this, crc0, buf0);
|
|
|
|
+ }
|
|
|
|
+ end += sizeof(Word) - 1;
|
|
|
|
+
|
|
|
|
+ // Compute CRC of remaining bytes.
|
|
|
|
+ for (;src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc0, *src);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return (crc0 ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ protected:
|
|
|
|
+ enum {
|
|
|
|
+ kInterleaveBytes = sizeof(Word) * kStride,
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ // Multiplication tables used by CRCs.
|
|
|
|
+ TableEntry crc_word_interleaved_[sizeof(Word)][256];
|
|
|
|
+ TableEntry crc_word_[sizeof(Word)][256];
|
|
|
|
+
|
|
|
|
+ // Base class stored after CRC tables so that the most frequently
|
|
|
|
+ // used table is at offset 0 and may be accessed faster.
|
|
|
|
+ GfUtil<Crc> base_;
|
|
|
|
+
|
|
|
|
+ friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >;
|
|
|
|
+
|
|
|
|
+ private:
|
|
|
|
+ // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is
|
|
|
|
+ // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on
|
|
|
|
+ // hardware and compiler). However, there are problems with compilers.
|
|
|
|
+ //
|
|
|
|
+ // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory.
|
|
|
|
+ //
|
|
|
|
+ // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads:
|
|
|
|
+ // CL 15.00.307291.1 C++ >1.2< CPU cycles/byte
|
|
|
|
+ // ICL 11.1.051 -O3 C++ 1.5 CPU cycles/byte
|
|
|
|
+ // GCC 4.5 -O3 C++ 2.0 CPU cycles/byte
|
|
|
|
+ // GCC 4.x -O3 ASM >1.2< CPU cycles/byte
|
|
|
|
+ //
|
|
|
|
+ // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads
|
|
|
|
+ // CL 15.00.307291.1 C++ 2.0 CPU cycles/byte
|
|
|
|
+ // GCC 4.5 -O3 C++ 1.9 CPU cycles/byte
|
|
|
|
+ // ICL 11.1.051 -S C++ 1.6 CPU cycles/byte
|
|
|
|
+ // GCC 4.x -O3 ASM >1.3< CPU cycles/byte
|
|
|
|
+ //
|
|
|
|
+ // So, use inline ASM code for GCC for both i386 and amd64.
|
|
|
|
+
|
|
|
|
+ Crc CrcMultiwordI386Mmx(
|
|
|
|
+ const void *data, size_t bytes, const Crc &start) const;
|
|
|
|
+ Crc CrcMultiwordGccAmd64(
|
|
|
|
+ const void *data, size_t bytes, const Crc &start) const;
|
|
|
|
+ Crc CrcMultiwordGccAmd64Sse2(
|
|
|
|
+ const uint8 *src, const uint8 *end, const Crc &start) const;
|
|
|
|
+} GCC_ALIGN_ATTRIBUTE(16);
|
|
|
|
+
|
|
|
|
+#undef REPEAT_FROM_0
|
|
|
|
+#undef REPEAT_FROM_1
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+// Specialized variants.
|
|
|
|
+#if CRCUTIL_USE_ASM
|
|
|
|
+
|
|
|
|
+#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
|
|
|
|
+
|
|
|
|
+// Declare specialized functions.
|
|
|
|
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
|
|
|
|
+ const void *data, size_t bytes, const uint64 &start) const;
|
|
|
|
+
|
|
|
|
+#if HAVE_AMD64 && HAVE_SSE2
|
|
|
|
+template<>
|
|
|
|
+uint128_sse2
|
|
|
|
+GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
|
|
|
|
+ const void *data, size_t bytes, const uint128_sse2 &start) const;
|
|
|
|
+#endif // HAVE_AMD64 && HAVE_SSE2
|
|
|
|
+
|
|
|
|
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \
|
|
|
|
+ (HAVE_I386 && HAVE_MMX)
|
|
|
|
+
|
|
|
|
+// Work around bug in MSC (present at least in v. 15.00.30729.1)
|
|
|
|
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
|
|
|
|
+ const void *data,
|
|
|
|
+ size_t bytes,
|
|
|
|
+ const uint64 &start) const;
|
|
|
|
+template<> __forceinline
|
|
|
|
+uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
|
|
|
|
+ const void *data,
|
|
|
|
+ size_t bytes,
|
|
|
|
+ const uint64 &start) const {
|
|
|
|
+ typedef uint64 Word;
|
|
|
|
+ typedef uint64 Crc;
|
|
|
|
+ if (bytes <= 12) {
|
|
|
|
+ const uint8 *src = static_cast<const uint8 *>(data);
|
|
|
|
+ uint64 crc = start ^ Base().Canonize();
|
|
|
|
+ for (const uint8 *end = src + bytes; src < end; ++src) {
|
|
|
|
+ CRC_BYTE(this, crc, *src);
|
|
|
|
+ }
|
|
|
|
+ return (crc ^ Base().Canonize());
|
|
|
|
+ }
|
|
|
|
+ return CrcMultiwordI386Mmx(data, bytes, start);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+#endif // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
|
|
|
|
+
|
|
|
|
+#endif // CRCUTIL_USE_ASM
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+#pragma pack(pop)
|
|
|
|
+
|
|
|
|
+} // namespace crcutil
|
|
|
|
+
|
|
|
|
+#endif // CRCUTIL_GENERIC_CRC_H_
|