123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- // Copyright 2010 Google Inc. All rights reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- // Implements 64-bit multiword CRC using MMX built-in functions.
- #include "generic_crc.h"
- #if CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
- namespace crcutil {
- template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start)
- const GCC_OMIT_FRAME_POINTER;
- #if !defined(_MSC_VER)
- template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
- const void *data,
- size_t bytes,
- const uint64 &start) const {
- if (bytes <= 7) {
- const uint8 *src = static_cast<const uint8 *>(data);
- uint64 crc = start ^ Base().Canonize();
- for (const uint8 *end = src + bytes; src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ Base().Canonize());
- }
- return CrcMultiwordI386Mmx(data, bytes, start);
- }
- #else
- #pragma warning(push)
- // CL: uninitialized local variable 'crc1' used
- // Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
- #pragma warning(disable: 4700)
- #pragma warning(disable: 4619) // there is no warning number '592'
- // ICL: variable "crc1" is used before its value is set
- // Wrong: crc1 = XOR(crc1, crc1) sets it to 0.
- #pragma warning(disable: 592)
- #endif // !defined(_MSC_VER)
- #define MM64(adr) reinterpret_cast<const __m64 *>(adr)
- #define MM64_TABLE(byte) MM64(crc_word_interleaved_[byte])
- #define CRC_WORD_MMX(this, crc, buf) do { \
- buf = _mm_xor_si64(buf, crc); \
- uint32 tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
- buf = _mm_srli_si64(buf, 32); \
- crc = MM64(crc_word_[0])[TO_BYTE(tmp)]; \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[1])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[2])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[3])[tmp]); \
- tmp = static_cast<uint32>(_mm_cvtsi64_si32(buf)); \
- crc = _mm_xor_si64(crc, MM64(crc_word_[4])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[5])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[6])[TO_BYTE(tmp)]); \
- tmp >>= 8; \
- crc = _mm_xor_si64(crc, MM64(crc_word_[7])[tmp]); \
- } while (0)
- template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
- const void *data, size_t bytes, const uint64 &start) const {
- const uint8 *src = static_cast<const uint8 *>(data);
- const uint8 *end = src + bytes;
- uint64 crc = start ^ Base().Canonize();
- ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc, uint64);
- if (src >= end) {
- return (crc ^ Base().Canonize());
- }
- // Process 4 registers of sizeof(uint64) bytes at once.
- bytes = static_cast<size_t>(end - src) & ~(4*8 - 1);
- if (bytes > 4*8) {
- const uint8 *stop = src + bytes - 4*8;
- union {
- __m64 m64;
- uint64 u64;
- } temp;
- __m64 crc0;
- __m64 crc1;
- __m64 crc2;
- __m64 crc3;
- __m64 buf0 = MM64(src)[0];
- __m64 buf1 = MM64(src)[1];
- __m64 buf2 = MM64(src)[2];
- __m64 buf3 = MM64(src)[3];
- temp.u64 = crc;
- crc0 = temp.m64;
- #if defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
- // There is no way to suppress a warning in GCC;
- // generate extra assignments.
- temp.u64 = 0;
- crc1 = temp.m64;
- crc2 = temp.m64;
- crc3 = temp.m64;
- #else
- crc1 = _mm_xor_si64(crc1, crc1);
- crc2 = _mm_xor_si64(crc2, crc2);
- crc3 = _mm_xor_si64(crc3, crc3);
- #endif // defined(__GNUC__) && !GCC_VERSION_AVAILABLE(4, 4)
- do {
- PREFETCH(src);
- src += 4*8;
- buf0 = _mm_xor_si64(buf0, crc0);
- buf1 = _mm_xor_si64(buf1, crc1);
- buf2 = _mm_xor_si64(buf2, crc2);
- buf3 = _mm_xor_si64(buf3, crc3);
- uint32 tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
- uint32 tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
- uint32 tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
- uint32 tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
- buf0 = _mm_srli_si64(buf0, 32);
- buf1 = _mm_srli_si64(buf1, 32);
- buf2 = _mm_srli_si64(buf2, 32);
- buf3 = _mm_srli_si64(buf3, 32);
- crc0 = MM64_TABLE(0)[TO_BYTE(tmp0)];
- tmp0 >>= 8;
- crc1 = MM64_TABLE(0)[TO_BYTE(tmp1)];
- tmp1 >>= 8;
- crc2 = MM64_TABLE(0)[TO_BYTE(tmp2)];
- tmp2 >>= 8;
- crc3 = MM64_TABLE(0)[TO_BYTE(tmp3)];
- tmp3 >>= 8;
- #define XOR(byte) do { \
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(byte)[TO_BYTE(tmp0)]); \
- tmp0 >>= 8; \
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(byte)[TO_BYTE(tmp1)]); \
- tmp1 >>= 8; \
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(byte)[TO_BYTE(tmp2)]); \
- tmp2 >>= 8; \
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(byte)[TO_BYTE(tmp3)]); \
- tmp3 >>= 8; \
- } while (0)
- XOR(1);
- XOR(2);
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(3)[tmp0]);
- tmp0 = static_cast<uint32>(_mm_cvtsi64_si32(buf0));
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(3)[tmp1]);
- tmp1 = static_cast<uint32>(_mm_cvtsi64_si32(buf1));
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(3)[tmp2]);
- tmp2 = static_cast<uint32>(_mm_cvtsi64_si32(buf2));
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(3)[tmp3]);
- tmp3 = static_cast<uint32>(_mm_cvtsi64_si32(buf3));
- XOR(4);
- XOR(5);
- XOR(6);
- #undef XOR
- crc0 = _mm_xor_si64(crc0, MM64_TABLE(sizeof(uint64) - 1)[tmp0]);
- buf0 = MM64(src)[0];
- crc1 = _mm_xor_si64(crc1, MM64_TABLE(sizeof(uint64) - 1)[tmp1]);
- buf1 = MM64(src)[1];
- crc2 = _mm_xor_si64(crc2, MM64_TABLE(sizeof(uint64) - 1)[tmp2]);
- buf2 = MM64(src)[2];
- crc3 = _mm_xor_si64(crc3, MM64_TABLE(sizeof(uint64) - 1)[tmp3]);
- buf3 = MM64(src)[3];
- }
- while (src < stop);
- CRC_WORD_MMX(this, crc0, buf0);
- buf1 = _mm_xor_si64(buf1, crc1);
- CRC_WORD_MMX(this, crc0, buf1);
- buf2 = _mm_xor_si64(buf2, crc2);
- CRC_WORD_MMX(this, crc0, buf2);
- buf3 = _mm_xor_si64(buf3, crc3);
- CRC_WORD_MMX(this, crc0, buf3);
- temp.m64 = crc0;
- crc = temp.u64;
- _mm_empty();
- src += 4*8;
- }
- // Process sizeof(uint64) bytes at once.
- bytes = static_cast<size_t>(end - src) & ~(sizeof(uint64) - 1);
- if (bytes > 0) {
- union {
- __m64 m64;
- uint64 u64;
- } temp;
- __m64 crc0;
- temp.u64 = crc;
- crc0 = temp.m64;
- for (const uint8 *stop = src + bytes; src < stop; src += sizeof(uint64)) {
- __m64 buf0 = MM64(src)[0];
- CRC_WORD_MMX(this, crc0, buf0);
- }
- temp.m64 = crc0;
- crc = temp.u64;
- _mm_empty();
- }
- // Compute CRC of remaining bytes.
- for (;src < end; ++src) {
- CRC_BYTE(this, crc, *src);
- }
- return (crc ^ Base().Canonize());
- }
- #if defined(_MSC_VER)
- #pragma warning(pop)
- #endif // defined(_MSC_VER)
- } // namespace crcutil
- #endif // CRCUTIL_USE_ASM && HAVE_I386 && HAVE_MMX
|