Browse Source

Restoring authorship annotation for <f0b0s@yandex-team.ru>. Commit 1 of 2.

f0b0s 3 years ago
parent
commit
deabc5260a

+ 66 - 66
contrib/libs/crcutil/aligned_alloc.h

@@ -1,66 +1,66 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Poor man's platform-independent implementation of aligned memory allocator.
-
-#ifndef CRCUTIL_ALIGNED_ALLOC_H_
-#define CRCUTIL_ALIGNED_ALLOC_H_
-
-#include "std_headers.h"    // size_t, ptrdiff_t
-
-namespace crcutil {
-
-// Allocates a block of memory of "size" bytes so that a field
-// at "field_offset" is aligned on "align" boundary.
-//
-// NB #1: "align" shall be exact power of two.
-//
-// NB #2: memory allocated by AlignedAlloc should be release by AlignedFree().
-//
-inline void *AlignedAlloc(size_t size,
-                          size_t field_offset,
-                          size_t align,
-                          const void **allocated_mem) {
-  if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) {
-    align = sizeof(*allocated_mem);
-  }
-  size += align - 1 + sizeof(*allocated_mem);
-  char *allocated_memory = new char[size];
-  char *aligned_memory = allocated_memory + sizeof(*allocated_mem);
-  field_offset &= align - 1;
-  size_t actual_alignment =
-      reinterpret_cast<size_t>(aligned_memory + field_offset) & (align - 1);
-  if (actual_alignment != 0) {
-    aligned_memory += align - actual_alignment;
-  }
-  reinterpret_cast<char **>(aligned_memory)[-1] = allocated_memory;
-
-  if (allocated_mem != NULL) {
-    *allocated_mem = allocated_memory;
-  }
-
-  return aligned_memory;
-}
-
-// Frees memory allocated by AlignedAlloc().
-inline void AlignedFree(void *aligned_memory) {
-  if (aligned_memory != NULL) {
-    char *allocated_memory = reinterpret_cast<char **>(aligned_memory)[-1];
-    delete[] allocated_memory;
-  }
-}
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_ALIGNED_ALLOC_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+// 
+// Poor man's platform-independent implementation of aligned memory allocator. 
+ 
+#ifndef CRCUTIL_ALIGNED_ALLOC_H_ 
+#define CRCUTIL_ALIGNED_ALLOC_H_ 
+ 
+#include "std_headers.h"    // size_t, ptrdiff_t 
+ 
+namespace crcutil { 
+ 
+// Allocates a block of memory of "size" bytes (with new char[]) so that a field 
+// at "field_offset" from the returned pointer is aligned on "align" boundary. 
+// If "allocated_mem" is not NULL, the raw new[] pointer is also stored there. 
+// NB #1: "align" shall be exact power of two and at least sizeof(char *); 
+// any other value is silently replaced by sizeof(const void *). 
+// NB #2: memory allocated by AlignedAlloc should be released by AlignedFree(). 
+// 
+inline void *AlignedAlloc(size_t size, 
+                          size_t field_offset, 
+                          size_t align, 
+                          const void **allocated_mem) { 
+  if (align == 0 || (align & (align - 1)) != 0 || align < sizeof(char *)) {  // zero, not a power of two, or too small 
+    align = sizeof(*allocated_mem);  // sizeof does not evaluate its operand, so a NULL allocated_mem is harmless here 
+  } 
+  size += align - 1 + sizeof(*allocated_mem);  // worst-case padding plus one pointer slot; NOTE(review): no overflow check 
+  char *allocated_memory = new char[size]; 
+  char *aligned_memory = allocated_memory + sizeof(*allocated_mem);  // keep room below the result for the back-pointer 
+  field_offset &= align - 1;  // only the offset modulo "align" matters 
+  size_t actual_alignment = 
+      reinterpret_cast<size_t>(aligned_memory + field_offset) & (align - 1);  // how far the field is past a boundary 
+  if (actual_alignment != 0) { 
+    aligned_memory += align - actual_alignment;  // advance so that the field lands on the next boundary 
+  } 
+  reinterpret_cast<char **>(aligned_memory)[-1] = allocated_memory;  // back-pointer; NOTE(review): slot is not pointer-aligned if field_offset is not a multiple of sizeof(char *) 
+ 
+  if (allocated_mem != NULL) { 
+    *allocated_mem = allocated_memory;  // optional out-parameter: the raw new[] pointer 
+  } 
+ 
+  return aligned_memory; 
+} 
+ 
+// Frees memory allocated by AlignedAlloc(); passing NULL does nothing. 
+inline void AlignedFree(void *aligned_memory) { 
+  if (aligned_memory != NULL) { 
+    char *allocated_memory = reinterpret_cast<char **>(aligned_memory)[-1];  // raw new[] pointer kept in the slot just below the block 
+    delete[] allocated_memory; 
+  } 
+} 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_ALIGNED_ALLOC_H_ 

+ 73 - 73
contrib/libs/crcutil/base_types.h

@@ -1,73 +1,73 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines 8/16/32/64-bit integer types.
-//
-// Either uint64 or uint32 will map to size_t.
-// This way, specialized variants of CRC implementation
-// parameterized by "size_t" will be reused when
-// parameterized by "uint64" or "uint32".
-// In their turn, specialized verisons are parameterized
-// by "size_t" so that one version of the code is optimal
-// both on 32-bit and 64-bit platforms.
-
-#ifndef CRCUTIL_BASE_TYPES_H_
-#define CRCUTIL_BASE_TYPES_H_
-
-#include "std_headers.h"    // size_t, ptrdiff_t
-
-namespace crcutil {
-
-template<typename A, typename B> class ChooseFirstIfSame {
- public:
-  template<bool same_size, typename AA, typename BB> class ChooseFirstIfTrue {
-   public:
-    typedef AA Type;
-  };
-  template<typename AA, typename BB> class ChooseFirstIfTrue<false, AA, BB> {
-   public:
-    typedef BB Type;
-  };
-
-  typedef typename ChooseFirstIfTrue<sizeof(A) == sizeof(B), A, B>::Type Type;
-};
-
-typedef unsigned char uint8;
-typedef signed char int8;
-
-typedef unsigned short uint16;
-typedef short int16;
-
-typedef ChooseFirstIfSame<size_t, unsigned int>::Type uint32;
-typedef ChooseFirstIfSame<ptrdiff_t, int>::Type int32;
-
-#if defined(_MSC_VER)
-typedef ChooseFirstIfSame<size_t, unsigned __int64>::Type uint64;
-typedef ChooseFirstIfSame<ptrdiff_t, __int64>::Type int64;
-#define HAVE_UINT64 1
-#elif defined(__GNUC__)
-typedef ChooseFirstIfSame<size_t, unsigned long long>::Type uint64;
-typedef ChooseFirstIfSame<ptrdiff_t, long long>::Type int64;
-#define HAVE_UINT64 1
-#else
-// TODO: ensure that everything compiles and works when HAVE_UINT64 is false.
-// TODO: remove HAVE_UINT64 and use sizeof(uint64) instead?
-#define HAVE_UINT64 0
-typedef uint32 uint64;
-typedef int32 int64;
-#endif
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_BASE_TYPES_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Defines 8/16/32/64-bit integer types. 
+// 
+// Either uint64 or uint32 will map to size_t. 
+// This way, specialized variants of CRC implementation 
+// parameterized by "size_t" will be reused when 
+// parameterized by "uint64" or "uint32". 
+// In their turn, specialized versions are parameterized 
+// by "size_t" so that one version of the code is optimal 
+// both on 32-bit and 64-bit platforms. 
+ 
+#ifndef CRCUTIL_BASE_TYPES_H_ 
+#define CRCUTIL_BASE_TYPES_H_ 
+ 
+#include "std_headers.h"    // size_t, ptrdiff_t 
+ 
+namespace crcutil { 
+ 
+template<typename A, typename B> class ChooseFirstIfSame {  // Type is A when sizeof(A) == sizeof(B), otherwise B 
+ public: 
+  template<bool same_size, typename AA, typename BB> class ChooseFirstIfTrue {  // primary template: selects AA 
+   public: 
+    typedef AA Type; 
+  }; 
+  template<typename AA, typename BB> class ChooseFirstIfTrue<false, AA, BB> {  // same_size == false: selects BB 
+   public: 
+    typedef BB Type; 
+  }; 
+ 
+  typedef typename ChooseFirstIfTrue<sizeof(A) == sizeof(B), A, B>::Type Type;  // the size comparison picks the specialization 
+}; 
+ 
+typedef unsigned char uint8;  // assumes char is 8 bits wide -- not checked here 
+typedef signed char int8; 
+ 
+typedef unsigned short uint16;  // assumes short is 16 bits wide -- not checked here 
+typedef short int16; 
+ 
+typedef ChooseFirstIfSame<size_t, unsigned int>::Type uint32;  // size_t if it has the size of unsigned int, else unsigned int 
+typedef ChooseFirstIfSame<ptrdiff_t, int>::Type int32;  // ptrdiff_t if it has the size of int, else int 
+ 
+#if defined(_MSC_VER) 
+typedef ChooseFirstIfSame<size_t, unsigned __int64>::Type uint64;  // size_t if it has the size of unsigned __int64 
+typedef ChooseFirstIfSame<ptrdiff_t, __int64>::Type int64;  // ptrdiff_t if it has the size of __int64 
+#define HAVE_UINT64 1 
+#elif defined(__GNUC__) 
+typedef ChooseFirstIfSame<size_t, unsigned long long>::Type uint64;  // size_t if it has the size of unsigned long long 
+typedef ChooseFirstIfSame<ptrdiff_t, long long>::Type int64;  // ptrdiff_t if it has the size of long long 
+#define HAVE_UINT64 1 
+#else 
+// TODO: ensure that everything compiles and works when HAVE_UINT64 is false. 
+// TODO: remove HAVE_UINT64 and use sizeof(uint64) instead? 
+#define HAVE_UINT64 0 
+typedef uint32 uint64;  // fallback: uint64 is merely another name for uint32 
+typedef int32 int64;  // fallback: int64 is merely another name for int32 
+#endif 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_BASE_TYPES_H_ 

+ 126 - 126
contrib/libs/crcutil/bob_jenkins_rng.h

@@ -1,126 +1,126 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Glorified C++ version of Bob Jenkins' random number generator.
-// See http://burtleburtle.net/bob/rand/smallprng.html for more details.
-
-#ifndef CRCUTIL_BOB_JENKINS_RNG_H_
-#define CRCUTIL_BOB_JENKINS_RNG_H_
-
-#include "base_types.h"
-
-#if !defined(_MSC_VER)
-#define _rotl(value, bits) \
-  static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits))))
-#define _rotl64(value, bits) \
-  static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits))))
-#endif  // !defined(_MSC_VER)
-
-namespace crcutil {
-
-#pragma pack(push, 8)
-
-template<typename T> class BobJenkinsRng;
-
-template<> class BobJenkinsRng<uint32> {
- public:
-  typedef uint32 value;
-
-  value Get() {
-    value e = a_ - _rotl(b_, 23);
-    a_ = b_ ^ _rotl(c_, 16);
-    b_ = c_ + _rotl(d_, 11);
-    c_ = d_ + e;
-    d_ = e + a_;
-    return (d_);
-  }
-
-  void Init(value seed) {
-    a_ = 0xf1ea5eed;
-    b_ = seed;
-    c_ = seed;
-    d_ = seed;
-    for (size_t i = 0; i < 20; ++i) {
-      (void) Get();
-    }
-  }
-
-  explicit BobJenkinsRng(value seed) {
-    Init(seed);
-  }
-
-  BobJenkinsRng() {
-    Init(0x1234567);
-  }
-
- private:
-  value a_;
-  value b_;
-  value c_;
-  value d_;
-};
-
-
-#if HAVE_UINT64
-
-template<> class BobJenkinsRng<uint64> {
- public:
-  typedef uint64 value;
-
-  value Get() {
-    value e = a_ - _rotl64(b_, 7);
-    a_ = b_ ^ _rotl64(c_, 13);
-    b_ = c_ + _rotl64(d_, 37);
-    c_ = d_ + e;
-    d_ = e + a_;
-    return d_;
-  }
-
-  void Init(value seed) {
-    a_ = 0xf1ea5eed;
-    b_ = seed;
-    c_ = seed;
-    d_ = seed;
-    for (size_t i = 0; i < 20; ++i) {
-      (void) Get();
-    }
-  }
-
-  explicit BobJenkinsRng(value seed) {
-    Init(seed);
-  }
-
-  BobJenkinsRng() {
-    Init(0x1234567);
-  }
-
- private:
-  value a_;
-  value b_;
-  value c_;
-  value d_;
-};
-
-#endif  // HAVE_UINT64
-
-#if !defined(_MSC_VER)
-#undef _rotl
-#undef _rotl64
-#endif  // !defined(_MSC_VER)
-
-#pragma pack(pop)
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_BOB_JENKINS_RNG_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Glorified C++ version of Bob Jenkins' random number generator. 
+// See http://burtleburtle.net/bob/rand/smallprng.html for more details. 
+ 
+#ifndef CRCUTIL_BOB_JENKINS_RNG_H_ 
+#define CRCUTIL_BOB_JENKINS_RNG_H_ 
+ 
+#include "base_types.h" 
+ 
+#if !defined(_MSC_VER)  // presumably MSVC supplies _rotl/_rotl64 itself -- confirm 
+#define _rotl(value, bits) \
+  static_cast<uint32>(((value) << (bits)) + ((value) >> (32 - (bits))))  // left rotation of a 32-bit unsigned value; requires 0 < bits < 32 
+#define _rotl64(value, bits) \
+  static_cast<uint64>(((value) << (bits)) + ((value) >> (64 - (bits))))  // left rotation of a 64-bit unsigned value; requires 0 < bits < 64 
+#endif  // !defined(_MSC_VER) 
+ 
+namespace crcutil { 
+ 
+#pragma pack(push, 8) 
+ 
+template<typename T> class BobJenkinsRng; 
+ 
+template<> class BobJenkinsRng<uint32> {  // generator over uint32 with four words of state (a_, b_, c_, d_) 
+ public: 
+  typedef uint32 value; 
+ 
+  value Get() {  // advances the state by one round and returns the new d_ 
+    value e = a_ - _rotl(b_, 23); 
+    a_ = b_ ^ _rotl(c_, 16); 
+    b_ = c_ + _rotl(d_, 11); 
+    c_ = d_ + e; 
+    d_ = e + a_; 
+    return (d_); 
+  } 
+ 
+  void Init(value seed) {  // resets the whole state from "seed" 
+    a_ = 0xf1ea5eed; 
+    b_ = seed; 
+    c_ = seed; 
+    d_ = seed; 
+    for (size_t i = 0; i < 20; ++i) {  // run 20 rounds and throw the outputs away 
+      (void) Get(); 
+    } 
+  } 
+ 
+  explicit BobJenkinsRng(value seed) {  // seeded construction 
+    Init(seed); 
+  } 
+ 
+  BobJenkinsRng() {  // default construction uses the fixed seed 0x1234567 
+    Init(0x1234567); 
+  } 
+ 
+ private: 
+  value a_; 
+  value b_; 
+  value c_; 
+  value d_; 
+}; 
+ 
+ 
+#if HAVE_UINT64 
+ 
+template<> class BobJenkinsRng<uint64> {  // generator over uint64 with four words of state (a_, b_, c_, d_) 
+ public: 
+  typedef uint64 value; 
+ 
+  value Get() {  // advances the state by one round and returns the new d_ 
+    value e = a_ - _rotl64(b_, 7); 
+    a_ = b_ ^ _rotl64(c_, 13); 
+    b_ = c_ + _rotl64(d_, 37); 
+    c_ = d_ + e; 
+    d_ = e + a_; 
+    return d_; 
+  } 
+ 
+  void Init(value seed) {  // resets the whole state from "seed" 
+    a_ = 0xf1ea5eed; 
+    b_ = seed; 
+    c_ = seed; 
+    d_ = seed; 
+    for (size_t i = 0; i < 20; ++i) {  // run 20 rounds and throw the outputs away 
+      (void) Get(); 
+    } 
+  } 
+ 
+  explicit BobJenkinsRng(value seed) {  // seeded construction 
+    Init(seed); 
+  } 
+ 
+  BobJenkinsRng() {  // default construction uses the fixed seed 0x1234567 
+    Init(0x1234567); 
+  } 
+ 
+ private: 
+  value a_; 
+  value b_; 
+  value c_; 
+  value d_; 
+}; 
+ 
+#endif  // HAVE_UINT64 
+ 
+#if !defined(_MSC_VER) 
+#undef _rotl 
+#undef _rotl64 
+#endif  // !defined(_MSC_VER) 
+ 
+#pragma pack(pop) 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_BOB_JENKINS_RNG_H_ 

+ 363 - 363
contrib/libs/crcutil/crc32c_sse4.cc

@@ -1,369 +1,369 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements CRC32C using Intel's SSE4 crc32 instruction.
-// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
-// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
-
-#include "crc32c_sse4.h"
-
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Implements CRC32C using Intel's SSE4 crc32 instruction. 
+// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, 
+// emulates intrinsics via CRC_WORD/CRC_BYTE otherwise. 
+ 
+#include "crc32c_sse4.h" 
+ 
 #include <util/system/compiler.h>
 
-#if HAVE_I386 || HAVE_AMD64
-
-namespace crcutil {
-
-#define UPDATE_STRIPE_CRCS(index, block_size, num_stripes) do { \
-  CRC_UPDATE_WORD(crc0, \
-      reinterpret_cast<const size_t *>(src + \
-          0 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
-  CRC_UPDATE_WORD(crc1, \
-      reinterpret_cast<const size_t *>(src + \
-          1 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
-  CRC_UPDATE_WORD(crc2, \
-      reinterpret_cast<const size_t *>(src + \
-          2 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
-  if (num_stripes > 3) { \
-    CRC_UPDATE_WORD(crc3, \
-        reinterpret_cast<const size_t *>(src + \
-            3 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
-  } \
-} while (0)
-
-// Multiplies "crc" by "x**(8 *  STRIPE_SIZE(block_size)"
-// using appropriate multiplication table(s).
-//
-#if 0
-
-// This variant is for illustration purposes only.
-// Actual implementation below:
-// 1. Splits the computation into 2 data-independent paths
-//    by independently multiplying lower and upper halves
-//    of "crc0" in interleaved manner, and combining the
-//    results in the end.
-// 2. Removing redundant "crc0 = 0" etc. in the beginning.
-// 3. Removing redundant shifts of "tmp0" and "tmp1" in the last round.
-#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
-  size_t tmp0 = crc0; \
-  crc0 = 0; \
-  for (size_t i = 0; i < kNumTables; ++i) { \
-    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-            [i][tmp0 & (kTableEntries - 1)]; \
-    tmp0 >>= kTableEntryBits; \
-  } \
-} while (0)
-
-#else
-
-#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
-  size_t tmp0 = crc0; \
-  size_t tmp1 = crc0 >> (kTableEntryBits * kNumTablesHalfHi); \
-  crc0 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-         [0][tmp0 & (kTableEntries - 1)]; \
-  tmp0 >>= kTableEntryBits; \
-  size_t crc1 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-                [kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
-  tmp1 >>= kTableEntryBits; \
-  for (size_t i = 1; i < kNumTablesHalfLo - 1; ++i) { \
-    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-            [i][tmp0 & (kTableEntries - 1)]; \
-    tmp0 >>= kTableEntryBits; \
-    crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-            [i + kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
-    tmp1 >>= kTableEntryBits; \
-  } \
-  crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-          [kNumTablesHalfLo - 1][tmp0 & (kTableEntries - 1)]; \
-  if (kNumTables & 1) { \
-    tmp0 >>= kTableEntryBits; \
-  } \
-  crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-          [kNumTables - 1][tmp1]; \
-  if (kNumTables & 1) { \
-    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-            [kNumTablesHalfLo][tmp0 & (kTableEntries - 1)]; \
-  } \
-  crc0 ^= crc1; \
-} while (0)
-
-#endif
-
-// Given CRCs (crc0, crc1, etc.) of consequitive
-// stripes of STRIPE_SIZE(block_size) bytes each,
-// produces CRC of concatenated stripes.
-#define COMBINE_STRIPE_CRCS(block_size, num_stripes) do { \
-  MULTIPLY_CRC(crc0, block_size, num_stripes); \
-  crc0 ^= crc1; \
-  MULTIPLY_CRC(crc0, block_size, num_stripes); \
-  crc0 ^= crc2; \
-  if (num_stripes > 3) { \
-    MULTIPLY_CRC(crc0, block_size, num_stripes); \
-    crc0 ^= crc3; \
-  } \
-} while (0)
-
-// Processes input BLOCK_SIZE(block) bytes per iteration
-// by splitting a block of BLOCK_SIZE(block) bytes into N
-// equally-sized stripes of STRIPE_SIZE(block_size) each,
-// computing CRC of each stripe, and concatenating stripe CRCs.
-#define PROCESS_BLOCK(block_size, num_stripes) do { \
-  while (bytes >= CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
-    Crc crc1 = 0; \
-    Crc crc2 = 0; \
-    Crc crc3; \
-    if (num_stripes > 3) crc3 = 0; \
-    { \
-      const uint8 *stripe_end = src + \
-          (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) / \
-              kUnrolledLoopBytes) * kUnrolledLoopBytes; \
-      do { \
-        UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(1, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(2, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(3, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(4, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(5, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(6, block_size, num_stripes); \
-        UPDATE_STRIPE_CRCS(7, block_size, num_stripes); \
-        src += kUnrolledLoopBytes; \
-      } while (src < stripe_end); \
-      if ((CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
-          kUnrolledLoopBytes) != 0) { \
-        stripe_end += \
-            CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
-                kUnrolledLoopBytes; \
-        do { \
-          UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
-          src += sizeof(size_t); \
-        } while (src < stripe_end); \
-      } \
-    } \
-    COMBINE_STRIPE_CRCS(block_size, num_stripes); \
-    src += CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * \
-           ((num_stripes) - 1); \
-    bytes = static_cast<size_t>(end - src); \
-  } \
- no_more_##block_size##_##num_stripes:; \
-} while (0)
-
+#if HAVE_I386 || HAVE_AMD64 
+ 
+namespace crcutil { 
+ 
+#define UPDATE_STRIPE_CRCS(index, block_size, num_stripes) do { \
+  CRC_UPDATE_WORD(crc0, \
+      reinterpret_cast<const size_t *>(src + \
+          0 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+  CRC_UPDATE_WORD(crc1, \
+      reinterpret_cast<const size_t *>(src + \
+          1 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+  CRC_UPDATE_WORD(crc2, \
+      reinterpret_cast<const size_t *>(src + \
+          2 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+  if (num_stripes > 3) { \
+    CRC_UPDATE_WORD(crc3, \
+        reinterpret_cast<const size_t *>(src + \
+            3 * CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes))[index]); \
+  } \
+} while (0)  // feeds size_t word [index] of stripe k (at src + k * stripe size) into crc<k>; crc3 only when num_stripes > 3; uses src and crc0..crc3 from the caller's scope 
+ 
+// Multiplies "crc" by "x**(8 * STRIPE_SIZE(block_size))" 
+// using appropriate multiplication table(s). 
+// The first macro argument is overwritten with the product. 
+#if 0 
+ 
+// This variant is for illustration purposes only. 
+// Actual implementation below: 
+// 1. Splits the computation into 2 data-independent paths 
+//    by independently multiplying lower and upper halves 
+//    of "crc0" in interleaved manner, and combining the 
+//    results in the end. 
+// 2. Removes redundant "crc0 = 0" etc. in the beginning. 
+// 3. Removes redundant shifts of "tmp0" and "tmp1" in the last round. 
+#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
+  size_t tmp0 = crc0; \
+  crc0 = 0; \
+  for (size_t i = 0; i < kNumTables; ++i) { \
+    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+            [i][tmp0 & (kTableEntries - 1)]; \
+    tmp0 >>= kTableEntryBits; \
+  } \
+} while (0) 
+ 
+#else 
+ 
+#define MULTIPLY_CRC(crc0, block_size, num_stripes) do { \
+  size_t tmp0 = crc0; \
+  size_t tmp1 = crc0 >> (kTableEntryBits * kNumTablesHalfHi); \
+  crc0 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+         [0][tmp0 & (kTableEntries - 1)]; \
+  tmp0 >>= kTableEntryBits; \
+  size_t crc1 = CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+                [kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
+  tmp1 >>= kTableEntryBits; \
+  for (size_t i = 1; i < kNumTablesHalfLo - 1; ++i) { \
+    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+            [i][tmp0 & (kTableEntries - 1)]; \
+    tmp0 >>= kTableEntryBits; \
+    crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+            [i + kNumTablesHalfHi][tmp1 & (kTableEntries - 1)]; \
+    tmp1 >>= kTableEntryBits; \
+  } \
+  crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+          [kNumTablesHalfLo - 1][tmp0 & (kTableEntries - 1)]; \
+  if (kNumTables & 1) { \
+    tmp0 >>= kTableEntryBits; \
+  } \
+  crc1 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+          [kNumTables - 1][tmp1]; \
+  if (kNumTables & 1) { \
+    crc0 ^= CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
+            [kNumTablesHalfLo][tmp0 & (kTableEntries - 1)]; \
+  } \
+  crc0 ^= crc1; \
+} while (0)  // tmp0, tmp1 and crc1 are locals of the do-block; this crc1 hides any outer crc1 only inside the macro body 
+ 
+#endif 
+ 
+// Given CRCs (crc0, crc1, etc.) of consecutive 
+// stripes of STRIPE_SIZE(block_size) bytes each, 
+// produces CRC of concatenated stripes in crc0 (crc3 is used only when num_stripes > 3). 
+#define COMBINE_STRIPE_CRCS(block_size, num_stripes) do { \
+  MULTIPLY_CRC(crc0, block_size, num_stripes); \
+  crc0 ^= crc1; \
+  MULTIPLY_CRC(crc0, block_size, num_stripes); \
+  crc0 ^= crc2; \
+  if (num_stripes > 3) { \
+    MULTIPLY_CRC(crc0, block_size, num_stripes); \
+    crc0 ^= crc3; \
+  } \
+} while (0) 
+ 
+// Processes input BLOCK_SIZE(block_size) bytes per iteration, while "bytes" allows, 
+// by splitting a block of BLOCK_SIZE(block_size) bytes into num_stripes 
+// equally-sized stripes of STRIPE_SIZE(block_size) each, 
+// computing CRC of each stripe, and concatenating stripe CRCs. Uses src, end, bytes and crc0 from the caller's scope. 
+#define PROCESS_BLOCK(block_size, num_stripes) do { \
+  while (bytes >= CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
+    Crc crc1 = 0; \
+    Crc crc2 = 0; \
+    Crc crc3; \
+    if (num_stripes > 3) crc3 = 0; \
+    { \
+      const uint8 *stripe_end = src + \
+          (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) / \
+              kUnrolledLoopBytes) * kUnrolledLoopBytes; \
+      do { \
+        UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(1, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(2, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(3, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(4, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(5, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(6, block_size, num_stripes); \
+        UPDATE_STRIPE_CRCS(7, block_size, num_stripes); \
+        src += kUnrolledLoopBytes; \
+      } while (src < stripe_end); \
+      if ((CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
+          kUnrolledLoopBytes) != 0) { \
+        stripe_end += \
+            CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) % \
+                kUnrolledLoopBytes; \
+        do { \
+          UPDATE_STRIPE_CRCS(0, block_size, num_stripes); \
+          src += sizeof(size_t); \
+        } while (src < stripe_end); \
+      } \
+    } \
+    COMBINE_STRIPE_CRCS(block_size, num_stripes); \
+    src += CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * \
+           ((num_stripes) - 1); \
+    bytes = static_cast<size_t>(end - src); \
+  } \
+ no_more_##block_size##_##num_stripes:; \
+} while (0)  // the pasted label no_more_<block_size>_<num_stripes> sits right after the loop so that a goto can skip this block size 
+ 
 Y_NO_SANITIZE("undefined")
-size_t Crc32cSSE4::Crc32c(const void *data, size_t bytes, Crc crc0) const {
-  const uint8 *src = static_cast<const uint8 *>(data);
-  const uint8 *end = src + bytes;
-  crc0 ^= Base().Canonize();
-
-  // If we don't have too much data to process,
-  // do not waste time trying to align input etc.
-  // Noticeably improves performance on small inputs.
-  if (bytes < 4 * sizeof(size_t)) goto less_than_4_size_t;
-  if (bytes < 8 * sizeof(size_t)) goto less_than_8_size_t;
-  if (bytes < 16 * sizeof(size_t)) goto less_than_16_size_t;
-
-#define PROCESS_TAIL_IF_SMALL(block_size, num_stripes) do { \
-  if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
-    goto no_more_##block_size##_##num_stripes; \
-  } \
-} while (0)
-#define NOOP(block_size, num_stripes)
-
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(PROCESS_TAIL_IF_SMALL,
-                                             NOOP,
-                                             NOOP);
-
-#undef PROCESS_TAIL_IF_SMALL
-
-
-  // Do not use ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() here because:
-  // 1. It uses CRC_BYTE() which won't work.
-  // 2. Its threshold may be incorrect becuase Crc32 that uses
-  //    native CPU crc32 instruction is much faster than
-  //    generic table-based CRC computation.
-  //
-  // In case of X5550 CPU, break even point is at 2KB -- exactly.
-  if (bytes >= 2 * 1024) {
-    while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) {
-      if (src >= end) {
-        return (crc0 ^ Base().Canonize());
-      }
-      CRC_UPDATE_BYTE(crc0, src[0]);
-      src += 1;
-    }
-    bytes = static_cast<size_t>(end - src);
-  }
-  if (src >= end) {
-    return (crc0 ^ Base().Canonize());
-  }
-
-  // Quickly skip processing of too large blocks
-  // Noticeably improves performance on small inputs.
-#define SKIP_BLOCK_IF_NEEDED(block_size, num_stripes) do { \
-  if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \
-    goto no_more_##block_size##_##num_stripes; \
-  } \
-} while (0)
-
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(NOOP,
-                                             SKIP_BLOCK_IF_NEEDED,
-                                             SKIP_BLOCK_IF_NEEDED);
-
-#undef SKIP_BLOCK_IF_NEEDED
-
-  // Process data in all blocks.
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING(PROCESS_BLOCK,
-                                              PROCESS_BLOCK,
-                                              PROCESS_BLOCK);
-
-  // Finish the tail word-by-word and then byte-by-byte.
-#define CRC_UPDATE_WORD_4(index) do { \
-  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index]); \
-  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 1]); \
-  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 2]); \
-  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 3]); \
-} while (0)
-
-  if (bytes >= 4 * 4 * sizeof(size_t)) {
-    end -= 4 * 4 * sizeof(size_t);
-    do {
-      CRC_UPDATE_WORD_4(4 * 0);
-      CRC_UPDATE_WORD_4(4 * 1);
-      CRC_UPDATE_WORD_4(4 * 2);
-      CRC_UPDATE_WORD_4(4 * 3);
-      src += 4 * 4 * sizeof(size_t);
-    } while (src <= end);
-    end += 4 * 4 * sizeof(size_t);
-    bytes = static_cast<size_t>(end - src);
-  }
- less_than_16_size_t:
-
-  if (bytes >= 4 * 2 * sizeof(size_t)) {
-    CRC_UPDATE_WORD_4(4 * 0);
-    CRC_UPDATE_WORD_4(4 * 1);
-    src += 4 * 2 * sizeof(size_t);
-    bytes -= 4 * 2 * sizeof(size_t);
-  }
- less_than_8_size_t:
-
-  if (bytes >= 4 * sizeof(size_t)) {
-    CRC_UPDATE_WORD_4(0);
-    src += 4 * sizeof(size_t);
-    bytes -= 4 * sizeof(size_t);
-  }
- less_than_4_size_t:
-
-  if (bytes >= 1 * sizeof(size_t)) {
-    end -= 1 * sizeof(size_t);
-    do {
-      CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[0]);
-      src += 1 * sizeof(size_t);
-    } while (src <= end);
-    end += 1 * sizeof(size_t);
-  }
-
-  while (src < end) {
-    CRC_UPDATE_BYTE(crc0, src[0]);
-    src += 1;
-  }
-
-  return (crc0 ^ Base().Canonize());
-}
-
-
-void Crc32cSSE4::Init(bool constant) {
-  base_.Init(FixedGeneratingPolynomial(), FixedDegree(), constant);
-
-#define INIT_MUL_TABLE(block_size, num_stripes) do { \
-  size_t multiplier = \
-      Base().Xpow8N(CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes)); \
-  for (size_t table = 0; table < kNumTables; ++table) { \
-    for (size_t entry = 0; entry < kTableEntries; ++entry) { \
-      size_t value = static_cast<uint32>(entry << (kTableEntryBits * table)); \
-      CRC32C_SSE4_MUL_TABLE(block_size, num_stripes)[table][entry] = \
-            static_cast<Entry>(Base().Multiply(value, multiplier)); \
-    } \
-  } \
-} while (0)
-
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(INIT_MUL_TABLE);
-
-#undef INIT_MUL_TABLE
-
-#if !CRCUTIL_USE_MM_CRC32
-  for (size_t j = 0; j < sizeof(Word); ++j) {
-    Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + 32);
-    for (size_t i = 0; i < 256; ++i) {
-      crc_word_[j][i] = Base().MultiplyUnnormalized(i, 8, k);
-    }
-  }
-#endif  // !CRCUTIL_USE_MM_CRC32
-}
-
-
-bool Crc32cSSE4::IsSSE42Available() {
-#if defined(_MSC_VER)
-  int cpu_info[4];
-  __cpuid(cpu_info, 1);
+size_t Crc32cSSE4::Crc32c(const void *data, size_t bytes, Crc crc0) const { 
+  const uint8 *src = static_cast<const uint8 *>(data); 
+  const uint8 *end = src + bytes; 
+  crc0 ^= Base().Canonize();  // Undone by the matching XOR on every return path. 
+  // Overview: CRC of "bytes" bytes at "data", continuing from "crc0". Stages: 
+  // small-input shortcut, optional Word alignment, PROCESS_BLOCK per block size, 
+  // then a word-by-word and byte-by-byte tail. The gotos below are the shortcut: 
+  // inputs under 16 words skip alignment and blocks. Helps small inputs noticeably. 
+  if (bytes < 4 * sizeof(size_t)) goto less_than_4_size_t; 
+  if (bytes < 8 * sizeof(size_t)) goto less_than_8_size_t; 
+  if (bytes < 16 * sizeof(size_t)) goto less_than_16_size_t; 
+ 
+#define PROCESS_TAIL_IF_SMALL(block_size, num_stripes) do { \ 
+  if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \ 
+    goto no_more_##block_size##_##num_stripes; \ 
+  } \ 
+} while (0) 
+#define NOOP(block_size, num_stripes)  // Expands to nothing. 
+ 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(PROCESS_TAIL_IF_SMALL, 
+                                             NOOP, 
+                                             NOOP); 
+ 
+#undef PROCESS_TAIL_IF_SMALL 
+ 
+  // Align "src" on a Word boundary byte-by-byte, but only for inputs of 2KB or more. 
+  // Do not use ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() here because: 
+  // 1. It uses CRC_BYTE() which won't work. 
+  // 2. Its threshold may be incorrect because Crc32 that uses 
+  //    native CPU crc32 instruction is much faster than 
+  //    generic table-based CRC computation. 
+  // 
+  // In case of X5550 CPU, break even point is at 2KB -- exactly. 
+  if (bytes >= 2 * 1024) { 
+    while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { 
+      if (src >= end) { 
+        return (crc0 ^ Base().Canonize()); 
+      } 
+      CRC_UPDATE_BYTE(crc0, src[0]); 
+      src += 1; 
+    } 
+    bytes = static_cast<size_t>(end - src); 
+  } 
+  if (src >= end) { 
+    return (crc0 ^ Base().Canonize()); 
+  } 
+ 
+  // Quickly skip processing of blocks that are too large for the remaining input. 
+  // Noticeably improves performance on small inputs. 
+#define SKIP_BLOCK_IF_NEEDED(block_size, num_stripes) do { \ 
+  if (bytes < CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes)) { \ 
+    goto no_more_##block_size##_##num_stripes; \ 
+  } \ 
+} while (0) 
+ 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(NOOP, 
+                                             SKIP_BLOCK_IF_NEEDED, 
+                                             SKIP_BLOCK_IF_NEEDED); 
+ 
+#undef SKIP_BLOCK_IF_NEEDED 
+ 
+  // Process data in all blocks, largest block size first (PROCESS_BLOCK is defined outside this function). 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING(PROCESS_BLOCK, 
+                                              PROCESS_BLOCK, 
+                                              PROCESS_BLOCK); 
+ 
+  // Finish the tail word-by-word and then byte-by-byte; the less_than_* labels are the small-input entry points. 
+#define CRC_UPDATE_WORD_4(index) do { \ 
+  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index]); \ 
+  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 1]); \ 
+  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 2]); \ 
+  CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[index + 3]); \ 
+} while (0) 
+ 
+  if (bytes >= 4 * 4 * sizeof(size_t)) { 
+    end -= 4 * 4 * sizeof(size_t);  // Bias "end" so that "src <= end" means 16 more words are available. 
+    do { 
+      CRC_UPDATE_WORD_4(4 * 0); 
+      CRC_UPDATE_WORD_4(4 * 1); 
+      CRC_UPDATE_WORD_4(4 * 2); 
+      CRC_UPDATE_WORD_4(4 * 3); 
+      src += 4 * 4 * sizeof(size_t); 
+    } while (src <= end); 
+    end += 4 * 4 * sizeof(size_t);  // Undo the bias. 
+    bytes = static_cast<size_t>(end - src); 
+  } 
+ less_than_16_size_t: 
+ 
+  if (bytes >= 4 * 2 * sizeof(size_t)) { 
+    CRC_UPDATE_WORD_4(4 * 0); 
+    CRC_UPDATE_WORD_4(4 * 1); 
+    src += 4 * 2 * sizeof(size_t); 
+    bytes -= 4 * 2 * sizeof(size_t); 
+  } 
+ less_than_8_size_t: 
+ 
+  if (bytes >= 4 * sizeof(size_t)) { 
+    CRC_UPDATE_WORD_4(0); 
+    src += 4 * sizeof(size_t); 
+    bytes -= 4 * sizeof(size_t); 
+  } 
+ less_than_4_size_t: 
+ 
+  if (bytes >= 1 * sizeof(size_t)) { 
+    end -= 1 * sizeof(size_t);  // Bias "end" so that "src <= end" means one more full word is available. 
+    do { 
+      CRC_UPDATE_WORD(crc0, reinterpret_cast<const size_t *>(src)[0]); 
+      src += 1 * sizeof(size_t); 
+    } while (src <= end); 
+    end += 1 * sizeof(size_t);  // Undo the bias. 
+  } 
+ 
+  while (src < end) { 
+    CRC_UPDATE_BYTE(crc0, src[0]); 
+    src += 1; 
+  } 
+ 
+  return (crc0 ^ Base().Canonize()); 
+} 
+ 
+ 
+void Crc32cSSE4::Init(bool constant) {  // NOTE(review): the declaration in crc32c_sse4.h names this parameter "canonical". 
+  base_.Init(FixedGeneratingPolynomial(), FixedDegree(), constant); 
+  // INIT_MUL_TABLE fills one [kNumTables][kTableEntries] table per enumerated block size. 
+#define INIT_MUL_TABLE(block_size, num_stripes) do { \ 
+  size_t multiplier = \ 
+      Base().Xpow8N(CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes)); \ 
+  for (size_t table = 0; table < kNumTables; ++table) { \ 
+    for (size_t entry = 0; entry < kTableEntries; ++entry) { \ 
+      size_t value = static_cast<uint32>(entry << (kTableEntryBits * table)); \ 
+      CRC32C_SSE4_MUL_TABLE(block_size, num_stripes)[table][entry] = \ 
+            static_cast<Entry>(Base().Multiply(value, multiplier)); \ 
+    } \ 
+  } \ 
+} while (0) 
+ 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(INIT_MUL_TABLE); 
+ 
+#undef INIT_MUL_TABLE 
+  // Without the _mm_crc32 intrinsics, also build the crc_word_ tables (one 256-entry table per byte of Word). 
+#if !CRCUTIL_USE_MM_CRC32 
+  for (size_t j = 0; j < sizeof(Word); ++j) { 
+    Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + 32); 
+    for (size_t i = 0; i < 256; ++i) { 
+      crc_word_[j][i] = Base().MultiplyUnnormalized(i, 8, k); 
+    } 
+  } 
+#endif  // !CRCUTIL_USE_MM_CRC32 
+} 
+ 
+ 
+bool Crc32cSSE4::IsSSE42Available() { 
+#if defined(_MSC_VER) 
+  int cpu_info[4]; 
+  __cpuid(cpu_info, 1); 
   return ((cpu_info[2] & (1 << 20)) != 0);
-#elif defined(__GNUC__) && (HAVE_AMD64 || HAVE_I386)
-  // Not using "cpuid.h" intentionally: it is missing from
-  // too many installations.
-  uint32 eax;
-  uint32 ecx;
-  uint32 edx;
-  __asm__ volatile(
-#if HAVE_I386 && defined(__PIC__)
+#elif defined(__GNUC__) && (HAVE_AMD64 || HAVE_I386) 
+  // Not using "cpuid.h" intentionally: it is missing from 
+  // too many installations. 
+  uint32 eax; 
+  uint32 ecx; 
+  uint32 edx; 
+  __asm__ volatile( 
+#if HAVE_I386 && defined(__PIC__) 
     "push %%ebx\n"
-    "cpuid\n"
+    "cpuid\n" 
     "pop %%ebx\n"
-#else
-    "cpuid\n"
-#endif  // HAVE_I386 && defined(__PIC__)
-    : "=a" (eax), "=c" (ecx), "=d" (edx)
-    : "a" (1), "2" (0)
-    : "%ebx"
-  );
-  return ((ecx & (1 << 20)) != 0);
-#else
-  return false;
-#endif
-}
-
-
-void RollingCrc32cSSE4::Init(const Crc32cSSE4 &crc,
-                             size_t roll_window_bytes,
-                             const Crc &start_value) {
-  crc_ = &crc;
-  roll_window_bytes_ = roll_window_bytes;
-  start_value_ = start_value;
-
-  Crc add = crc.Base().Canonize() ^ start_value;
-  add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes));
-  add ^= crc.Base().Canonize();
-  Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1);
-  add = crc.Base().Multiply(add, mul);
-
-  mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree());
-  for (size_t i = 0; i < 256; ++i) {
-    out_[i] = static_cast<Entry>(
-                  crc.Base().MultiplyUnnormalized(
-                      static_cast<Crc>(i), 8, mul) ^ add);
-  }
-
-#if !CRCUTIL_USE_MM_CRC32
-  memcpy(crc_word_, crc_->crc_word_, sizeof(crc_word_));
-#endif  // !CRCUTIL_USE_MM_CRC32
-}
-
-}  // namespace crcutil
-
-#endif  // HAVE_I386 || HAVE_AMD64
+#else 
+    "cpuid\n" 
+#endif  // HAVE_I386 && defined(__PIC__) 
+    : "=a" (eax), "=c" (ecx), "=d" (edx) 
+    : "a" (1), "2" (0) 
+    : "%ebx" 
+  ); 
+  return ((ecx & (1 << 20)) != 0); 
+#else 
+  return false; 
+#endif 
+} 
+ 
+ 
+void RollingCrc32cSSE4::Init(const Crc32cSSE4 &crc, 
+                             size_t roll_window_bytes, 
+                             const Crc &start_value) { 
+  crc_ = &crc;  // Only the pointer is stored; Start() dereferences it later, so "crc" must stay alive. 
+  roll_window_bytes_ = roll_window_bytes; 
+  start_value_ = start_value; 
+  // Precompute out_[b], the value Roll() XORs into the CRC for outgoing byte b. 
+  Crc add = crc.Base().Canonize() ^ start_value; 
+  add = crc.Base().Multiply(add, crc.Base().Xpow8N(roll_window_bytes)); 
+  add ^= crc.Base().Canonize(); 
+  Crc mul = crc.Base().One() ^ crc.Base().Xpow8N(1); 
+  add = crc.Base().Multiply(add, mul); 
+ 
+  mul = crc.Base().XpowN(8 * roll_window_bytes + crc.Base().Degree()); 
+  for (size_t i = 0; i < 256; ++i) { 
+    out_[i] = static_cast<Entry>( 
+                  crc.Base().MultiplyUnnormalized( 
+                      static_cast<Crc>(i), 8, mul) ^ add); 
+  } 
+  // Table-based build only: keep a private copy of the source object's crc_word_ tables. 
+#if !CRCUTIL_USE_MM_CRC32 
+  memcpy(crc_word_, crc_->crc_word_, sizeof(crc_word_)); 
+#endif  // !CRCUTIL_USE_MM_CRC32 
+} 
+ 
+}  // namespace crcutil 
+ 
+#endif  // HAVE_I386 || HAVE_AMD64 

+ 252 - 252
contrib/libs/crcutil/crc32c_sse4.h

@@ -1,252 +1,252 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Implements CRC32C using Intel's SSE4 crc32 instruction.
-// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero,
-// emilates intrinsics via CRC_WORD/CRC_BYTE otherwise.
-
-#ifndef CRCUTIL_CRC32C_SSE4_H_
-#define CRCUTIL_CRC32C_SSE4_H_
-
-#include "gf_util.h"              // base types, gf_util class, etc.
-#include "crc32c_sse4_intrin.h"   // _mm_crc32_u* intrinsics
-
-#if HAVE_I386 || HAVE_AMD64
-
-#if CRCUTIL_USE_MM_CRC32
-
-#if HAVE_I386
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value)))
-#else
-#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value)))
-#endif  // HAVE_I386
-
-#define CRC_UPDATE_BYTE(crc, value) \
-    (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value)))
-
-#else
-
-#include "generic_crc.h"
-
-#define CRC_UPDATE_WORD(crc, value) do { \
-  size_t buf = (value); \
-  CRC_WORD(this, crc, buf); \
-} while (0)
-#define CRC_UPDATE_BYTE(crc, value) do { \
-  CRC_BYTE(this, crc, (value)); \
-} while (0)
-
-#endif  // CRCUTIL_USE_MM_CRC32
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Since the same pieces should be parameterized in many different places
-// and we do not want to introduce a mistake which is rather hard to find,
-// use a macro to enumerate all block sizes.
-//
-// Block sizes and number of stripes were tuned for best performance.
-//
-// All constants should be literal constants (too lazy to fix the macro).
-//
-// The use of different "macro_first", "macro", and "macro_last"
-// allows generation of different code for smallest, in between,
-// and largest block sizes.
-//
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \
-    macro_smallest, macro, macro_largest) \
-  macro_smallest(512, 3); \
-  macro(1024, 3); \
-  macro(4096, 3); \
-  macro_largest(32768, 3)
-
-// This macro shall be kept in sync with
-// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING.
-// Failure to do so will cause compile-time error.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \
-    macro_smallest, macro, macro_largest) \
-  macro_largest(32768, 3); \
-  macro(4096, 3); \
-  macro(1024, 3); \
-  macro_smallest(512, 3)
-
-// Enumerates all block sizes.
-#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro)
-
-#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \
-  (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1))
-
-#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \
-  (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes))
-
-#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-  mul_table_##block_size##_##num_blocks##_
-
-class RollingCrc32cSSE4;
-
-class Crc32cSSE4 {
- public:
-  // Exports Crc, TableEntry, and Word (needed by RollingCrc).
-  typedef size_t Crc;
-  typedef Crc Word;
-  typedef Crc TableEntry;
-
-  Crc32cSSE4() {}
-
-  // Initializes the tables given generating polynomial of degree (degree).
-  // If "canonical" is true, crc value will be XOR'ed with (-1) before and
-  // after actual CRC computation.
-  explicit Crc32cSSE4(bool canonical) {
-    Init(canonical);
-  }
-  void Init(bool canonical);
-
-  // Initializes the tables given generating polynomial of degree.
-  // If "canonical" is true, crc value will be XOR'ed with (-1) before and
-  // after actual CRC computation.
-  // Provided for compatibility with GenericCrc.
-  Crc32cSSE4(const Crc &generating_polynomial,
-            size_t degree,
-            bool canonical) {
-    Init(generating_polynomial, degree, canonical);
-  }
-  void Init(const Crc &generating_polynomial,
-            size_t degree,
-            bool canonical) {
-    if (generating_polynomial == FixedGeneratingPolynomial() &&
-        degree == FixedDegree()) {
-      Init(canonical);
-    }
-  }
-
-  // Returns fixed generating polymonial the class implements.
-  static Crc FixedGeneratingPolynomial() {
-    return 0x82f63b78;
-  }
-
-  // Returns degree of fixed generating polymonial the class implements.
-  static Crc FixedDegree() {
-    return 32;
-  }
-
-  // Returns base class.
-  const GfUtil<Crc> &Base() const { return base_; }
-
-  // Computes CRC32.
-  size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const {
-    return Crc32c(data, bytes, crc);
-  }
-
-  // Returns true iff crc32 instruction is available.
-  static bool IsSSE42Available();
-
- protected:
-  // Actual implementation.
-  size_t Crc32c(const void *data, size_t bytes, Crc crc) const;
-
-  enum {
-    kTableEntryBits = 8,
-    kTableEntries = 1 << kTableEntryBits,
-    kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits,
-    kNumTablesHalfLo = kNumTables / 2,
-    kNumTablesHalfHi = (kNumTables + 1) / 2,
-
-    kUnrolledLoopCount = 8,
-    kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t),
-  };
-
-  // May be set to size_t or uint32, whichever is faster.
-  typedef uint32 Entry;
-
-#define DECLARE_MUL_TABLE(block_size, num_stripes) \
-  Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \
-      [kNumTables][kTableEntries]
-
-  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE);
-
-#undef DECLARE_MUL_TABLE
-
-  GfUtil<Crc> base_;
-
-#if !CRCUTIL_USE_MM_CRC32
-  TableEntry crc_word_[sizeof(Word)][256];
-  friend class RollingCrc32cSSE4;
-#endif  // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-class RollingCrc32cSSE4 {
- public:
-  typedef Crc32cSSE4::Crc Crc;
-  typedef Crc32cSSE4::TableEntry TableEntry;
-  typedef Crc32cSSE4::Word Word;
-
-  RollingCrc32cSSE4() {}
-
-  // Initializes internal data structures.
-  // Retains reference to "crc" instance -- it is used by Start().
-  RollingCrc32cSSE4(const Crc32cSSE4 &crc,
-            size_t roll_window_bytes,
-            const Crc &start_value) {
-    Init(crc, roll_window_bytes, start_value);
-  }
-  void Init(const Crc32cSSE4 &crc,
-            size_t roll_window_bytes,
-            const Crc &start_value);
-
-  // Computes crc of "roll_window_bytes" using
-  // "start_value" of "crc" (see Init()).
-  Crc Start(const void *data) const {
-    return crc_->CrcDefault(data, roll_window_bytes_, start_value_);
-  }
-
-  // Computes CRC of "roll_window_bytes" starting in next position.
-  Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const {
-    Crc crc = old_crc;
-    CRC_UPDATE_BYTE(crc, byte_in);
-    crc ^= out_[byte_out];
-    return crc;
-  }
-
-  // Returns start value.
-  Crc StartValue() const { return start_value_; }
-
-  // Returns length of roll window.
-  size_t WindowBytes() const { return roll_window_bytes_; }
-
- protected:
-  typedef Crc Entry;
-  Entry out_[256];
-
-  // Used only by Start().
-  Crc start_value_;
-  const Crc32cSSE4 *crc_;
-  size_t roll_window_bytes_;
-
-#if !CRCUTIL_USE_MM_CRC32
-  TableEntry crc_word_[sizeof(Word)][256];
-#endif  // !CRCUTIL_USE_MM_CRC32
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-}  // namespace crcutil
-
-#endif  // HAVE_I386 || HAVE_AMD64
-
-#endif  // CRCUTIL_CRC32C_SSE4_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Implements CRC32C using Intel's SSE4 crc32 instruction. 
+// Uses _mm_crc32_u64/32/8 intrinsics if CRCUTIL_USE_MM_CRC32 is not zero, 
+// emulates intrinsics via CRC_WORD/CRC_BYTE otherwise. 
+ 
+#ifndef CRCUTIL_CRC32C_SSE4_H_ 
+#define CRCUTIL_CRC32C_SSE4_H_ 
+ 
+#include "gf_util.h"              // base types, gf_util class, etc. 
+#include "crc32c_sse4_intrin.h"   // _mm_crc32_u* intrinsics 
+ 
+#if HAVE_I386 || HAVE_AMD64 
+ 
+#if CRCUTIL_USE_MM_CRC32 
+ 
+#if HAVE_I386 
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u32(crc, (value))) 
+#else 
+#define CRC_UPDATE_WORD(crc, value) (crc = _mm_crc32_u64(crc, (value))) 
+#endif  // HAVE_I386 
+ 
+#define CRC_UPDATE_BYTE(crc, value) \ 
+    (crc = _mm_crc32_u8(static_cast<uint32>(crc), static_cast<uint8>(value))) 
+ 
+#else 
+ 
+#include "generic_crc.h" 
+ 
+#define CRC_UPDATE_WORD(crc, value) do { \ 
+  size_t buf = (value); \ 
+  CRC_WORD(this, crc, buf); \ 
+} while (0) 
+#define CRC_UPDATE_BYTE(crc, value) do { \ 
+  CRC_BYTE(this, crc, (value)); \ 
+} while (0) 
+ 
+#endif  // CRCUTIL_USE_MM_CRC32 
+ 
+namespace crcutil { 
+ 
+#pragma pack(push, 16) 
+ 
+// Since the same pieces should be parameterized in many different places 
+// and we do not want to introduce a mistake which is rather hard to find, 
+// use a macro to enumerate all block sizes. 
+// 
+// Block sizes and number of stripes were tuned for best performance. 
+// 
+// All constants should be literal constants (too lazy to fix the macro). 
+// 
+// The use of different "macro_smallest", "macro", and "macro_largest" 
+// allows generation of different code for smallest, in between, 
+// and largest block sizes. 
+// 
+// This macro shall be kept in sync with 
+// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING. 
+// Failure to do so will cause compile-time error. 
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING( \ 
+    macro_smallest, macro, macro_largest) \ 
+  macro_smallest(512, 3); \ 
+  macro(1024, 3); \ 
+  macro(4096, 3); \ 
+  macro_largest(32768, 3) 
+ 
+// This macro shall be kept in sync with 
+// CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING. 
+// Failure to do so will cause compile-time error. 
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_DESCENDING( \ 
+    macro_smallest, macro, macro_largest) \ 
+  macro_largest(32768, 3); \ 
+  macro(4096, 3); \ 
+  macro(1024, 3); \ 
+  macro_smallest(512, 3) 
+ 
+// Enumerates all block sizes. 
+#define CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(macro) \ 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS_ASCENDING(macro, macro, macro) 
+ 
+#define CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) \ 
+  (((block_size) / (num_stripes)) & ~(sizeof(size_t) - 1)) 
+ 
+#define CRC32C_SSE4_BLOCK_SIZE(block_size, num_stripes) \ 
+  (CRC32C_SSE4_STRIPE_SIZE(block_size, num_stripes) * (num_stripes)) 
+ 
+#define CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ 
+  mul_table_##block_size##_##num_blocks##_  // NOTE(review): "num_blocks" is not a macro parameter, so it is pasted literally; "num_stripes" was probably intended -- confirm. 
+ 
+class RollingCrc32cSSE4; 
+ 
+class Crc32cSSE4 {  // CRC32C engine for the fixed polynomial returned by FixedGeneratingPolynomial(). 
+ public: 
+  // Exports Crc, TableEntry, and Word (needed by RollingCrc). 
+  typedef size_t Crc; 
+  typedef Crc Word; 
+  typedef Crc TableEntry; 
+ 
+  Crc32cSSE4() {}  // Does not call Init(). 
+ 
+  // Initializes the tables by calling Init(canonical). 
+  // If "canonical" is true, crc value will be XOR'ed with (-1) before and 
+  // after actual CRC computation. 
+  explicit Crc32cSSE4(bool canonical) { 
+    Init(canonical); 
+  } 
+  void Init(bool canonical); 
+ 
+  // GenericCrc-compatible form. NOTE: Init(canonical) is called only when the 
+  // arguments equal FixedGeneratingPolynomial() and FixedDegree(); any other 
+  // polynomial/degree is silently ignored and nothing is initialized. 
+  // If "canonical" is true, crc value is XOR'ed with (-1) before and after CRC computation. 
+  Crc32cSSE4(const Crc &generating_polynomial, 
+            size_t degree, 
+            bool canonical) { 
+    Init(generating_polynomial, degree, canonical); 
+  } 
+  void Init(const Crc &generating_polynomial, 
+            size_t degree, 
+            bool canonical) { 
+    if (generating_polynomial == FixedGeneratingPolynomial() && 
+        degree == FixedDegree()) { 
+      Init(canonical); 
+    } 
+  } 
+ 
+  // Returns fixed generating polynomial the class implements. 
+  static Crc FixedGeneratingPolynomial() { 
+    return 0x82f63b78; 
+  } 
+ 
+  // Returns degree of fixed generating polynomial the class implements. 
+  static Crc FixedDegree() { 
+    return 32; 
+  } 
+ 
+  // Returns base class. 
+  const GfUtil<Crc> &Base() const { return base_; } 
+ 
+  // Computes CRC32C of "bytes" bytes at "data", starting from "crc"; forwards to Crc32c(). 
+  size_t CrcDefault(const void *data, size_t bytes, const Crc &crc) const { 
+    return Crc32c(data, bytes, crc); 
+  } 
+ 
+  // Returns true iff crc32 instruction is available. 
+  static bool IsSSE42Available(); 
+ 
+ protected: 
+  // Actual implementation. 
+  size_t Crc32c(const void *data, size_t bytes, Crc crc) const; 
+ 
+  enum { 
+    kTableEntryBits = 8, 
+    kTableEntries = 1 << kTableEntryBits, 
+    kNumTables = (32 + kTableEntryBits - 1) / kTableEntryBits, 
+    kNumTablesHalfLo = kNumTables / 2, 
+    kNumTablesHalfHi = (kNumTables + 1) / 2, 
+ 
+    kUnrolledLoopCount = 8, 
+    kUnrolledLoopBytes = kUnrolledLoopCount * sizeof(size_t), 
+  }; 
+ 
+  // May be set to size_t or uint32, whichever is faster. 
+  typedef uint32 Entry; 
+  // Declares one [kNumTables][kTableEntries] multiplication table per enumerated block size. 
+#define DECLARE_MUL_TABLE(block_size, num_stripes) \ 
+  Entry CRC32C_SSE4_MUL_TABLE(block_size, num_stripes) \ 
+      [kNumTables][kTableEntries] 
+ 
+  CRC32C_SSE4_ENUMERATE_ALL_BLOCKS(DECLARE_MUL_TABLE); 
+ 
+#undef DECLARE_MUL_TABLE 
+ 
+  GfUtil<Crc> base_; 
+ 
+#if !CRCUTIL_USE_MM_CRC32 
+  TableEntry crc_word_[sizeof(Word)][256];  // Present only when the _mm_crc32 intrinsics are not used. 
+  friend class RollingCrc32cSSE4;  // RollingCrc32cSSE4::Init() copies crc_word_. 
+#endif  // !CRCUTIL_USE_MM_CRC32 
+} GCC_ALIGN_ATTRIBUTE(16); 
+ 
+class RollingCrc32cSSE4 {  // Rolling-window CRC32C built on top of a Crc32cSSE4 instance. 
+ public: 
+  typedef Crc32cSSE4::Crc Crc; 
+  typedef Crc32cSSE4::TableEntry TableEntry; 
+  typedef Crc32cSSE4::Word Word; 
+ 
+  RollingCrc32cSSE4() {}  // Does not call Init(). 
+ 
+  // Initializes internal data structures. 
+  // Retains a pointer to the "crc" instance -- it is used by Start(). 
+  RollingCrc32cSSE4(const Crc32cSSE4 &crc, 
+            size_t roll_window_bytes, 
+            const Crc &start_value) { 
+    Init(crc, roll_window_bytes, start_value); 
+  } 
+  void Init(const Crc32cSSE4 &crc, 
+            size_t roll_window_bytes, 
+            const Crc &start_value); 
+ 
+  // Computes crc of "roll_window_bytes" using 
+  // "start_value" of "crc" (see Init()). 
+  Crc Start(const void *data) const { 
+    return crc_->CrcDefault(data, roll_window_bytes_, start_value_); 
+  } 
+  // Roll(): feeds "byte_in" through CRC_UPDATE_BYTE, then XORs in out_[byte_out]. 
+  // Computes CRC of "roll_window_bytes" starting in next position; "byte_out" indexes out_[256] unchecked. 
+  Crc Roll(const Crc &old_crc, size_t byte_out, size_t byte_in) const { 
+    Crc crc = old_crc; 
+    CRC_UPDATE_BYTE(crc, byte_in); 
+    crc ^= out_[byte_out]; 
+    return crc; 
+  } 
+ 
+  // Returns start value. 
+  Crc StartValue() const { return start_value_; } 
+ 
+  // Returns length of roll window. 
+  size_t WindowBytes() const { return roll_window_bytes_; } 
+ 
+ protected: 
+  typedef Crc Entry; 
+  Entry out_[256];  // Indexed by the outgoing byte in Roll(). 
+ 
+  // Used only by Start(). 
+  Crc start_value_; 
+  const Crc32cSSE4 *crc_; 
+  size_t roll_window_bytes_; 
+ 
+#if !CRCUTIL_USE_MM_CRC32 
+  TableEntry crc_word_[sizeof(Word)][256];  // Same shape as Crc32cSSE4::crc_word_. 
+#endif  // !CRCUTIL_USE_MM_CRC32 
+} GCC_ALIGN_ATTRIBUTE(16); 
+ 
+#pragma pack(pop) 
+ 
+}  // namespace crcutil 
+ 
+#endif  // HAVE_I386 || HAVE_AMD64 
+ 
+#endif  // CRCUTIL_CRC32C_SSE4_H_ 

+ 99 - 99
contrib/libs/crcutil/crc32c_sse4_intrin.h

@@ -1,99 +1,99 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Provides _mm_crc32_u64/32/8 intrinsics.
-
-#ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_
-#define CRCUTIL_CRC32C_SSE4_INTRIN_H_
-
-#include "platform.h"
-#include "base_types.h"
-
-#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
-#if defined(_MSC_VER) || defined(__SSE4_2__)
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-// '_M_IA64' is not defined as a preprocessor macro
-#pragma warning(disable: 4668)
-#endif  // defined(_MSC_VER)
-
-#include <nmmintrin.h>
-
-#if defined(_MSC_VER)
-#pragma warning(pop)
-#endif  // defined(_MSC_VER)
-
-#elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C)
-// Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32
-// is set irrespective of "-msse*" settings. This way, the sources
-// may be compiled with "-msse2 -mcrc32" and work on older CPUs,
-// while taking full advantage of "crc32" instruction on newer
-// CPUs (requires dynamic CPU detection). See "interface.cc".
-//
-// If neither -msse4 or -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set
-// and CRCUTIL_FORCE_ASM_CRC32 is not set, compile-time error will happen.
-// Why? Becuase GCC disables __builtin_ia32_crc32* intrinsics when compiled
-// without -msse4 or -mcrc32. -msse4 could be detected at run time by checking
-// whether __SSE4_2__ is defined, but there is no way to tell whether the
-// sources are compiled with -mcrc32.
-
-extern __inline unsigned int __attribute__((
-    __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u8(unsigned int __C, unsigned char __V) {
-  return __builtin_ia32_crc32qi(__C, __V);
-}
-#ifdef __x86_64__
-extern __inline unsigned long long __attribute__((
-    __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u64(unsigned long long __C, unsigned long long __V) {
-  return __builtin_ia32_crc32di(__C, __V);
-}
-#else
-extern __inline unsigned int __attribute__((
-    __gnu_inline__, __always_inline__, __artificial__))
-_mm_crc32_u32(unsigned int __C, unsigned int __V) {
-  return __builtin_ia32_crc32si (__C, __V);
-}
-#endif  // __x86_64__
-
-#else
-
-// GCC 4.4.x and earlier: use inline asm.
-
-namespace crcutil {
-
-__forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) {
-  asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
-  return crc;
-}
-
-__forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) {
-  asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
-  return crc;
-}
-
-__forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) {
-  asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value));
-  return crc;
-}
-
-}  // namespace crcutil
-
-#endif
-
-#endif  // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
-#endif  // CRCUTIL_CRC32C_SSE4_INTRIN_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Provides _mm_crc32_u64/32/8 intrinsics. 
+ 
+#ifndef CRCUTIL_CRC32C_SSE4_INTRIN_H_ 
+#define CRCUTIL_CRC32C_SSE4_INTRIN_H_ 
+ 
+#include "platform.h" 
+#include "base_types.h" 
+ 
+#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) 
+ 
+#if defined(_MSC_VER) || defined(__SSE4_2__) 
+ 
+#if defined(_MSC_VER) 
+#pragma warning(push) 
+// '_M_IA64' is not defined as a preprocessor macro 
+#pragma warning(disable: 4668) 
+#endif  // defined(_MSC_VER) 
+ 
+#include <nmmintrin.h> 
+ 
+#if defined(_MSC_VER) 
+#pragma warning(pop) 
+#endif  // defined(_MSC_VER) 
+ 
+#elif GCC_VERSION_AVAILABLE(4, 5) && !defined(CRCUTIL_FORCE_ASM_CRC32C) 
+// Allow the use of _mm_crc32_u* intrinsic when CRCUTIL_USE_MM_CRC32 
+// is set irrespective of "-msse*" settings. This way, the sources 
+// may be compiled with "-msse2 -mcrc32" and work on older CPUs, 
+// while taking full advantage of "crc32" instruction on newer 
+// CPUs (requires dynamic CPU detection). See "interface.cc". 
+// 
+// If neither -msse4 nor -mcrc32 is provided and CRCUTIL_USE_MM_CRC32 is set 
+// and CRCUTIL_FORCE_ASM_CRC32C is not set, a compile-time error will happen. 
+// Why? Because GCC disables __builtin_ia32_crc32* intrinsics when compiled 
+// without -msse4 or -mcrc32. -msse4 could be detected at compile time by checking 
+// whether __SSE4_2__ is defined, but there is no way to tell whether the 
+// sources are compiled with -mcrc32. 
+ 
+extern __inline unsigned int __attribute__(( 
+    __gnu_inline__, __always_inline__, __artificial__)) 
+_mm_crc32_u8(unsigned int __C, unsigned char __V) { 
+  return __builtin_ia32_crc32qi(__C, __V); 
+} 
+#ifdef __x86_64__ 
+extern __inline unsigned long long __attribute__(( 
+    __gnu_inline__, __always_inline__, __artificial__)) 
+_mm_crc32_u64(unsigned long long __C, unsigned long long __V) { 
+  return __builtin_ia32_crc32di(__C, __V); 
+} 
+#else 
+extern __inline unsigned int __attribute__(( 
+    __gnu_inline__, __always_inline__, __artificial__)) 
+_mm_crc32_u32(unsigned int __C, unsigned int __V) { 
+  return __builtin_ia32_crc32si (__C, __V); 
+} 
+#endif  // __x86_64__ 
+ 
+#else 
+ 
+// GCC 4.4.x and earlier: use inline asm. 
+ 
+namespace crcutil { 
+ 
+__forceinline uint64 _mm_crc32_u64(uint64 crc, uint64 value) { 
+  asm("crc32q %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 
+  return crc; 
+} 
+ 
+__forceinline uint32 _mm_crc32_u32(uint32 crc, uint64 value) {  // NOTE(review): "value" is uint64 here, but the builtin-based _mm_crc32_u32 in this file takes unsigned int; looks like it should be uint32 -- confirm. 
+  asm("crc32l %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 
+  return crc; 
+} 
+ 
+__forceinline uint32 _mm_crc32_u8(uint32 crc, uint8 value) { 
+  asm("crc32b %[value], %[crc]\n" : [crc] "+r" (crc) : [value] "rm" (value)); 
+  return crc; 
+} 
+ 
+}  // namespace crcutil 
+ 
+#endif 
+ 
+#endif  // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64) 
+ 
+#endif  // CRCUTIL_CRC32C_SSE4_INTRIN_H_ 

+ 68 - 68
contrib/libs/crcutil/crc_casts.h

@@ -1,68 +1,68 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Casting between integers and compound CRC types.
-
-#ifndef CRCUTIL_CRC_CASTS_H_
-#define CRCUTIL_CRC_CASTS_H_
-
-#include "base_types.h"   // uint8, uint64
-#include "platform.h"     // __forceinline
-
-namespace crcutil {
-
-// Downcasts a value of (oftentimes larger) Crc type to (smaller base integer)
-// Result type, enabling specialized downcasts implemented by "large integer"
-// classes (e.g. uint128_sse2).
-template<typename Crc, typename Result>
-__forceinline Result Downcast(const Crc &x) {
-  return static_cast<Result>(x);
-}
-
-// Extracts 8 least significant bits from a value of Crc type.
-#define TO_BYTE(x) Downcast<Crc, uint8>(x)
-
-// Converts a pair of uint64 bit values into single value of CRC type.
-// It is caller's responsibility to ensure that the input is correct.
-template<typename Crc>
-__forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) {
-  if (sizeof(Crc) <= sizeof(lo)) {
-    return static_cast<Crc>(lo);
-  } else {
-    // static_cast to keep compiler happy.
-    Crc result = static_cast<Crc>(hi);
-    result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo));
-    result ^= lo;
-    return result;
-  }
-}
-
-// Converts Crc value to a pair of uint64 values.
-template<typename Crc>
-__forceinline void Uint64FromCrc(const Crc &crc,
-                                 uint64 *lo, uint64 *hi = NULL) {
-  if (sizeof(*lo) >= sizeof(crc)) {
-    *lo = Downcast<Crc, uint64>(crc);
-    if (hi != NULL) {
-      *hi = 0;
-    }
-  } else {
-    *lo = Downcast<Crc, uint64>(crc);
-    *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(lo)));
-  }
-}
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_CRC_CASTS_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Casting between integers and compound CRC types. 
+ 
+#ifndef CRCUTIL_CRC_CASTS_H_ 
+#define CRCUTIL_CRC_CASTS_H_ 
+ 
+#include "base_types.h"   // uint8, uint64 
+#include "platform.h"     // __forceinline 
+ 
+namespace crcutil { 
+ 
+// Downcasts a value of (oftentimes larger) Crc type to (smaller base integer) 
+// Result type, enabling specialized downcasts implemented by "large integer" 
+// classes (e.g. uint128_sse2). 
+// The generic version is a plain static_cast<Result>. 
+template<typename Crc, typename Result> 
+__forceinline Result Downcast(const Crc &x) { 
+  return static_cast<Result>(x); 
+} 
+ 
+// Extracts 8 least significant bits from a value of Crc type. 
+// NB: expands to Downcast<Crc, uint8>(x), so a type named "Crc" must be 
+// visible at the point where the macro is used. 
+#define TO_BYTE(x) Downcast<Crc, uint8>(x) 
+ 
+// Converts a pair of uint64 bit values into single value of CRC type. 
+// It is caller's responsibility to ensure that the input is correct. 
+// 
+// If Crc is not wider than uint64, "hi" is ignored and "lo" is cast to Crc. 
+// Otherwise the result is (hi << 64) ^ lo. 
+template<typename Crc> 
+__forceinline Crc CrcFromUint64(uint64 lo, uint64 hi = 0) { 
+  if (sizeof(Crc) <= sizeof(lo)) { 
+    return static_cast<Crc>(lo); 
+  } else { 
+    // static_cast to keep compiler happy. 
+    Crc result = static_cast<Crc>(hi); 
+    result = SHIFT_LEFT_SAFE(result, 8 * sizeof(lo)); 
+    result ^= lo; 
+    return result; 
+  } 
+} 
+ 
+// Converts Crc value to a pair of uint64 values. 
+// 
+// "*lo" always receives the low 64 bits of "crc". If "hi" is not NULL, 
+// "*hi" receives the bits above the low 64 (0 when Crc is not wider than 
+// uint64). A NULL "hi" is never dereferenced. 
+template<typename Crc> 
+__forceinline void Uint64FromCrc(const Crc &crc, 
+                                 uint64 *lo, uint64 *hi = NULL) { 
+  *lo = Downcast<Crc, uint64>(crc); 
+  if (hi == NULL) { 
+    return; 
+  } 
+  if (sizeof(*lo) >= sizeof(crc)) { 
+    *hi = 0; 
+  } else { 
+    // Shift by the width of the pointee (64 bits), not by the width of the 
+    // pointer "lo", which is only 32 bits on 32-bit targets. 
+    *hi = Downcast<Crc, uint64>(SHIFT_RIGHT_SAFE(crc, 8 * sizeof(*lo))); 
+  } 
+} 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_CRC_CASTS_H_ 

+ 687 - 687
contrib/libs/crcutil/generic_crc.h

@@ -1,687 +1,687 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines GenericCrc class which implements arbitrary CRCs.
-//
-// Please read crc.pdf to understand how it all works.
-
-#ifndef CRCUTIL_GENERIC_CRC_H_
-#define CRCUTIL_GENERIC_CRC_H_
-
-#include "base_types.h"     // uint8
-#include "crc_casts.h"      // TO_BYTE(), Downcast<>.
-#include "gf_util.h"        // GfUtil<Crc> class.
-#include "platform.h"       // GCC_ALIGN_ATTRIBUTE(16)
-#include "uint128_sse2.h"   // uint128_sse2 type (if necessary)
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// Extends CRC by one byte.
-// Technically, if degree of a polynomial does not exceed 8,
-// right shift by 8 bits is not required, but who cares about CRC-8?
-#define CRC_BYTE(table, crc, byte) do { \
-  crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \
-        table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \
-} while (0)
-
-#define TABLE_ENTRY(table, byte, buf) \
-  table[byte][Downcast<Word, uint8>(buf)]
-
-#define TABLE_ENTRY_LAST(table, buf) \
-  table[sizeof(Word) - 1][buf]
-
-// Extends CRC by one word.
-#define CRC_WORD(table, crc, buf) do { \
-  buf ^= Downcast<Crc, Word>(crc); \
-  if (sizeof(crc) > sizeof(buf)) { \
-    crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \
-    crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \
-  } else { \
-    crc = TABLE_ENTRY(table->crc_word_, 0, buf); \
-  } \
-  buf >>= 8; \
-  for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \
-    crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \
-    buf >>= 8; \
-  } \
-  crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \
-} while (0)
-
-// Process beginning of data block byte by byte until source pointer
-// becomes perfectly aligned on Word boundary.
-#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \
-  while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \
-    if (src >= end) { \
-      return (crc ^ table->Base().Canonize()); \
-    } \
-    CRC_BYTE(table, crc, *src); \
-    src += 1; \
-  } \
-} while (0)
-
-
-// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks
-// but 6-10% faster on larger blocks (>= 2KB).
-// Break-even point (+-1%) is around 1KB (Q9650, E6600).
-//
-#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \
-do { \
-  if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \
-    ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \
-  } \
-} while (0)
-
-#if defined(_MSC_VER)
-#pragma warning(push)
-#pragma warning(disable: 4127)  // conditional expression is constant
-#endif  // defined(_MSC_VER)
-
-// Forward declarations.
-template<typename CrcImplementation> class RollingCrc;
-
-// Crc        is the type used internally and to return values of N-bit CRC.
-//            It should be at least as large as "TableEntry" and "Word" but
-//            may be larger (e.g. for 16-bit CRC, TableEntry and Word may be
-//            set to uint16 but Crc may be set to uint32).
-//
-// TableEntry is the type of values stored in the tables.
-//            To implement N-bit CRC, TableEntry should be large enough
-//            to store N bits.
-//
-// Word       is the type used to read data sizeof(Word) at a time.
-//            Ideally, it shoulde be "most suitable for given architecture"
-//            integer type -- typically "size_t".
-//
-// kStride    is the number of words processed in interleaved manner by
-//            CrcMultiword() and CrcWordblock(). Shall be either 3 or 4.
-//            Optimal value depends on hardware architecture (AMD64, ARM, etc).
-//
-template<typename _Crc, typename _TableEntry, typename _Word, int kStride>
-    class GenericCrc {
- public:
-  // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.)
-  typedef _Crc Crc;
-  typedef _TableEntry TableEntry;
-  typedef _Word Word;
-
-  GenericCrc() {}
-
-  // Initializes the tables given generating polynomial of degree.
-  // If "canonical" is true, crc value will be XOR'ed with (-1) before and
-  // after actual CRC computation.
-  GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) {
-    Init(generating_polynomial, degree, canonical);
-  }
-  void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
-    base_.Init(generating_polynomial, degree, canonical);
-
-    // Instead of computing
-    //    table[j][i] = MultiplyUnnormalized(i, 8, k),
-    // for all i = 0...255, we may notice that
-    // if i = 2**n then for all m = 1...(i-1)
-    // MultiplyUnnormalized(i + m, 8, k) =
-    //    MultiplyUnnormalized(i ^ m, 8, k) =
-    //    MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) =
-    //    MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] =
-    //    table[i] ^ table[m].
-#if 0
-    for (size_t j = 0; j < sizeof(Word); ++j) {
-      Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
-      for (size_t i = 0; i < 256; ++i) {
-        Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
-        this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp);
-      }
-    }
-#else
-    for (size_t j = 0; j < sizeof(Word); ++j) {
-      Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree);
-      TableEntry *table = this->crc_word_interleaved_[j];
-      table[0] = 0;  // Init 0s entry -- multiply 0 by anything yields 0.
-      for (size_t i = 1; i < 256; i <<= 1) {
-        TableEntry value = Downcast<Crc, TableEntry>(
-            Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
-        table[i] = value;
-        for (size_t m = 1; m < i; ++m) {
-          table[i + m] = value ^ table[m];
-        }
-      }
-    }
-#endif
-
-#if 0
-    for (size_t j = 0; j < sizeof(Word); ++j) {
-      Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
-      for (size_t i = 0; i < 256; ++i) {
-        Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k);
-        this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp);
-      }
-    }
-#else
-    for (size_t j = 0; j < sizeof(Word); ++j) {
-      Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree);
-      TableEntry *table = this->crc_word_[j];
-      table[0] = 0;  // Init 0s entry -- multiply 0 by anything yields 0.
-      for (size_t i = 1; i < 256; i <<= 1) {
-        TableEntry value = Downcast<Crc, TableEntry>(
-            Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k));
-        table[i] = value;
-        for (size_t m = 1; m < i; ++m) {
-          table[i + m] = value ^ table[m];
-        }
-      }
-    }
-#endif
-  }
-
-  // Default CRC implementation
-  Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const {
-#if HAVE_AMD64 || HAVE_I386
-    return CrcMultiword(data, bytes, start);
-#else
-    // Very few CPUs have multiple ALUs and speculative execution
-    // (Itanium is an exception) so sophisticated algorithms will
-    // not perform better than good old Sarwate algorithm.
-    return CrcByteUnrolled(data, bytes, start);
-#endif  // HAVE_AMD64 || HAVE_I386
-  }
-
-  // Returns base class.
-  const GfUtil<Crc> &Base() const { return base_; }
-
- protected:
-  // Canonical, byte-by-byte CRC computation.
-  Crc CrcByte(const void *data, size_t bytes, const Crc &start) const {
-    const uint8 *src = static_cast<const uint8 *>(data);
-    Crc crc = start ^ Base().Canonize();
-    for (const uint8 *end = src + bytes; src < end; ++src) {
-      CRC_BYTE(this, crc, *src);
-    }
-    return (crc ^ Base().Canonize());
-  }
-
-  // Byte-by-byte CRC with main loop unrolled.
-  Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const {
-    if (bytes == 0) {
-      return start;
-    }
-
-    const uint8 *src = static_cast<const uint8 *>(data);
-    const uint8 *end = src + bytes;
-    Crc crc = start ^ Base().Canonize();
-
-    // Unroll loop 4 times.
-    end -= 3;
-    for (; src < end; src += 4) {
-      PREFETCH(src);
-      CRC_BYTE(this, crc, src[0]);
-      CRC_BYTE(this, crc, src[1]);
-      CRC_BYTE(this, crc, src[2]);
-      CRC_BYTE(this, crc, src[3]);
-    }
-    end += 3;
-
-    // Compute CRC of remaining bytes.
-    for (; src < end; ++src) {
-      CRC_BYTE(this, crc, *src);
-    }
-
-    return (crc ^ Base().Canonize());
-  }
-
-  // Canonical, byte-by-byte CRC computation.
-  Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const {
-    const uint8 *src = static_cast<const uint8 *>(data);
-    const uint8 *end = src + bytes;
-    Crc crc0 = start ^ Base().Canonize();
-
-    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc);
-    if (src >= end) {
-      return (crc0 ^ Base().Canonize());
-    }
-
-    // Process 4*sizeof(Crc) bytes at a time.
-    end -= 4 * sizeof(Crc) - 1;
-    for (; src < end; src += 4 * sizeof(Crc)) {
-      for (size_t i = 0; i < 4; ++i) {
-        crc0 ^= reinterpret_cast<const Crc *>(src)[i];
-        if (i == 0) {
-          PREFETCH(src);
-        }
-        for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
-          CRC_BYTE(this, crc0, 0);
-        }
-      }
-    }
-    end += 4 * sizeof(Crc) - 1;
-
-    // Process sizeof(Crc) bytes at a time.
-    end -= sizeof(Crc) - 1;
-    for (; src < end; src += sizeof(Crc)) {
-      crc0 ^= reinterpret_cast<const Crc *>(src)[0];
-      for (size_t byte = 0; byte < sizeof(crc0); ++byte) {
-        CRC_BYTE(this, crc0, 0);
-      }
-    }
-    end += sizeof(Crc) - 1;
-
-    // Compute CRC of remaining bytes.
-    for (;src < end; ++src) {
-      CRC_BYTE(this, crc0, *src);
-    }
-
-    return (crc0 ^ Base().Canonize());
-  }
-
-  // Faster, word-by-word CRC.
-  Crc CrcWord(const void *data, size_t bytes, const Crc &start) const {
-    const uint8 *src = static_cast<const uint8 *>(data);
-    const uint8 *end = src + bytes;
-    Crc crc0 = start ^ Base().Canonize();
-
-    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
-    if (src >= end) {
-      return (crc0 ^ Base().Canonize());
-    }
-
-    // Process 4 sizeof(Word) bytes at once.
-    end -= 4 * sizeof(Word) - 1;
-    for (; src < end; src += 4 * sizeof(Word)) {
-      Word buf0 = reinterpret_cast<const Word *>(src)[0];
-      PREFETCH(src);
-      CRC_WORD(this, crc0, buf0);
-      buf0 = reinterpret_cast<const Word *>(src)[1];
-      CRC_WORD(this, crc0, buf0);
-      buf0 = reinterpret_cast<const Word *>(src)[2];
-      CRC_WORD(this, crc0, buf0);
-      buf0 = reinterpret_cast<const Word *>(src)[3];
-      CRC_WORD(this, crc0, buf0);
-    }
-    end += 4 * sizeof(Word) - 1;
-
-    // Process sizeof(Word) bytes at a time.
-    end -= sizeof(Word) - 1;
-    for (; src < end; src += sizeof(Word)) {
-      Word buf0 = reinterpret_cast<const Word *>(src)[0];
-      CRC_WORD(this, crc0, buf0);
-    }
-    end += sizeof(Word) - 1;
-
-    // Compute CRC of remaining bytes.
-    for (;src < end; ++src) {
-      CRC_BYTE(this, crc0, *src);
-    }
-
-    return (crc0 ^ Base().Canonize());
-  }
-
-#define REPEAT_FROM_1(macro) \
-  macro(1); \
-  macro(2); \
-  macro(3); \
-  macro(4); \
-  macro(5); \
-  macro(6); \
-  macro(7);
-
-#define REPEAT_FROM_0(macro) \
-  macro(0); \
-  REPEAT_FROM_1(macro)
-
-  // Faster, process adjusent blocks in parallel and concatenate CRCs.
-  Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const {
-    if (kStride < 2 || kStride > 8) {
-      // Unsupported configuration;
-      // fall back to something sensible.
-      return CrcWord(data, bytes, start);
-    }
-
-    const uint8 *src = static_cast<const uint8 *>(data);
-    const uint8 *end = src + bytes;
-    Crc crc0 = start ^ Base().Canonize();
-    enum {
-      // Add 16 to avoid false L1 cache collisions.
-      kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1),
-    };
-
-    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
-    if (src >= end) {
-      return (crc0 ^ Base().Canonize());
-    }
-
-    end -= kStride * kStripe - 1;
-    if (src < end) {
-      Crc x_pow_8kStripe = Base().Xpow8N(kStripe);
-      do {
-        const uint8 *stripe_end = src + kStripe;
-
-#define INIT_CRC(reg) \
-        Crc crc##reg; \
-        if (kStride >= reg) { \
-          crc##reg = 0; \
-        }
-        REPEAT_FROM_1(INIT_CRC);
-#undef INIT_CRC
-
-        do {
-#define FIRST(reg) \
-          Word buf##reg; \
-          if (kStride > reg) { \
-            buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \
-            buf##reg ^= Downcast<Crc, Word>(crc##reg); \
-            if (sizeof(crc##reg) > sizeof(buf##reg)) { \
-              crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \
-              crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
-            } else { \
-              crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \
-            } \
-            buf##reg >>= 8; \
-          }
-          REPEAT_FROM_0(FIRST);
-#undef FIRST
-
-          for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) {
-#define NEXT(reg) do { \
-            if (kStride > reg) { \
-              crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \
-              buf##reg >>= 8; \
-            } \
-} while (0)
-            REPEAT_FROM_0(NEXT);
-#undef NEXT
-          }
-
-#define LAST(reg) do { \
-          if (kStride > reg) { \
-            crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \
-          } \
-} while (0)
-          REPEAT_FROM_0(LAST);
-#undef LAST
-
-          src += sizeof(Word);
-        } while (src < stripe_end);
-
-#if 0
-// The code is left for illustrational purposes only.
-#define COMBINE(reg) do { \
-        if (reg > 0 && kStride > reg) { \
-          crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \
-        } \
-} while (0)
-#else
-#define COMBINE(reg) do { \
-        if (reg > 0 && kStride > reg) { \
-          crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \
-        } \
-} while (0)
-#endif
-        REPEAT_FROM_0(COMBINE);
-#undef COMBINE
-
-        src += (kStride - 1) * kStripe;
-      }
-      while (src < end);
-    }
-    end += kStride * kStripe - 1;
-
-    // Process sizeof(Word) bytes at a time.
-    end -= sizeof(Word) - 1;
-    for (; src < end; src += sizeof(Word)) {
-      Word buf0 = reinterpret_cast<const Word *>(src)[0];
-      CRC_WORD(this, crc0, buf0);
-    }
-    end += sizeof(Word) - 1;
-
-    // Compute CRC of remaining bytes.
-    for (;src < end; ++src) {
-      CRC_BYTE(this, crc0, *src);
-    }
-
-    return (crc0 ^ Base().Canonize());
-  }
-
-  // Fastest, interleaved multi-byte CRC.
-  Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const {
-    if (kStride < 2 || kStride > 8) {
-      // Unsupported configuration;
-      // fall back to something sensible.
-      return CrcWord(data, bytes, start);
-    }
-
-    const uint8 *src = static_cast<const uint8 *>(data);
-    const uint8 *end = src + bytes;
-    Crc crc0 = start ^ Base().Canonize();
-
-    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word);
-    if (src >= end) {
-      return (crc0 ^ Base().Canonize());
-    }
-
-    // Process kStride Word registers at once;
-    // should have have at least 2*kInterleaveBytes of data to start.
-    end -= 2*kInterleaveBytes - 1;
-    if (src < end) {
-      Crc crc_carryover;
-      if (sizeof(Crc) > sizeof(Word)) {
-        // crc_carryover is used if and only if Crc is wider than Word.
-        crc_carryover = 0;
-      }
-#define INIT_CRC(reg) \
-      Crc crc##reg; \
-      if (reg > 0 && kStride > reg) { \
-        crc##reg = 0; \
-      }
-      REPEAT_FROM_1(INIT_CRC);
-#undef INIT_CRC
-
-#define INIT_BUF(reg) \
-      Word buf##reg; \
-      if (kStride > reg) { \
-        buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
-      }
-      REPEAT_FROM_0(INIT_BUF);
-#undef INIT_BUF
-
-      do {
-        PREFETCH(src);
-        src += kInterleaveBytes;
-
-        if (sizeof(Crc) > sizeof(Word)) {
-          crc0 ^= crc_carryover;
-        }
-
-#define FIRST(reg, next_reg) do { \
-        if (kStride > reg) { \
-          buf##reg ^= Downcast<Crc, Word>(crc##reg); \
-          if (sizeof(Crc) > sizeof(Word)) { \
-            if (reg < kStride - 1) { \
-              crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
-            } else { \
-              crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \
-            } \
-          } \
-          crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \
-          buf##reg >>= 8; \
-        } \
-} while (0)
-        FIRST(0, 1);
-        FIRST(1, 2);
-        FIRST(2, 3);
-        FIRST(3, 4);
-        FIRST(4, 5);
-        FIRST(5, 6);
-        FIRST(6, 7);
-        FIRST(7, 0);
-#undef FIRST
-
-        for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) {
-#define NEXT(reg) do { \
-          if (kStride > reg) { \
-            crc##reg ^= \
-                TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \
-            buf##reg >>= 8; \
-          } \
-} while(0)
-          REPEAT_FROM_0(NEXT);
-#undef NEXT
-        }
-
-#define LAST(reg) do { \
-        if (kStride > reg) { \
-          crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \
-          buf##reg = reinterpret_cast<const Word *>(src)[reg]; \
-        } \
-} while(0)
-        REPEAT_FROM_0(LAST);
-#undef LAST
-      }
-      while (src < end);
-
-      if (sizeof(Crc) > sizeof(Word)) {
-        crc0 ^= crc_carryover;
-      }
-
-#define COMBINE(reg) do { \
-      if (kStride > reg) { \
-        if (reg != 0) { \
-          crc0 ^= crc##reg; \
-        } \
-        CRC_WORD(this, crc0, buf##reg); \
-      } \
-} while (0)
-      REPEAT_FROM_0(COMBINE);
-#undef COMBINE
-
-      src += kInterleaveBytes;
-    }
-    end += 2*kInterleaveBytes - 1;
-
-    // Process sizeof(Word) bytes at once.
-    end -= sizeof(Word) - 1;
-    for (; src < end; src += sizeof(Word)) {
-      Word buf0 = reinterpret_cast<const Word *>(src)[0];
-      CRC_WORD(this, crc0, buf0);
-    }
-    end += sizeof(Word) - 1;
-
-    // Compute CRC of remaining bytes.
-    for (;src < end; ++src) {
-      CRC_BYTE(this, crc0, *src);
-    }
-
-    return (crc0 ^ Base().Canonize());
-  }
-
- protected:
-  enum {
-    kInterleaveBytes = sizeof(Word) * kStride,
-  };
-
-  // Multiplication tables used by CRCs.
-  TableEntry crc_word_interleaved_[sizeof(Word)][256];
-  TableEntry crc_word_[sizeof(Word)][256];
-
-  // Base class stored after CRC tables so that the most frequently
-  // used table is at offset 0 and may be accessed faster.
-  GfUtil<Crc> base_;
-
-  friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >;
-
- private:
-  // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is
-  // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on
-  // hardware and compiler). However, there are problems with compilers.
-  //
-  // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory.
-  //
-  // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads:
-  // CL 15.00.307291.1  C++   >1.2< CPU cycles/byte
-  // ICL 11.1.051 -O3   C++    1.5  CPU cycles/byte
-  // GCC 4.5 -O3        C++    2.0  CPU cycles/byte
-  // GCC 4.x -O3        ASM   >1.2< CPU cycles/byte
-  //
-  // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads
-  // CL 15.00.307291.1  C++   2.0  CPU cycles/byte
-  // GCC 4.5 -O3        C++   1.9  CPU cycles/byte
-  // ICL 11.1.051 -S    C++   1.6  CPU cycles/byte
-  // GCC 4.x -O3        ASM  >1.3< CPU cycles/byte
-  //
-  // So, use inline ASM code for GCC for both i386 and amd64.
-
-  Crc CrcMultiwordI386Mmx(
-          const void *data, size_t bytes, const Crc &start) const;
-  Crc CrcMultiwordGccAmd64(
-          const void *data, size_t bytes, const Crc &start) const;
-  Crc CrcMultiwordGccAmd64Sse2(
-          const uint8 *src, const uint8 *end, const Crc &start) const;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#undef REPEAT_FROM_0
-#undef REPEAT_FROM_1
-
-
-// Specialized variants.
-#if CRCUTIL_USE_ASM
-
-#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
-
-// Declare specialized functions.
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
-    const void *data, size_t bytes, const uint64 &start) const;
-
-#if HAVE_AMD64 && HAVE_SSE2
-template<>
-uint128_sse2
-GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword(
-    const void *data, size_t bytes, const uint128_sse2 &start) const;
-#endif  // HAVE_AMD64 && HAVE_SSE2
-
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \
-      (HAVE_I386 && HAVE_MMX)
-
-// Work around bug in MSC (present at least in v. 15.00.30729.1)
-template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx(
-    const void *data,
-    size_t bytes,
-    const uint64 &start) const;
-template<> __forceinline
-uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword(
-    const void *data,
-    size_t bytes,
-    const uint64 &start) const {
-  typedef uint64 Word;
-  typedef uint64 Crc;
-  if (bytes <= 12) {
-    const uint8 *src = static_cast<const uint8 *>(data);
-    uint64 crc = start ^ Base().Canonize();
-    for (const uint8 *end = src + bytes; src < end; ++src) {
-      CRC_BYTE(this, crc, *src);
-    }
-    return (crc ^ Base().Canonize());
-  }
-  return CrcMultiwordI386Mmx(data, bytes, start);
-}
-
-#endif  // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX)))
-
-#endif  // CRCUTIL_USE_ASM
-
-
-#pragma pack(pop)
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_GENERIC_CRC_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Defines GenericCrc class which implements arbitrary CRCs. 
+// 
+// Please read crc.pdf to understand how it all works. 
+ 
+#ifndef CRCUTIL_GENERIC_CRC_H_ 
+#define CRCUTIL_GENERIC_CRC_H_ 
+ 
+#include "base_types.h"     // uint8 
+#include "crc_casts.h"      // TO_BYTE(), Downcast<>. 
+#include "gf_util.h"        // GfUtil<Crc> class. 
+#include "platform.h"       // GCC_ALIGN_ATTRIBUTE(16) 
+#include "uint128_sse2.h"   // uint128_sse2 type (if necessary) 
+ 
+namespace crcutil { 
+ 
+#pragma pack(push, 16) 
+ 
+// Extends CRC by one byte. 
+// Technically, if degree of a polynomial does not exceed 8, 
+// right shift by 8 bits is not required, but who cares about CRC-8? 
+// 
+// "table" is a pointer to an object exposing crc_word_; the lookup uses its 
+// last row, crc_word_[sizeof(Word) - 1]. "crc" is updated in place and is 
+// evaluated more than once, so it must be a side-effect-free lvalue. 
+// Types named "Word" and "Crc" (the latter via TO_BYTE) must be in scope 
+// where the macro is expanded. 
+#define CRC_BYTE(table, crc, byte) do { \ 
+  crc = ((sizeof(crc) > 1) ? SHIFT_RIGHT_SAFE(crc, 8) : 0) ^ \ 
+        table->crc_word_[sizeof(Word) - 1][TO_BYTE(crc) ^ (byte)]; \ 
+} while (0) 
+ 
+// Looks up row "byte" of "table" using the low 8 bits of "buf" as the index. 
+// A type named "Word" must be in scope where the macro is expanded. 
+#define TABLE_ENTRY(table, byte, buf) \ 
+  table[byte][Downcast<Word, uint8>(buf)] 
+ 
+// Looks up the last row (sizeof(Word) - 1) of "table", indexing with "buf" 
+// as is -- no truncation to 8 bits is applied here, so the caller must 
+// already have reduced "buf" to a single byte. 
+#define TABLE_ENTRY_LAST(table, buf) \ 
+  table[sizeof(Word) - 1][buf] 
+ 
+// Extends CRC by one word. 
+// 
+// "buf" holds the word to consume and is destroyed in the process: it is 
+// XOR'ed with the low bits of "crc" and then shifted right 8 bits per table 
+// lookup. "crc" is updated in place. If "crc" is wider than "buf", the bits 
+// of "crc" above the word are kept by shifting them down; otherwise "crc" 
+// is rebuilt entirely from table entries. 
+// Types named "Crc" and "Word" must be in scope where the macro is expanded. 
+#define CRC_WORD(table, crc, buf) do { \ 
+  buf ^= Downcast<Crc, Word>(crc); \ 
+  if (sizeof(crc) > sizeof(buf)) { \ 
+    crc = SHIFT_RIGHT_SAFE(crc, sizeof(buf) * 8); \ 
+    crc ^= TABLE_ENTRY(table->crc_word_, 0, buf); \ 
+  } else { \ 
+    crc = TABLE_ENTRY(table->crc_word_, 0, buf); \ 
+  } \ 
+  buf >>= 8; \ 
+  for (size_t byte = 1; byte < sizeof(buf) - 1; ++byte) { \ 
+    crc ^= TABLE_ENTRY(table->crc_word_, byte, buf); \ 
+    buf >>= 8; \ 
+  } \ 
+  crc ^= TABLE_ENTRY_LAST(table->crc_word_, buf); \ 
+} while (0) 
+ 
+// Process beginning of data block byte by byte until source pointer 
+// becomes perfectly aligned on Word boundary. 
+// 
+// NB: this macro contains a "return". If "src" reaches "end" before it 
+// becomes aligned, the enclosing function returns 
+// crc ^ table->Base().Canonize() immediately. 
+// The "Word" macro parameter names the type whose size defines the 
+// alignment; CRC_BYTE inside the body is expanded with that same type 
+// substituted for "Word". 
+#define ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word) do { \ 
+  while ((reinterpret_cast<size_t>(src) & (sizeof(Word) - 1)) != 0) { \ 
+    if (src >= end) { \ 
+      return (crc ^ table->Base().Canonize()); \ 
+    } \ 
+    CRC_BYTE(table, crc, *src); \ 
+    src += 1; \ 
+  } \ 
+} while (0) 
+ 
+ 
+// On amd64, enforcing alignment is 2-4% slower on small (<= 64 bytes) blocks 
+// but 6-10% faster on larger blocks (>= 2KB). 
+// Break-even point (+-1%) is around 1KB (Q9650, E6600). 
+// 
+// Aligns "src" only when Word is wider than 8 bytes or the block is longer 
+// than CRCUTIL_MIN_ALIGN_SIZE bytes (defined outside this header). 
+// Like ALIGN_ON_WORD_BOUNDARY, this may return from the enclosing function. 
+#define ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, table, src, end, crc, Word) \ 
+do { \ 
+  if (sizeof(Word) > 8 || (bytes) > CRCUTIL_MIN_ALIGN_SIZE) { \ 
+    ALIGN_ON_WORD_BOUNDARY(table, src, end, crc, Word); \ 
+  } \ 
+} while (0) 
+ 
+#if defined(_MSC_VER) 
+#pragma warning(push) 
+#pragma warning(disable: 4127)  // conditional expression is constant 
+#endif  // defined(_MSC_VER) 
+ 
+// Forward declarations. 
+template<typename CrcImplementation> class RollingCrc; 
+ 
+// Crc        is the type used internally and to return values of N-bit CRC. 
+//            It should be at least as large as "TableEntry" and "Word" but 
+//            may be larger (e.g. for 16-bit CRC, TableEntry and Word may be 
+//            set to uint16 but Crc may be set to uint32). 
+// 
+// TableEntry is the type of values stored in the tables. 
+//            To implement N-bit CRC, TableEntry should be large enough 
+//            to store N bits. 
+// 
+// Word       is the type used to read data sizeof(Word) at a time. 
+//            Ideally, it should be "most suitable for given architecture" 
+//            integer type -- typically "size_t". 
+// 
+// kStride    is the number of words processed in interleaved manner by 
+//            CrcMultiword() and CrcBlockword(). Shall be either 3 or 4. 
+//            Optimal value depends on hardware architecture (AMD64, ARM, etc). 
+// 
+template<typename _Crc, typename _TableEntry, typename _Word, int kStride> 
+    class GenericCrc { 
+ public: 
+  // Make Crc, TableEntry, and Word types visible (used by RollingCrc etc.) 
+  typedef _Crc Crc; 
+  typedef _TableEntry TableEntry; 
+  typedef _Word Word; 
+ 
+  // Default constructor: has an empty body and does not call Init(). 
+  // NOTE(review): presumably Init() must be called before any CRC is 
+  // computed -- confirm against callers. 
+  GenericCrc() {} 
+ 
+  // Initializes the tables given generating polynomial of degree. 
+  // If "canonical" is true, crc value will be XOR'ed with (-1) before and 
+  // after actual CRC computation. 
+  // Simply forwards all three arguments to Init(). 
+  GenericCrc(const Crc &generating_polynomial, size_t degree, bool canonical) { 
+    Init(generating_polynomial, degree, canonical); 
+  } 
+  void Init(const Crc &generating_polynomial, size_t degree, bool canonical) { 
+    base_.Init(generating_polynomial, degree, canonical); 
+ 
+    // Instead of computing 
+    //    table[j][i] = MultiplyUnnormalized(i, 8, k), 
+    // for all i = 0...255, we may notice that 
+    // if i = 2**n then for all m = 1...(i-1) 
+    // MultiplyUnnormalized(i + m, 8, k) = 
+    //    MultiplyUnnormalized(i ^ m, 8, k) = 
+    //    MultiplyUnnormalized(i, 8, k) ^ MultiplyUnnormalized(m, 8, k) = 
+    //    MultiplyUnnormalized(i, 8, k) ^ crc_word_interleaved[j][m] = 
+    //    table[i] ^ table[m]. 
+#if 0 
+    for (size_t j = 0; j < sizeof(Word); ++j) { 
+      Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree); 
+      for (size_t i = 0; i < 256; ++i) { 
+        Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k); 
+        this->crc_word_interleaved_[j][i] = Downcast<Crc, TableEntry>(temp); 
+      } 
+    } 
+#else 
+    for (size_t j = 0; j < sizeof(Word); ++j) { 
+      Crc k = Base().XpowN((sizeof(Word) * kStride - 1 - j) * 8 + degree); 
+      TableEntry *table = this->crc_word_interleaved_[j]; 
+      table[0] = 0;  // Init 0s entry -- multiply 0 by anything yields 0. 
+      for (size_t i = 1; i < 256; i <<= 1) { 
+        TableEntry value = Downcast<Crc, TableEntry>( 
+            Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k)); 
+        table[i] = value; 
+        for (size_t m = 1; m < i; ++m) { 
+          table[i + m] = value ^ table[m]; 
+        } 
+      } 
+    } 
+#endif 
+ 
+#if 0 
+    for (size_t j = 0; j < sizeof(Word); ++j) { 
+      Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree); 
+      for (size_t i = 0; i < 256; ++i) { 
+        Crc temp = Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k); 
+        this->crc_word_[j][i] = Downcast<Crc, TableEntry>(temp); 
+      } 
+    } 
+#else 
+    for (size_t j = 0; j < sizeof(Word); ++j) { 
+      Crc k = Base().XpowN((sizeof(Word) - 1 - j) * 8 + degree); 
+      TableEntry *table = this->crc_word_[j]; 
+      table[0] = 0;  // Init 0s entry -- multiply 0 by anything yields 0. 
+      for (size_t i = 1; i < 256; i <<= 1) { 
+        TableEntry value = Downcast<Crc, TableEntry>( 
+            Base().MultiplyUnnormalized(static_cast<Crc>(i), 8, k)); 
+        table[i] = value; 
+        for (size_t m = 1; m < i; ++m) { 
+          table[i + m] = value ^ table[m]; 
+        } 
+      } 
+    } 
+#endif 
+  } 
+ 
+  // Default CRC implementation 
+  Crc CrcDefault(const void *data, size_t bytes, const Crc &start) const { 
+#if HAVE_AMD64 || HAVE_I386 
+    return CrcMultiword(data, bytes, start); 
+#else 
+    // Very few CPUs have multiple ALUs and speculative execution 
+    // (Itanium is an exception) so sophisticated algorithms will 
+    // not perform better than good old Sarwate algorithm. 
+    return CrcByteUnrolled(data, bytes, start); 
+#endif  // HAVE_AMD64 || HAVE_I386 
+  } 
+ 
+  // Returns base class. 
+  const GfUtil<Crc> &Base() const { return base_; } 
+ 
+ protected: 
+  // Canonical, byte-by-byte CRC computation. 
+  Crc CrcByte(const void *data, size_t bytes, const Crc &start) const { 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    Crc crc = start ^ Base().Canonize(); 
+    for (const uint8 *end = src + bytes; src < end; ++src) { 
+      CRC_BYTE(this, crc, *src); 
+    } 
+    return (crc ^ Base().Canonize()); 
+  } 
+ 
+  // Byte-by-byte CRC with main loop unrolled. 
+  Crc CrcByteUnrolled(const void *data, size_t bytes, const Crc &start) const { 
+    if (bytes == 0) { 
+      return start; 
+    } 
+ 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    const uint8 *end = src + bytes; 
+    Crc crc = start ^ Base().Canonize(); 
+ 
+    // Unroll loop 4 times. 
+    end -= 3; 
+    for (; src < end; src += 4) { 
+      PREFETCH(src); 
+      CRC_BYTE(this, crc, src[0]); 
+      CRC_BYTE(this, crc, src[1]); 
+      CRC_BYTE(this, crc, src[2]); 
+      CRC_BYTE(this, crc, src[3]); 
+    } 
+    end += 3; 
+ 
+    // Compute CRC of remaining bytes. 
+    for (; src < end; ++src) { 
+      CRC_BYTE(this, crc, *src); 
+    } 
+ 
+    return (crc ^ Base().Canonize()); 
+  } 
+ 
+  // CRC via CRC_BYTE, but reading the input one Crc-sized word at a time. 
+  Crc CrcByteWord(const void *data, size_t bytes, const Crc &start) const { 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    const uint8 *end = src + bytes; 
+    Crc crc0 = start ^ Base().Canonize(); 
+ 
+    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Crc); 
+    if (src >= end) { 
+      return (crc0 ^ Base().Canonize()); 
+    } 
+ 
+    // Process 4*sizeof(Crc) bytes at a time. 
+    end -= 4 * sizeof(Crc) - 1; 
+    for (; src < end; src += 4 * sizeof(Crc)) { 
+      for (size_t i = 0; i < 4; ++i) { 
+        crc0 ^= reinterpret_cast<const Crc *>(src)[i]; 
+        if (i == 0) { 
+          PREFETCH(src); 
+        } 
+        for (size_t byte = 0; byte < sizeof(crc0); ++byte) { 
+          CRC_BYTE(this, crc0, 0); 
+        } 
+      } 
+    } 
+    end += 4 * sizeof(Crc) - 1; 
+ 
+    // Process sizeof(Crc) bytes at a time. 
+    end -= sizeof(Crc) - 1; 
+    for (; src < end; src += sizeof(Crc)) { 
+      crc0 ^= reinterpret_cast<const Crc *>(src)[0]; 
+      for (size_t byte = 0; byte < sizeof(crc0); ++byte) { 
+        CRC_BYTE(this, crc0, 0); 
+      } 
+    } 
+    end += sizeof(Crc) - 1; 
+ 
+    // Compute CRC of remaining bytes. 
+    for (;src < end; ++src) { 
+      CRC_BYTE(this, crc0, *src); 
+    } 
+ 
+    return (crc0 ^ Base().Canonize()); 
+  } 
+ 
+  // Faster, word-by-word CRC. 
+  Crc CrcWord(const void *data, size_t bytes, const Crc &start) const { 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    const uint8 *end = src + bytes; 
+    Crc crc0 = start ^ Base().Canonize(); 
+ 
+    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); 
+    if (src >= end) { 
+      return (crc0 ^ Base().Canonize()); 
+    } 
+ 
+    // Process 4 * sizeof(Word) bytes at once. 
+    end -= 4 * sizeof(Word) - 1; 
+    for (; src < end; src += 4 * sizeof(Word)) { 
+      Word buf0 = reinterpret_cast<const Word *>(src)[0]; 
+      PREFETCH(src); 
+      CRC_WORD(this, crc0, buf0); 
+      buf0 = reinterpret_cast<const Word *>(src)[1]; 
+      CRC_WORD(this, crc0, buf0); 
+      buf0 = reinterpret_cast<const Word *>(src)[2]; 
+      CRC_WORD(this, crc0, buf0); 
+      buf0 = reinterpret_cast<const Word *>(src)[3]; 
+      CRC_WORD(this, crc0, buf0); 
+    } 
+    end += 4 * sizeof(Word) - 1; 
+ 
+    // Process sizeof(Word) bytes at a time. 
+    end -= sizeof(Word) - 1; 
+    for (; src < end; src += sizeof(Word)) { 
+      Word buf0 = reinterpret_cast<const Word *>(src)[0]; 
+      CRC_WORD(this, crc0, buf0); 
+    } 
+    end += sizeof(Word) - 1; 
+ 
+    // Compute CRC of remaining bytes. 
+    for (;src < end; ++src) { 
+      CRC_BYTE(this, crc0, *src); 
+    } 
+ 
+    return (crc0 ^ Base().Canonize()); 
+  } 
+ 
+#define REPEAT_FROM_1(macro) \ 
+  macro(1); \ 
+  macro(2); \ 
+  macro(3); \ 
+  macro(4); \ 
+  macro(5); \ 
+  macro(6); \ 
+  macro(7); 
+ 
+#define REPEAT_FROM_0(macro) \ 
+  macro(0); \ 
+  REPEAT_FROM_1(macro) 
+ 
+  // Faster, process adjacent blocks in parallel and concatenate CRCs. 
+  Crc CrcBlockword(const void *data, size_t bytes, const Crc &start) const { 
+    if (kStride < 2 || kStride > 8) { 
+      // Unsupported configuration; 
+      // fall back to something sensible. 
+      return CrcWord(data, bytes, start); 
+    } 
+ 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    const uint8 *end = src + bytes; 
+    Crc crc0 = start ^ Base().Canonize(); 
+    enum { 
+      // Add 16 to avoid false L1 cache collisions. 
+      kStripe = (15*1024 + 16) & ~(sizeof(Word) - 1), 
+    }; 
+ 
+    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); 
+    if (src >= end) { 
+      return (crc0 ^ Base().Canonize()); 
+    } 
+ 
+    end -= kStride * kStripe - 1; 
+    if (src < end) { 
+      Crc x_pow_8kStripe = Base().Xpow8N(kStripe); 
+      do { 
+        const uint8 *stripe_end = src + kStripe; 
+ 
+#define INIT_CRC(reg) \ 
+        Crc crc##reg; \ 
+        if (kStride >= reg) { \ 
+          crc##reg = 0; \ 
+        } 
+        REPEAT_FROM_1(INIT_CRC); 
+#undef INIT_CRC 
+ 
+        do { 
+#define FIRST(reg) \ 
+          Word buf##reg; \ 
+          if (kStride > reg) { \ 
+            buf##reg = reinterpret_cast<const Word *>(src + reg * kStripe)[0]; \ 
+            buf##reg ^= Downcast<Crc, Word>(crc##reg); \ 
+            if (sizeof(crc##reg) > sizeof(buf##reg)) { \ 
+              crc##reg = SHIFT_RIGHT_SAFE(crc##reg, sizeof(buf##reg) * 8); \ 
+              crc##reg ^= TABLE_ENTRY(this->crc_word_, 0, buf##reg); \ 
+            } else { \ 
+              crc##reg = TABLE_ENTRY(this->crc_word_, 0, buf##reg); \ 
+            } \ 
+            buf##reg >>= 8; \ 
+          } 
+          REPEAT_FROM_0(FIRST); 
+#undef FIRST 
+ 
+          for (size_t byte = 1; byte < sizeof(buf0) - 1; ++byte) { 
+#define NEXT(reg) do { \ 
+            if (kStride > reg) { \ 
+              crc##reg ^= TABLE_ENTRY(this->crc_word_, byte, buf##reg); \ 
+              buf##reg >>= 8; \ 
+            } \ 
+} while (0) 
+            REPEAT_FROM_0(NEXT); 
+#undef NEXT 
+          } 
+ 
+#define LAST(reg) do { \ 
+          if (kStride > reg) { \ 
+            crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_, buf##reg); \ 
+          } \ 
+} while (0) 
+          REPEAT_FROM_0(LAST); 
+#undef LAST 
+ 
+          src += sizeof(Word); 
+        } while (src < stripe_end); 
+ 
+#if 0 
+// The code is left for illustrative purposes only. 
+#define COMBINE(reg) do { \ 
+        if (reg > 0 && kStride > reg) { \ 
+          crc0 = Base().ChangeStartValue(crc##reg, kStripe, 0, crc0); \ 
+        } \ 
+} while (0) 
+#else 
+#define COMBINE(reg) do { \ 
+        if (reg > 0 && kStride > reg) { \ 
+          crc0 = crc##reg ^ Base().Multiply(crc0, x_pow_8kStripe); \ 
+        } \ 
+} while (0) 
+#endif 
+        REPEAT_FROM_0(COMBINE); 
+#undef COMBINE 
+ 
+        src += (kStride - 1) * kStripe; 
+      } 
+      while (src < end); 
+    } 
+    end += kStride * kStripe - 1; 
+ 
+    // Process sizeof(Word) bytes at a time. 
+    end -= sizeof(Word) - 1; 
+    for (; src < end; src += sizeof(Word)) { 
+      Word buf0 = reinterpret_cast<const Word *>(src)[0]; 
+      CRC_WORD(this, crc0, buf0); 
+    } 
+    end += sizeof(Word) - 1; 
+ 
+    // Compute CRC of remaining bytes. 
+    for (;src < end; ++src) { 
+      CRC_BYTE(this, crc0, *src); 
+    } 
+ 
+    return (crc0 ^ Base().Canonize()); 
+  } 
+ 
+  // Fastest, interleaved multi-byte CRC. 
+  Crc CrcMultiword(const void *data, size_t bytes, const Crc &start) const { 
+    if (kStride < 2 || kStride > 8) { 
+      // Unsupported configuration; 
+      // fall back to something sensible. 
+      return CrcWord(data, bytes, start); 
+    } 
+ 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    const uint8 *end = src + bytes; 
+    Crc crc0 = start ^ Base().Canonize(); 
+ 
+    ALIGN_ON_WORD_BOUNDARY_IF_NEEDED(bytes, this, src, end, crc0, Word); 
+    if (src >= end) { 
+      return (crc0 ^ Base().Canonize()); 
+    } 
+ 
+    // Process kStride Word registers at once; 
+    // should have at least 2*kInterleaveBytes of data to start. 
+    end -= 2*kInterleaveBytes - 1; 
+    if (src < end) { 
+      Crc crc_carryover; 
+      if (sizeof(Crc) > sizeof(Word)) { 
+        // crc_carryover is used if and only if Crc is wider than Word. 
+        crc_carryover = 0; 
+      } 
+#define INIT_CRC(reg) \ 
+      Crc crc##reg; \ 
+      if (reg > 0 && kStride > reg) { \ 
+        crc##reg = 0; \ 
+      } 
+      REPEAT_FROM_1(INIT_CRC); 
+#undef INIT_CRC 
+ 
+#define INIT_BUF(reg) \ 
+      Word buf##reg; \ 
+      if (kStride > reg) { \ 
+        buf##reg = reinterpret_cast<const Word *>(src)[reg]; \ 
+      } 
+      REPEAT_FROM_0(INIT_BUF); 
+#undef INIT_BUF 
+ 
+      do { 
+        PREFETCH(src); 
+        src += kInterleaveBytes; 
+ 
+        if (sizeof(Crc) > sizeof(Word)) { 
+          crc0 ^= crc_carryover; 
+        } 
+ 
+#define FIRST(reg, next_reg) do { \ 
+        if (kStride > reg) { \ 
+          buf##reg ^= Downcast<Crc, Word>(crc##reg); \ 
+          if (sizeof(Crc) > sizeof(Word)) { \ 
+            if (reg < kStride - 1) { \ 
+              crc##next_reg ^= SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \ 
+            } else { \ 
+              crc_carryover = SHIFT_RIGHT_SAFE(crc##reg, 8 * sizeof(buf0)); \ 
+            } \ 
+          } \ 
+          crc##reg = TABLE_ENTRY(this->crc_word_interleaved_, 0, buf##reg); \ 
+          buf##reg >>= 8; \ 
+        } \ 
+} while (0) 
+        FIRST(0, 1); 
+        FIRST(1, 2); 
+        FIRST(2, 3); 
+        FIRST(3, 4); 
+        FIRST(4, 5); 
+        FIRST(5, 6); 
+        FIRST(6, 7); 
+        FIRST(7, 0); 
+#undef FIRST 
+ 
+        for (size_t byte = 1; byte < sizeof(Word) - 1; ++byte) { 
+#define NEXT(reg) do { \ 
+          if (kStride > reg) { \ 
+            crc##reg ^= \ 
+                TABLE_ENTRY(this->crc_word_interleaved_, byte, buf##reg); \ 
+            buf##reg >>= 8; \ 
+          } \ 
+} while(0) 
+          REPEAT_FROM_0(NEXT); 
+#undef NEXT 
+        } 
+ 
+#define LAST(reg) do { \ 
+        if (kStride > reg) { \ 
+          crc##reg ^= TABLE_ENTRY_LAST(this->crc_word_interleaved_, buf##reg); \ 
+          buf##reg = reinterpret_cast<const Word *>(src)[reg]; \ 
+        } \ 
+} while(0) 
+        REPEAT_FROM_0(LAST); 
+#undef LAST 
+      } 
+      while (src < end); 
+ 
+      if (sizeof(Crc) > sizeof(Word)) { 
+        crc0 ^= crc_carryover; 
+      } 
+ 
+#define COMBINE(reg) do { \ 
+      if (kStride > reg) { \ 
+        if (reg != 0) { \ 
+          crc0 ^= crc##reg; \ 
+        } \ 
+        CRC_WORD(this, crc0, buf##reg); \ 
+      } \ 
+} while (0) 
+      REPEAT_FROM_0(COMBINE); 
+#undef COMBINE 
+ 
+      src += kInterleaveBytes; 
+    } 
+    end += 2*kInterleaveBytes - 1; 
+ 
+    // Process sizeof(Word) bytes at once. 
+    end -= sizeof(Word) - 1; 
+    for (; src < end; src += sizeof(Word)) { 
+      Word buf0 = reinterpret_cast<const Word *>(src)[0]; 
+      CRC_WORD(this, crc0, buf0); 
+    } 
+    end += sizeof(Word) - 1; 
+ 
+    // Compute CRC of remaining bytes. 
+    for (;src < end; ++src) { 
+      CRC_BYTE(this, crc0, *src); 
+    } 
+ 
+    return (crc0 ^ Base().Canonize()); 
+  } 
+ 
+ protected: 
+  enum { 
+    kInterleaveBytes = sizeof(Word) * kStride, 
+  }; 
+ 
+  // Multiplication tables used by CRCs. 
+  TableEntry crc_word_interleaved_[sizeof(Word)][256]; 
+  TableEntry crc_word_[sizeof(Word)][256]; 
+ 
+  // Base class stored after CRC tables so that the most frequently 
+  // used table is at offset 0 and may be accessed faster. 
+  GfUtil<Crc> base_; 
+ 
+  friend class RollingCrc< GenericCrc<Crc, TableEntry, Word, kStride> >; 
+ 
+ private: 
+  // CrcMultiword on amd64 may run at 1.2 CPU cycles per byte which is 
+  // noticeably faster than CrcWord (2.2-2.6 cycles/byte depending on 
+  // hardware and compiler). However, there are problems with compilers. 
+  // 
+  // Test system: P45 chipset, Intel Q9650 CPU, 800MHz 4-4-4-12 memory. 
+  // 
+  // 64-bit compiler, <= 64-bit CRC, 64-bit tables, 64-bit reads: 
+  // CL 15.00.307291.1  C++   >1.2< CPU cycles/byte 
+  // ICL 11.1.051 -O3   C++    1.5  CPU cycles/byte 
+  // GCC 4.5 -O3        C++    2.0  CPU cycles/byte 
+  // GCC 4.x -O3        ASM   >1.2< CPU cycles/byte 
+  // 
+  // 32-bit compiler, MMX used, <= 64-bit CRC, 64-bit tables, 64-bit reads 
+  // CL 15.00.307291.1  C++   2.0  CPU cycles/byte 
+  // GCC 4.5 -O3        C++   1.9  CPU cycles/byte 
+  // ICL 11.1.051 -S    C++   1.6  CPU cycles/byte 
+  // GCC 4.x -O3        ASM  >1.3< CPU cycles/byte 
+  // 
+  // So, use inline ASM code for GCC for both i386 and amd64. 
+ 
+  Crc CrcMultiwordI386Mmx( 
+          const void *data, size_t bytes, const Crc &start) const; 
+  Crc CrcMultiwordGccAmd64( 
+          const void *data, size_t bytes, const Crc &start) const; 
+  Crc CrcMultiwordGccAmd64Sse2( 
+          const uint8 *src, const uint8 *end, const Crc &start) const; 
+} GCC_ALIGN_ATTRIBUTE(16); 
+ 
+#undef REPEAT_FROM_0 
+#undef REPEAT_FROM_1 
+ 
+ 
+// Specialized variants. 
+#if CRCUTIL_USE_ASM 
+ 
+#if (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX))) 
+ 
+// Declare specialized functions. 
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword( 
+    const void *data, size_t bytes, const uint64 &start) const; 
+ 
+#if HAVE_AMD64 && HAVE_SSE2 
+template<> 
+uint128_sse2 
+GenericCrc<uint128_sse2, uint128_sse2, uint64, 4>::CrcMultiword( 
+    const void *data, size_t bytes, const uint128_sse2 &start) const; 
+#endif  // HAVE_AMD64 && HAVE_SSE2 
+ 
+#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER <= 150030729 && \ 
+      (HAVE_I386 && HAVE_MMX) 
+ 
+// Work around bug in MSC (present at least in v. 15.00.30729.1) 
+template<> uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiwordI386Mmx( 
+    const void *data, 
+    size_t bytes, 
+    const uint64 &start) const; 
+template<> __forceinline 
+uint64 GenericCrc<uint64, uint64, uint64, 4>::CrcMultiword( 
+    const void *data, 
+    size_t bytes, 
+    const uint64 &start) const { 
+  typedef uint64 Word; 
+  typedef uint64 Crc; 
+  if (bytes <= 12) { 
+    const uint8 *src = static_cast<const uint8 *>(data); 
+    uint64 crc = start ^ Base().Canonize(); 
+    for (const uint8 *end = src + bytes; src < end; ++src) { 
+      CRC_BYTE(this, crc, *src); 
+    } 
+    return (crc ^ Base().Canonize()); 
+  } 
+  return CrcMultiwordI386Mmx(data, bytes, start); 
+} 
+ 
+#endif  // (defined(__GNUC__) && (HAVE_AMD64 || (HAVE_I386 && HAVE_MMX))) 
+ 
+#endif  // CRCUTIL_USE_ASM 
+ 
+ 
+#pragma pack(pop) 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_GENERIC_CRC_H_ 

+ 304 - 304
contrib/libs/crcutil/gf_util.h

@@ -1,304 +1,304 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Defines GfUtil template class which implements
-// 1. some useful operations in GF(2^n),
-// 2. CRC helper function (e.g. concatenation of CRCs) which are
-//    not affected by specific implemenation of CRC computation per se.
-//
-// Please read crc.pdf to understand how it all works.
-
-#ifndef CRCUTIL_GF_UTIL_H_
-#define CRCUTIL_GF_UTIL_H_
-
-#include "base_types.h"   // uint8, uint64
-#include "crc_casts.h"    // TO_BYTE()
-#include "platform.h"     // GCC_ALIGN_ATTRIBUTE(16), SHIFT_*_SAFE
-
-namespace crcutil {
-
-#pragma pack(push, 16)
-
-// "Crc" is the type used internally and to return values of N-bit CRC.
-template<typename Crc> class GfUtil {
- public:
-  // Initializes the tables given generating polynomial of degree (degree).
-  // If "canonical" is true, starting CRC value and computed CRC value will be
-  // XOR-ed with 111...111.
-  GfUtil() {}
-  GfUtil(const Crc &generating_polynomial, size_t degree, bool canonical) {
-    Init(generating_polynomial, degree, canonical);
-  }
-  void Init(const Crc &generating_polynomial, size_t degree, bool canonical) {
-    Crc one = 1;
-    one <<= degree - 1;
-    this->generating_polynomial_ = generating_polynomial;
-    this->crc_bytes_ = (degree + 7) >> 3;
-    this->degree_ = degree;
-    this->one_ = one;
-    if (canonical) {
-      this->canonize_ = one | (one - 1);
-    } else {
-      this->canonize_ = 0;
-    }
-    this->normalize_[0] = 0;
-    this->normalize_[1] = generating_polynomial;
-
-    Crc k = one >> 1;
-    for (size_t i = 0; i < sizeof(uint64) * 8; ++i) {
-      this->x_pow_2n_[i] = k;
-      k = Multiply(k, k);
-    }
-
-    this->crc_of_crc_ = Multiply(this->canonize_,
-                                 this->one_ ^ Xpow8N((degree + 7) >> 3));
-
-    FindLCD(Xpow8N(this->crc_bytes_), &this->x_pow_minus_W_);
-  }
-
-  // Returns generating polynomial.
-  Crc GeneratingPolynomial() const {
-    return this->generating_polynomial_;
-  }
-
-  // Returns number of bits in CRC (degree of generating polynomial).
-  size_t Degree() const {
-    return this->degree_;
-  }
-
-  // Returns start/finish adjustment constant.
-  Crc Canonize() const {
-    return this->canonize_;
-  }
-
-  // Returns normalized value of 1.
-  Crc One() const {
-    return this->one_;
-  }
-
-  // Returns value of CRC(A, |A|, start_new) given known
-  // crc=CRC(A, |A|, start_old) -- without touching the data.
-  Crc ChangeStartValue(const Crc &crc, uint64 bytes,
-                       const Crc &start_old,
-                       const Crc &start_new) const {
-    return (crc ^ Multiply(start_new ^ start_old, Xpow8N(bytes)));
-  }
-
-  // Returns CRC of concatenation of blocks A and B when CRCs
-  // of blocks A and B are known -- without touching the data.
-  //
-  // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0),
-  // returns CRC(AB, |AB|, startA).
-  Crc Concatenate(const Crc &crc_A, const Crc &crc_B, uint64 bytes_B) const {
-    return ChangeStartValue(crc_B, bytes_B, 0 /* start_B */, crc_A);
-  }
-
-  // Returns CRC of sequence of zeroes -- without touching the data.
-  Crc CrcOfZeroes(uint64 bytes, const Crc &start) const {
-    Crc tmp = Multiply(start ^ this->canonize_, Xpow8N(bytes));
-    return (tmp ^ this->canonize_);
-  }
-
-  // Given CRC of a message, stores extra (degree + 7)/8 bytes after
-  // the message so that CRC(message+extra, start) = result.
-  // Does not change CRC start value (use ChangeStartValue for that).
-  // Returns number of stored bytes.
-  size_t StoreComplementaryCrc(void *dst,
-                               const Crc &message_crc,
-                               const Crc &result) const {
-    Crc crc0 = Multiply(result ^ this->canonize_, this->x_pow_minus_W_);
-    crc0 ^= message_crc ^ this->canonize_;
-    uint8 *d = reinterpret_cast<uint8 *>(dst);
-    for (size_t i = 0; i < this->crc_bytes_; ++i) {
-      d[i] = TO_BYTE(crc0);
-      crc0 >>= 8;
-    }
-    return this->crc_bytes_;
-  }
-
-  // Stores given CRC of a message as (degree + 7)/8 bytes filled
-  // with 0s to the right. Returns number of stored bytes.
-  // CRC of the message and stored CRC is a constant value returned
-  // by CrcOfCrc() -- it does not depend on contents of the message.
-  size_t StoreCrc(void *dst, const Crc &crc) const {
-    uint8 *d = reinterpret_cast<uint8 *>(dst);
-    Crc crc0 = crc;
-    for (size_t i = 0; i < this->crc_bytes_; ++i) {
-      d[i] = TO_BYTE(crc0);
-      crc0 >>= 8;
-    }
-    return this->crc_bytes_;
-  }
-
-  // Returns expected CRC value of CRC(Message,CRC(Message))
-  // when CRC is stored after the message. This value is fixed
-  // and does not depend on the message or CRC start value.
-  Crc CrcOfCrc() const {
-    return this->crc_of_crc_;
-  }
-
-  // Returns ((a * b) mod P) where "a" and "b" are of degree <= (D-1).
-  Crc Multiply(const Crc &aa, const Crc &bb) const {
-    Crc a = aa;
-    Crc b = bb;
-    if ((a ^ (a - 1)) < (b ^ (b - 1))) {
-      Crc temp = a;
-      a = b;
-      b = temp;
-    }
-
-    if (a == 0) {
-      return a;
-    }
-
-    Crc product = 0;
-    Crc one = this->one_;
-    for (; a != 0; a <<= 1) {
-      if ((a & one) != 0) {
-        product ^= b;
-        a ^= one;
-      }
-      b = (b >> 1) ^ this->normalize_[Downcast<Crc, size_t>(b & 1)];
-    }
-
-    return product;
-  }
-
-  // Returns ((unnorm * m) mod P) where degree of m is <= (D-1)
-  // and degree of value "unnorm" is provided explicitly.
-  Crc MultiplyUnnormalized(const Crc &unnorm, size_t degree,
-                           const Crc &m) const {
-    Crc v = unnorm;
-    Crc result = 0;
-    while (degree > this->degree_) {
-      degree -= this->degree_;
-      Crc value = v & (this->one_ | (this->one_ - 1));
-      result ^= Multiply(value, Multiply(m, XpowN(degree)));
-      v >>= this->degree_;
-    }
-    result ^= Multiply(v << (this->degree_ - degree), m);
-    return result;
-  }
-
-  // returns ((x ** n) mod P).
-  Crc XpowN(uint64 n) const {
-    Crc one = this->one_;
-    Crc result = one;
-
-    for (size_t i = 0; n != 0; ++i, n >>= 1) {
-      if (n & 1) {
-        result = Multiply(result, this->x_pow_2n_[i]);
-      }
-    }
-
-    return result;
-  }
-
-  // Returns (x ** (8 * n) mod P).
-  Crc Xpow8N(uint64 n) const {
-    return XpowN(n << 3);
-  }
-
-  // Returns remainder (A mod B) and sets *q = (A/B) of division
-  // of two polynomials:
-  //    A = dividend + dividend_x_pow_D_coef * x**degree,
-  //    B = divisor.
-  Crc Divide(const Crc &dividend0, int dividend_x_pow_D_coef,
-             const Crc &divisor0, Crc *q) const {
-    Crc divisor = divisor0;
-    Crc dividend = dividend0;
-    Crc quotient = 0;
-    Crc coef = this->one_;
-
-    while ((divisor & 1) == 0) {
-      divisor >>= 1;
-      coef >>= 1;
-    }
-
-    if (dividend_x_pow_D_coef) {
-      quotient = coef >> 1;
-      dividend ^= divisor >> 1;
-    }
-
-    Crc x_pow_degree_b = 1;
-    for (;;) {
-      if ((dividend & x_pow_degree_b) != 0) {
-        dividend ^= divisor;
-        quotient ^= coef;
-      }
-      if (coef == this->one_) {
-        break;
-      }
-      coef <<= 1;
-      x_pow_degree_b <<= 1;
-      divisor <<= 1;
-    }
-
-    *q = quotient;
-    return dividend;
-  }
-
-  // Extended Euclid's algorith -- for given A finds LCD(A, P) and
-  // value B such that (A * B) mod P = LCD(A, P).
-  Crc FindLCD(const Crc &A, Crc *B) const {
-    if (A == 0 || A == this->one_) {
-      *B = A;
-      return A;
-    }
-
-    // Actually, generating polynomial is
-    // (generating_polynomial_ + x**degree).
-    int r0_x_pow_D_coef = 1;
-    Crc r0 = this->generating_polynomial_;
-    Crc b0 = 0;
-    Crc r1 = A;
-    Crc b1 = this->one_;
-
-    for (;;) {
-      Crc q;
-      Crc r = Divide(r0, r0_x_pow_D_coef, r1, &q);
-      if (r == 0) {
-        break;
-      }
-      r0_x_pow_D_coef = 0;
-
-      r0 = r1;
-      r1 = r;
-
-      Crc b = b0 ^ Multiply(q, b1);
-      b0 = b1;
-      b1 = b;
-    }
-
-    *B = b1;
-    return r1;
-  }
-
- protected:
-  Crc canonize_;
-  Crc x_pow_2n_[sizeof(uint64) * 8];
-  Crc generating_polynomial_;
-  Crc one_;
-  Crc x_pow_minus_W_;
-  Crc crc_of_crc_;
-  Crc normalize_[2];
-  size_t crc_bytes_;
-  size_t degree_;
-} GCC_ALIGN_ATTRIBUTE(16);
-
-#pragma pack(pop)
-
-}  // namespace crcutil
-
-#endif  // CRCUTIL_GF_UTIL_H_
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// Defines GfUtil template class which implements 
+// 1. some useful operations in GF(2^n), 
+// 2. CRC helper function (e.g. concatenation of CRCs) which are 
+//    not affected by specific implementation of CRC computation per se. 
+// 
+// Please read crc.pdf to understand how it all works. 
+ 
+#ifndef CRCUTIL_GF_UTIL_H_ 
+#define CRCUTIL_GF_UTIL_H_ 
+ 
+#include "base_types.h"   // uint8, uint64 
+#include "crc_casts.h"    // TO_BYTE() 
+#include "platform.h"     // GCC_ALIGN_ATTRIBUTE(16), SHIFT_*_SAFE 
+ 
+namespace crcutil { 
+ 
+#pragma pack(push, 16) 
+ 
+// "Crc" is the type used internally and to return values of N-bit CRC. 
+template<typename Crc> class GfUtil { 
+ public: 
+  // Initializes the tables given generating polynomial of degree (degree). 
+  // If "canonical" is true, starting CRC value and computed CRC value will be 
+  // XOR-ed with 111...111. 
+  GfUtil() {} 
+  GfUtil(const Crc &generating_polynomial, size_t degree, bool canonical) { 
+    Init(generating_polynomial, degree, canonical); 
+  } 
+  void Init(const Crc &generating_polynomial, size_t degree, bool canonical) { 
+    Crc one = 1; 
+    one <<= degree - 1; 
+    this->generating_polynomial_ = generating_polynomial; 
+    this->crc_bytes_ = (degree + 7) >> 3; 
+    this->degree_ = degree; 
+    this->one_ = one; 
+    if (canonical) { 
+      this->canonize_ = one | (one - 1); 
+    } else { 
+      this->canonize_ = 0; 
+    } 
+    this->normalize_[0] = 0; 
+    this->normalize_[1] = generating_polynomial; 
+ 
+    Crc k = one >> 1; 
+    for (size_t i = 0; i < sizeof(uint64) * 8; ++i) { 
+      this->x_pow_2n_[i] = k; 
+      k = Multiply(k, k); 
+    } 
+ 
+    this->crc_of_crc_ = Multiply(this->canonize_, 
+                                 this->one_ ^ Xpow8N((degree + 7) >> 3)); 
+ 
+    FindLCD(Xpow8N(this->crc_bytes_), &this->x_pow_minus_W_); 
+  } 
+ 
+  // Returns generating polynomial. 
+  Crc GeneratingPolynomial() const { 
+    return this->generating_polynomial_; 
+  } 
+ 
+  // Returns number of bits in CRC (degree of generating polynomial). 
+  size_t Degree() const { 
+    return this->degree_; 
+  } 
+ 
+  // Returns start/finish adjustment constant. 
+  Crc Canonize() const { 
+    return this->canonize_; 
+  } 
+ 
+  // Returns normalized value of 1. 
+  Crc One() const { 
+    return this->one_; 
+  } 
+ 
+  // Returns value of CRC(A, |A|, start_new) given known 
+  // crc=CRC(A, |A|, start_old) -- without touching the data. 
+  Crc ChangeStartValue(const Crc &crc, uint64 bytes, 
+                       const Crc &start_old, 
+                       const Crc &start_new) const { 
+    return (crc ^ Multiply(start_new ^ start_old, Xpow8N(bytes))); 
+  } 
+ 
+  // Returns CRC of concatenation of blocks A and B when CRCs 
+  // of blocks A and B are known -- without touching the data. 
+  // 
+  // To be precise, given CRC(A, |A|, startA) and CRC(B, |B|, 0), 
+  // returns CRC(AB, |AB|, startA). 
+  Crc Concatenate(const Crc &crc_A, const Crc &crc_B, uint64 bytes_B) const { 
+    return ChangeStartValue(crc_B, bytes_B, 0 /* start_B */, crc_A); 
+  } 
+ 
+  // Returns CRC of sequence of zeroes -- without touching the data. 
+  Crc CrcOfZeroes(uint64 bytes, const Crc &start) const { 
+    Crc tmp = Multiply(start ^ this->canonize_, Xpow8N(bytes)); 
+    return (tmp ^ this->canonize_); 
+  } 
+ 
+  // Given CRC of a message, stores extra (degree + 7)/8 bytes after 
+  // the message so that CRC(message+extra, start) = result. 
+  // Does not change CRC start value (use ChangeStartValue for that). 
+  // Returns number of stored bytes. 
+  size_t StoreComplementaryCrc(void *dst, 
+                               const Crc &message_crc, 
+                               const Crc &result) const { 
+    Crc crc0 = Multiply(result ^ this->canonize_, this->x_pow_minus_W_); 
+    crc0 ^= message_crc ^ this->canonize_; 
+    uint8 *d = reinterpret_cast<uint8 *>(dst); 
+    for (size_t i = 0; i < this->crc_bytes_; ++i) { 
+      d[i] = TO_BYTE(crc0); 
+      crc0 >>= 8; 
+    } 
+    return this->crc_bytes_; 
+  } 
+ 
+  // Stores given CRC of a message as (degree + 7)/8 bytes filled 
+  // with 0s to the right. Returns number of stored bytes. 
+  // CRC of the message and stored CRC is a constant value returned 
+  // by CrcOfCrc() -- it does not depend on contents of the message. 
+  size_t StoreCrc(void *dst, const Crc &crc) const {
+    uint8 *out = reinterpret_cast<uint8 *>(dst);
+    Crc remaining = crc;
+    size_t stored = 0;
+    // Emit the value one byte at a time, least significant byte first.
+    while (stored < this->crc_bytes_) {
+      out[stored] = TO_BYTE(remaining);
+      remaining >>= 8;
+      ++stored;
+    }
+    return this->crc_bytes_;
+  }
+ 
+  // Returns expected CRC value of CRC(Message,CRC(Message)) 
+  // when CRC is stored after the message. This value is fixed 
+  // and does not depend on the message or CRC start value. 
+  Crc CrcOfCrc() const {
+    // Plain accessor: hands back a copy of the stored crc_of_crc_ member.
+    const Crc &fixed_value = this->crc_of_crc_;
+    return fixed_value;
+  }
+ 
+  // Returns ((a * b) mod P) where "a" and "b" are of degree <= (D-1). 
+  Crc Multiply(const Crc &aa, const Crc &bb) const {
+    Crc lhs = aa;
+    Crc rhs = bb;
+
+    // (v ^ (v - 1)) is a mask of the lowest set bit of v and every bit
+    // below it (all ones for v == 0). The operand with the larger mask is
+    // used as the loop counter because it is shifted out to zero in fewer
+    // iterations; a zero operand therefore always ends up in "lhs".
+    if ((lhs ^ (lhs - 1)) < (rhs ^ (rhs - 1))) {
+      Crc saved = lhs;
+      lhs = rhs;
+      rhs = saved;
+    }
+
+    if (lhs == 0) {
+      return lhs;
+    }
+
+    Crc one = this->one_;
+    Crc product = 0;
+    while (lhs != 0) {
+      // Consume the bit of lhs located at "one": add the current multiple
+      // of rhs into the product and clear that bit.
+      if ((lhs & one) != 0) {
+        product ^= rhs;
+        lhs ^= one;
+      }
+      // Advance rhs by one bit position, folding in normalize_[1] whenever
+      // a set bit is shifted out, then expose the next bit of lhs.
+      rhs = (rhs >> 1) ^ this->normalize_[Downcast<Crc, size_t>(rhs & 1)];
+      lhs <<= 1;
+    }
+
+    return product;
+  }
+ 
+  // Returns ((unnorm * m) mod P) where degree of m is <= (D-1) 
+  // and degree of value "unnorm" is provided explicitly. 
+  Crc MultiplyUnnormalized(const Crc &unnorm, size_t degree,
+                           const Crc &m) const {
+    // Mask selecting the bit at one_ together with every bit below it.
+    const Crc chunk_mask = this->one_ | (this->one_ - 1);
+
+    Crc rest = unnorm;
+    Crc acc = 0;
+    // Peel off degree_ bits at a time while more than degree_ bits remain;
+    // each chunk is multiplied by m and by x ** (remaining degree).
+    for (; degree > this->degree_; rest >>= this->degree_) {
+      degree -= this->degree_;
+      Crc chunk = rest & chunk_mask;
+      acc ^= Multiply(chunk, Multiply(m, XpowN(degree)));
+    }
+    // Final chunk: shift it left by the unused (degree_ - degree) bits
+    // before multiplying by m.
+    acc ^= Multiply(rest << (this->degree_ - degree), m);
+    return acc;
+  }
+ 
+  // returns ((x ** n) mod P). 
+  Crc XpowN(uint64 n) const {
+    // Walk the bits of n from least to most significant and multiply in
+    // the table entry x_pow_2n_[bit] for every bit that is set.
+    Crc result = this->one_;
+    size_t bit = 0;
+    while (n != 0) {
+      if ((n & 1) != 0) {
+        result = Multiply(result, this->x_pow_2n_[bit]);
+      }
+      n >>= 1;
+      ++bit;
+    }
+    return result;
+  }
+ 
+  // Returns (x ** (8 * n) mod P). 
+  Crc Xpow8N(uint64 n) const {
+    // Convert the byte count into a bit count. NB: the shift is unchecked,
+    // so the three most significant bits of n are silently discarded.
+    uint64 bit_count = n << 3;
+    return XpowN(bit_count);
+  }
+ 
+  // Returns remainder (A mod B) and sets *q = (A/B) of division 
+  // of two polynomials: 
+  //    A = dividend + dividend_x_pow_D_coef * x**degree, 
+  //    B = divisor. 
+  Crc Divide(const Crc &dividend0, int dividend_x_pow_D_coef,
+             const Crc &divisor0, Crc *q) const {
+    Crc den = divisor0;
+    Crc rem = dividend0;
+    Crc quot = 0;
+    Crc quot_bit = this->one_;
+
+    // Shift the divisor right until its lowest bit is set, moving the
+    // quotient bit in lockstep.
+    // NB: this loop cannot terminate when divisor0 == 0.
+    while ((den & 1) == 0) {
+      den >>= 1;
+      quot_bit >>= 1;
+    }
+
+    // Account for the explicit x**degree term of the dividend, which does
+    // not fit into "dividend0" itself.
+    if (dividend_x_pow_D_coef != 0) {
+      quot = quot_bit >> 1;
+      rem ^= den >> 1;
+    }
+
+    // Long division: test the remainder one bit at a time, starting from
+    // the lowest bit, until the quotient bit has returned to one_.
+    Crc probe = 1;
+    while (true) {
+      if ((rem & probe) != 0) {
+        rem ^= den;
+        quot ^= quot_bit;
+      }
+      if (quot_bit == this->one_) {
+        break;
+      }
+      quot_bit <<= 1;
+      probe <<= 1;
+      den <<= 1;
+    }
+
+    *q = quot;
+    return rem;
+  }
+ 
+  // Extended Euclid's algorithm -- for given A finds LCD(A, P) and
+  // value B such that (A * B) mod P = LCD(A, P).
+  Crc FindLCD(const Crc &A, Crc *B) const {
+    // Trivial inputs: 0 and one_ are returned unchanged, with *B = A.
+    if (A == 0 || A == this->one_) {
+      *B = A;
+      return A;
+    }
+
+    // The real generating polynomial is
+    // (generating_polynomial_ + x**degree); the x**degree term is passed
+    // to Divide() separately and applies to the first division only.
+    int top_coef = 1;
+    Crc prev_r = this->generating_polynomial_;
+    Crc prev_b = 0;
+    Crc cur_r = A;
+    Crc cur_b = this->one_;
+
+    Crc quotient;
+    Crc remainder = Divide(prev_r, top_coef, cur_r, &quotient);
+    while (remainder != 0) {
+      top_coef = 0;
+
+      // Slide the remainder pair forward.
+      prev_r = cur_r;
+      cur_r = remainder;
+
+      // Slide the cofactor pair forward: b(k+1) = b(k-1) ^ q * b(k).
+      Crc next_b = prev_b ^ Multiply(quotient, cur_b);
+      prev_b = cur_b;
+      cur_b = next_b;
+
+      remainder = Divide(prev_r, top_coef, cur_r, &quotient);
+    }
+
+    *B = cur_b;
+    return cur_r;
+  }
+ 
+ protected: 
+  Crc canonize_; 
+  Crc x_pow_2n_[sizeof(uint64) * 8]; 
+  Crc generating_polynomial_; 
+  Crc one_; 
+  Crc x_pow_minus_W_; 
+  Crc crc_of_crc_; 
+  Crc normalize_[2]; 
+  size_t crc_bytes_; 
+  size_t degree_; 
+} GCC_ALIGN_ATTRIBUTE(16); 
+ 
+#pragma pack(pop) 
+ 
+}  // namespace crcutil 
+ 
+#endif  // CRCUTIL_GF_UTIL_H_ 

+ 306 - 306
contrib/libs/crcutil/interface.cc

@@ -1,307 +1,307 @@
-// Copyright 2010 Google Inc.  All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This is the only file where all details of CRC implementation are buried.
-
-#include "interface.h"
-
-#include "aligned_alloc.h"
-#include "crc32c_sse4.h"
-#include "generic_crc.h"
-#include "protected_crc.h"
-#include "rolling_crc.h"
-
-// Align all CRC tables on kAlign boundary.
-// Shall be exact power of 2.
-static size_t kAlign = 4 * 1024;
-
-using namespace crcutil;
-
+// Copyright 2010 Google Inc.  All rights reserved. 
+// 
+// Licensed under the Apache License, Version 2.0 (the "License"); 
+// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at 
+// 
+//      http://www.apache.org/licenses/LICENSE-2.0 
+// 
+// Unless required by applicable law or agreed to in writing, software 
+// distributed under the License is distributed on an "AS IS" BASIS, 
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and 
+// limitations under the License. 
+ 
+// This is the only file where all details of CRC implementation are buried. 
+ 
+#include "interface.h" 
+ 
+#include "aligned_alloc.h" 
+#include "crc32c_sse4.h" 
+#include "generic_crc.h" 
+#include "protected_crc.h" 
+#include "rolling_crc.h" 
+ 
+// Align all CRC tables on kAlign boundary.
+// Shall be exact power of 2. Declared const: it is only ever read (passed
+// by value to AlignedAlloc) and must not change at run time.
+static const size_t kAlign = 4 * 1024;
+ 
+using namespace crcutil; 
+ 
 #if (!defined(__clang__) && defined(__GNUC__))
-// Suppress 'invalid access to non-static data member ...  of NULL object'
-#undef offsetof
-#define offsetof(TYPE, MEMBER) (reinterpret_cast <size_t> \
-    ((&reinterpret_cast <const char &>( \
-        reinterpret_cast <const TYPE *>(1)->MEMBER))) - 1)
-#endif  // defined(__GNUC__)
-
-namespace crcutil_interface {
-
-template<typename CrcImplementation, typename RollingCrcImplementation>
-    class Implementation : public CRC {
- public:
-  typedef typename CrcImplementation::Crc Crc;
-  typedef Implementation<CrcImplementation, RollingCrcImplementation> Self;
-
-  Implementation(const Crc &poly,
-                 size_t degree,
-                 bool canonical,
-                 const Crc &roll_start_value,
-                 size_t roll_length)
-    : crc_(poly, degree, canonical),
-      rolling_crc_(crc_, roll_length, roll_start_value) {
-  }
-
-  static Self *Create(const Crc &poly,
-                      size_t degree,
-                      bool canonical,
-                      const Crc &roll_start_value,
-                      size_t roll_length,
-                      const void **allocated_memory) {
-    void *memory = AlignedAlloc(sizeof(Self),
-                                offsetof(Self, crc_),
-                                kAlign,
-                                allocated_memory);
-    return new(memory) Self(poly,
-                            degree,
-                            canonical,
-                            roll_start_value,
-                            roll_length);
-  }
-
-  virtual void Delete() {
-    AlignedFree(this);
-  }
-
-  void *operator new(size_t, void *p) {
-    return p;
-  }
-
-  virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
-                                    /* OUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.Base().GeneratingPolynomial(), lo, hi);
-  }
-
-  virtual size_t Degree() const {
-    return crc_.Base().Degree();
-  }
-
-  virtual void CanonizeValue(/* OUT */ UINT64 *lo,
-                             /* OUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.Base().Canonize(), lo, hi);
-  }
-
-  virtual void RollStartValue(/* OUT */ UINT64 *lo,
-                              /* OUT */ UINT64 *hi = NULL) const {
-    SetValue(rolling_crc_.StartValue(), lo, hi);
-  }
-
-  virtual size_t RollWindowBytes() const {
-    return rolling_crc_.WindowBytes();
-  }
-
-  virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
-                              /* OUT */ UINT64 *hi = NULL) const {
-    Crc crc = crc_.CrcDefault(&crc_, sizeof(crc_), 0);
-    crc = crc_.CrcDefault(&rolling_crc_, sizeof(rolling_crc_), crc);
-    SetValue(crc, lo, hi);
-  }
-
-  virtual void Compute(const void *data,
-                       size_t bytes,
-                       /* INOUT */ UINT64 *lo,
-                       /* INOUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.CrcDefault(data, bytes, GetValue(lo, hi)), lo, hi);
-  }
-
-  virtual void RollStart(const void *data,
-                         /* INOUT */ UINT64 *lo,
-                         /* INOUT */ UINT64 *hi = NULL) const {
-    SetValue(rolling_crc_.Start(data), lo, hi);
-  }
-
-  virtual void Roll(size_t byte_out,
-                    size_t byte_in,
-                    /* INOUT */ UINT64 *lo,
-                    /* INOUT */ UINT64 *hi = NULL) const {
-    SetValue(rolling_crc_.Roll(GetValue(lo, hi), byte_out, byte_in), lo, hi);
-  }
-
-  virtual void CrcOfZeroes(UINT64 bytes,
-                           /* INOUT */ UINT64 *lo,
-                           /* INOUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.Base().CrcOfZeroes(bytes, GetValue(lo, hi)), lo, hi);
-  }
-
-  virtual void ChangeStartValue(
-      UINT64 start_old_lo, UINT64 start_old_hi,
-      UINT64 start_new_lo, UINT64 start_new_hi,
-      UINT64 bytes,
-      /* INOUT */ UINT64 *lo,
-      /* INOUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.Base().ChangeStartValue(
-                    GetValue(lo, hi),
-                    bytes,
-                    GetValue(start_old_lo, start_old_hi),
-                    GetValue(start_new_lo, start_new_hi)),
-             lo,
-             hi);
-  }
-
-  virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
-                           UINT64 bytes_B,
-                           /* INOUT */ UINT64* crcA_lo,
-                           /* INOUT */ UINT64* crcA_hi = NULL) const {
-    SetValue(crc_.Base().Concatenate(GetValue(crcA_lo, crcA_hi),
-                                     GetValue(crcB_lo, crcB_hi),
-                                     bytes_B),
-             crcA_lo,
-             crcA_hi);
-  }
-
-  virtual size_t StoreComplementaryCrc(
-      void *dst,
-      UINT64 message_crc_lo, UINT64 message_crc_hi,
-      UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const {
-    return crc_.Base().StoreComplementaryCrc(
-        dst,
-        GetValue(message_crc_lo, message_crc_hi),
-        GetValue(result_crc_lo, result_crc_hi));
-  }
-
-  virtual size_t StoreCrc(void *dst,
-                          UINT64 lo,
-                          UINT64 hi = 0) const {
-    return crc_.Base().StoreCrc(dst, GetValue(lo, hi));
-  }
-
-  virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
-                        /* OUT */ UINT64 *hi = NULL) const {
-    SetValue(crc_.Base().CrcOfCrc(), lo, hi);
-  }
-
- private:
-  static Crc GetValue(UINT64 *lo, UINT64 *hi) {
-    if (sizeof(Crc) <= sizeof(*lo)) {
-      return CrcFromUint64<Crc>(*lo);
-    } else {
-      return CrcFromUint64<Crc>(*lo, *hi);
-    }
-  }
-
-  static Crc GetValue(UINT64 lo, UINT64 hi) {
-    return CrcFromUint64<Crc>(lo, hi);
-  }
-
-  static void SetValue(const Crc &crc, UINT64 *lo, UINT64 *hi) {
-    Uint64FromCrc<Crc>(crc,
-                       reinterpret_cast<crcutil::uint64 *>(lo),
-                       reinterpret_cast<crcutil::uint64 *>(hi));
-  }
-
-  const CrcImplementation crc_;
-  const RollingCrcImplementation rolling_crc_;
-
-  const Self &operator =(const Self &) {}
-};
-
-#if defined(_MSC_VER)
-// 'use_sse4_2' : unreferenced formal parameter
-#pragma warning(disable: 4100)
-#endif  // defined(_MSC_VER)
-
-bool CRC::IsSSE42Available() {
-#if HAVE_AMD64 || HAVE_I386
-  return Crc32cSSE4::IsSSE42Available();
-#else
-  return false;
-#endif  // HAVE_AMD64 || HAVE_I386
-}
-
-CRC::~CRC() {}
-CRC::CRC() {}
-
-CRC *CRC::Create(UINT64 poly_lo,
-                 UINT64 poly_hi,
-                 size_t degree,
-                 bool canonical,
-                 UINT64 roll_start_value_lo,
-                 UINT64 roll_start_value_hi,
-                 size_t roll_length,
-                 bool use_sse4_2,
-                 const void **allocated_memory) {
-  if (degree == 0) {
-    return NULL;
-  }
-
-  if (degree > 64) {
-#if !HAVE_SSE2
-    return NULL;
-#else
-    if (degree > 128) {
-      return NULL;
-    }
-    uint128_sse2 poly = CrcFromUint64<uint128_sse2>(poly_lo, poly_hi);
-    if (degree != 128 && (poly >> degree) != 0) {
-      return NULL;
-    }
-    uint128_sse2 roll_start_value =
-        CrcFromUint64<uint128_sse2>(roll_start_value_lo, roll_start_value_hi);
-    if (degree != 128 && (roll_start_value >> degree) != 0) {
-      return NULL;
-    }
-#if HAVE_I386
-    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint32, 3> Crc128;
-#elif defined(__GNUC__) && GCC_VERSION_AVAILABLE(4, 5)
-    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 6> Crc128;
-#else
-    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 4> Crc128;
-#endif  // HAVE_I386
-    return Implementation<Crc128, RollingCrc<Crc128> >::Create(
-        poly,
-        degree,
-        canonical,
-        roll_start_value,
-        roll_length,
-        allocated_memory);
-#endif  // !HAVE_SSE2
-  }
-
-#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-  if (use_sse4_2 &&
-      degree == Crc32cSSE4::FixedDegree() &&
-      poly_lo == Crc32cSSE4::FixedGeneratingPolynomial() &&
-      poly_hi == 0) {
-      if (roll_start_value_hi != 0 || (roll_start_value_lo >> 32) != 0) {
-        return NULL;
-      }
-    return Implementation<Crc32cSSE4, RollingCrc32cSSE4>::Create(
-        static_cast<size_t>(poly_lo),
-        degree,
-        canonical,
-        static_cast<size_t>(roll_start_value_lo),
-        static_cast<size_t>(roll_length),
-        allocated_memory);
-  }
-#endif  // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
-
-  if (poly_hi != 0 || (degree != 64 && (poly_lo >> degree) != 0)) {
-    return NULL;
-  }
-  if (roll_start_value_hi != 0 ||
-      (degree != 64 && (roll_start_value_lo >> degree) != 0)) {
-    return NULL;
-  }
-  typedef GenericCrc<crcutil::uint64, crcutil::uint64, crcutil::uint64, 4>
-      Crc64;
-  return Implementation<Crc64, RollingCrc<Crc64> >::Create(
-      poly_lo,
-      degree,
-      canonical,
-      roll_start_value_lo,
-      roll_length,
-      allocated_memory);
-}
-
-}  // namespace crcutil_interface
+// Suppress 'invalid access to non-static data member ...  of NULL object'
+// by measuring the member offset on a fake object at address 1 (and
+// subtracting that 1 again) instead of on a NULL object.
+// NB: each line-continuation backslash must be the very last character of
+// its line; whitespace after it draws a GCC warning and is not a
+// continuation for other preprocessors.
+#undef offsetof
+#define offsetof(TYPE, MEMBER) (reinterpret_cast <size_t> \
+    ((&reinterpret_cast <const char &>( \
+        reinterpret_cast <const TYPE *>(1)->MEMBER))) - 1)
+#endif  // defined(__GNUC__) 
+ 
+namespace crcutil_interface { 
+ 
+// Concrete CRC: forwards every call of the abstract CRC interface to a
+// CrcImplementation (plain CRC) and a RollingCrcImplementation (rolling
+// CRC), converting between the lo/hi UINT64 pairs used by the interface
+// and the implementation's native Crc type.
+template<typename CrcImplementation, typename RollingCrcImplementation>
+    class Implementation : public CRC {
+ public:
+  typedef typename CrcImplementation::Crc Crc;
+  typedef Implementation<CrcImplementation, RollingCrcImplementation> Self;
+
+  // Builds the CRC tables first, then the rolling CRC on top of them
+  // (members are initialized in declaration order: crc_, rolling_crc_).
+  Implementation(const Crc &poly,
+                 size_t degree,
+                 bool canonical,
+                 const Crc &roll_start_value,
+                 size_t roll_length)
+    : crc_(poly, degree, canonical),
+      rolling_crc_(crc_, roll_length, roll_start_value) {
+  }
+
+  // Allocates storage with AlignedAlloc() so that the crc_ member lands on
+  // a kAlign boundary, then constructs the object in place.
+  // "allocated_memory" is passed through to AlignedAlloc() untouched.
+  static Self *Create(const Crc &poly,
+                      size_t degree,
+                      bool canonical,
+                      const Crc &roll_start_value,
+                      size_t roll_length,
+                      const void **allocated_memory) {
+    void *memory = AlignedAlloc(sizeof(Self),
+                                offsetof(Self, crc_),
+                                kAlign,
+                                allocated_memory);
+    return new(memory) Self(poly,
+                            degree,
+                            canonical,
+                            roll_start_value,
+                            roll_length);
+  }
+
+  // Releases the storage obtained in Create(). No destructor call is made
+  // here; the memory is handed straight to AlignedFree().
+  virtual void Delete() {
+    AlignedFree(this);
+  }
+
+  // Placement form used by Create(): returns the supplied storage as is.
+  void *operator new(size_t, void *p) {
+    return p;
+  }
+
+  virtual void GeneratingPolynomial(/* OUT */ UINT64 *lo,
+                                    /* OUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.Base().GeneratingPolynomial(), lo, hi);
+  }
+
+  virtual size_t Degree() const {
+    return crc_.Base().Degree();
+  }
+
+  virtual void CanonizeValue(/* OUT */ UINT64 *lo,
+                             /* OUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.Base().Canonize(), lo, hi);
+  }
+
+  virtual void RollStartValue(/* OUT */ UINT64 *lo,
+                              /* OUT */ UINT64 *hi = NULL) const {
+    SetValue(rolling_crc_.StartValue(), lo, hi);
+  }
+
+  virtual size_t RollWindowBytes() const {
+    return rolling_crc_.WindowBytes();
+  }
+
+  // CRC over the raw bytes of crc_ followed by the raw bytes of
+  // rolling_crc_, starting from 0.
+  virtual void SelfCheckValue(/* OUT */ UINT64 *lo,
+                              /* OUT */ UINT64 *hi = NULL) const {
+    Crc crc = crc_.CrcDefault(&crc_, sizeof(crc_), 0);
+    crc = crc_.CrcDefault(&rolling_crc_, sizeof(rolling_crc_), crc);
+    SetValue(crc, lo, hi);
+  }
+
+  // Continues the CRC held in (*lo, *hi) over "bytes" bytes of "data" and
+  // stores the updated value back.
+  virtual void Compute(const void *data,
+                       size_t bytes,
+                       /* INOUT */ UINT64 *lo,
+                       /* INOUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.CrcDefault(data, bytes, GetValue(lo, hi)), lo, hi);
+  }
+
+  // Stores the value of rolling_crc_.Start(data); the incoming contents of
+  // *lo / *hi are not read.
+  virtual void RollStart(const void *data,
+                         /* INOUT */ UINT64 *lo,
+                         /* INOUT */ UINT64 *hi = NULL) const {
+    SetValue(rolling_crc_.Start(data), lo, hi);
+  }
+
+  virtual void Roll(size_t byte_out,
+                    size_t byte_in,
+                    /* INOUT */ UINT64 *lo,
+                    /* INOUT */ UINT64 *hi = NULL) const {
+    SetValue(rolling_crc_.Roll(GetValue(lo, hi), byte_out, byte_in), lo, hi);
+  }
+
+  virtual void CrcOfZeroes(UINT64 bytes,
+                           /* INOUT */ UINT64 *lo,
+                           /* INOUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.Base().CrcOfZeroes(bytes, GetValue(lo, hi)), lo, hi);
+  }
+
+  virtual void ChangeStartValue(
+      UINT64 start_old_lo, UINT64 start_old_hi,
+      UINT64 start_new_lo, UINT64 start_new_hi,
+      UINT64 bytes,
+      /* INOUT */ UINT64 *lo,
+      /* INOUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.Base().ChangeStartValue(
+                    GetValue(lo, hi),
+                    bytes,
+                    GetValue(start_old_lo, start_old_hi),
+                    GetValue(start_new_lo, start_new_hi)),
+             lo,
+             hi);
+  }
+
+  // On entry (*crcA_lo, *crcA_hi) holds CRC(A); on exit it holds the value
+  // returned by Concatenate(CRC(A), CRC(B), bytes_B).
+  virtual void Concatenate(UINT64 crcB_lo, UINT64 crcB_hi,
+                           UINT64 bytes_B,
+                           /* INOUT */ UINT64* crcA_lo,
+                           /* INOUT */ UINT64* crcA_hi = NULL) const {
+    SetValue(crc_.Base().Concatenate(GetValue(crcA_lo, crcA_hi),
+                                     GetValue(crcB_lo, crcB_hi),
+                                     bytes_B),
+             crcA_lo,
+             crcA_hi);
+  }
+
+  virtual size_t StoreComplementaryCrc(
+      void *dst,
+      UINT64 message_crc_lo, UINT64 message_crc_hi,
+      UINT64 result_crc_lo, UINT64 result_crc_hi = 0) const {
+    return crc_.Base().StoreComplementaryCrc(
+        dst,
+        GetValue(message_crc_lo, message_crc_hi),
+        GetValue(result_crc_lo, result_crc_hi));
+  }
+
+  virtual size_t StoreCrc(void *dst,
+                          UINT64 lo,
+                          UINT64 hi = 0) const {
+    return crc_.Base().StoreCrc(dst, GetValue(lo, hi));
+  }
+
+  virtual void CrcOfCrc(/* OUT */ UINT64 *lo,
+                        /* OUT */ UINT64 *hi = NULL) const {
+    SetValue(crc_.Base().CrcOfCrc(), lo, hi);
+  }
+
+ private:
+  // Reads a Crc from an INOUT pair. "hi" is dereferenced only when Crc is
+  // wider than UINT64, so callers using such a Crc must pass a non-NULL hi.
+  static Crc GetValue(UINT64 *lo, UINT64 *hi) {
+    if (sizeof(Crc) <= sizeof(*lo)) {
+      return CrcFromUint64<Crc>(*lo);
+    } else {
+      return CrcFromUint64<Crc>(*lo, *hi);
+    }
+  }
+
+  // Builds a Crc from a by-value lo/hi pair.
+  static Crc GetValue(UINT64 lo, UINT64 hi) {
+    return CrcFromUint64<Crc>(lo, hi);
+  }
+
+  // Splits a Crc into the caller's lo/hi outputs.
+  static void SetValue(const Crc &crc, UINT64 *lo, UINT64 *hi) {
+    Uint64FromCrc<Crc>(crc,
+                       reinterpret_cast<crcutil::uint64 *>(lo),
+                       reinterpret_cast<crcutil::uint64 *>(hi));
+  }
+
+  const CrcImplementation crc_;
+  const RollingCrcImplementation rolling_crc_;
+
+  // Not assignable (both members are const). Declared but intentionally
+  // left undefined: an empty body would flow off the end of a function
+  // with a non-void return type, which is undefined behaviour if it is
+  // ever called.
+  const Self &operator =(const Self &);
+};
+ 
+#if defined(_MSC_VER) 
+// 'use_sse4_2' : unreferenced formal parameter 
+#pragma warning(disable: 4100) 
+#endif  // defined(_MSC_VER) 
+ 
+// Reports whether the SSE4.2 CRC32C path can be used. Builds that target
+// neither AMD64 nor I386 always answer false; the others delegate to
+// Crc32cSSE4::IsSSE42Available().
+bool CRC::IsSSE42Available() {
+#if !(HAVE_AMD64 || HAVE_I386)
+  return false;
+#else
+  return Crc32cSSE4::IsSSE42Available();
+#endif  // !(HAVE_AMD64 || HAVE_I386)
+}
+ 
+// Out-of-line, empty constructor and destructor of the CRC interface.
+CRC::CRC() {
+}
+
+CRC::~CRC() {
+}
+ 
+// Factory for CRC implementations. Picks, by degree and build flags:
+//   - degree in (64, 128]: a 128-bit GenericCrc (only when HAVE_SSE2);
+//   - the Crc32cSSE4 implementation when compiled in, requested through
+//     "use_sse4_2", and degree/polynomial match its fixed values;
+//   - otherwise a 64-bit GenericCrc.
+// Returns NULL when degree is 0 or unsupported, or when the polynomial or
+// the rolling start value has bits set at or above "degree".
+CRC *CRC::Create(UINT64 poly_lo,
+                 UINT64 poly_hi,
+                 size_t degree,
+                 bool canonical,
+                 UINT64 roll_start_value_lo,
+                 UINT64 roll_start_value_hi,
+                 size_t roll_length,
+                 bool use_sse4_2,
+                 const void **allocated_memory) {
+  if (degree == 0) {
+    return NULL;
+  }
+
+  if (degree > 64) {
+#if HAVE_SSE2
+    if (degree > 128) {
+      return NULL;
+    }
+    uint128_sse2 poly = CrcFromUint64<uint128_sse2>(poly_lo, poly_hi);
+    uint128_sse2 roll_start_value =
+        CrcFromUint64<uint128_sse2>(roll_start_value_lo, roll_start_value_hi);
+    // A full-width (128-bit) value cannot have bits above the degree, and
+    // shifting by the full width is avoided.
+    if (degree != 128) {
+      if ((poly >> degree) != 0) {
+        return NULL;
+      }
+      if ((roll_start_value >> degree) != 0) {
+        return NULL;
+      }
+    }
+#if HAVE_I386
+    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint32, 3> Wide;
+#elif defined(__GNUC__) && GCC_VERSION_AVAILABLE(4, 5)
+    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 6> Wide;
+#else
+    typedef GenericCrc<uint128_sse2, uint128_sse2, crcutil::uint64, 4> Wide;
+#endif  // HAVE_I386
+    return Implementation<Wide, RollingCrc<Wide> >::Create(
+        poly,
+        degree,
+        canonical,
+        roll_start_value,
+        roll_length,
+        allocated_memory);
+#else
+    return NULL;
+#endif  // HAVE_SSE2
+  }
+
+#if CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+  const bool wants_hw_crc32c =
+      use_sse4_2 &&
+      degree == Crc32cSSE4::FixedDegree() &&
+      poly_lo == Crc32cSSE4::FixedGeneratingPolynomial() &&
+      poly_hi == 0;
+  if (wants_hw_crc32c) {
+    // The rolling start value must fit into 32 bits.
+    if (roll_start_value_hi != 0 || (roll_start_value_lo >> 32) != 0) {
+      return NULL;
+    }
+    return Implementation<Crc32cSSE4, RollingCrc32cSSE4>::Create(
+        static_cast<size_t>(poly_lo),
+        degree,
+        canonical,
+        static_cast<size_t>(roll_start_value_lo),
+        static_cast<size_t>(roll_length),
+        allocated_memory);
+  }
+#endif  // CRCUTIL_USE_MM_CRC32 && (HAVE_I386 || HAVE_AMD64)
+
+  // Generic path, degree <= 64: the high words must be empty and no bit at
+  // or above "degree" may be set. The shift is skipped for degree == 64.
+  const bool poly_fits =
+      poly_hi == 0 && (degree == 64 || (poly_lo >> degree) == 0);
+  if (!poly_fits) {
+    return NULL;
+  }
+  const bool roll_start_fits =
+      roll_start_value_hi == 0 &&
+      (degree == 64 || (roll_start_value_lo >> degree) == 0);
+  if (!roll_start_fits) {
+    return NULL;
+  }
+
+  typedef GenericCrc<crcutil::uint64, crcutil::uint64, crcutil::uint64, 4>
+      Generic64;
+  return Implementation<Generic64, RollingCrc<Generic64> >::Create(
+      poly_lo,
+      degree,
+      canonical,
+      roll_start_value_lo,
+      roll_length,
+      allocated_memory);
+}
+ 
+}  // namespace crcutil_interface 

Some files were not shown because too many files changed in this diff