
Update contrib/libs/snappy to 1.1.9

ref:8e094c2e0f44b866d354257c6a902b6d4394b8f0
thegeorg
commit 2037874aa0

+ 4 - 4
contrib/libs/snappy/.yandex_meta/devtools.licenses.report

@@ -96,7 +96,7 @@ BELONGS ya.make
         Match type      : NOTICE
         Links           : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE
     Files with this license:
-        CONTRIBUTING.md [6:6]
+        CONTRIBUTING.md [26:26]
 
 KEEP     BSD-3-Clause         6aa235708ac9f5dd8e5c6ac415fc5837
 BELONGS ya.make
@@ -143,7 +143,7 @@ BELONGS ya.make
         Match type      : NOTICE
         Links           : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
     Files with this license:
-        NEWS [178:178]
+        NEWS [184:184]
 
 SKIP     LicenseRef-scancode-unknown-license-reference bfebd3ac57e8aa2b8b978019ed709cd1
 BELONGS ya.make
@@ -156,7 +156,7 @@ BELONGS ya.make
         Match type      : INTRO
         Links           : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE
     Files with this license:
-        README.md [23:23]
+        README.md [25:25]
 
 SKIP     LicenseRef-scancode-generic-cla d72fcd21b18e44b666a94e6225ed43eb
 BELONGS ya.make
@@ -169,7 +169,7 @@ BELONGS ya.make
         Match type      : NOTICE
         Links           : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE
     Files with this license:
-        CONTRIBUTING.md [8:9]
+        CONTRIBUTING.md [28:29]
 
 KEEP     BSD-3-Clause         f8141230e736a81272884d33c51c5ad4
 BELONGS ya.make

+ 20 - 0
contrib/libs/snappy/CONTRIBUTING.md

@@ -3,6 +3,26 @@
 We'd love to accept your patches and contributions to this project. There are
 just a few small guidelines you need to follow.
 
+## Project Goals
+
+In addition to the aims listed at the top of the [README](README.md) Snappy
+explicitly supports the following:
+
+1. C++11
+2. Clang (gcc and MSVC are best-effort).
+3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
+  1. [x86](https://en.wikipedia.org/wiki/X86)
+  2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
+  3. ARMv7 (32-bit)
+  4. ARMv8 (AArch64)
+4. Supports only the Snappy compression scheme as described in
+  [format_description.txt](format_description.txt).
+5. CMake for building
+
+Changes adding features or dependencies outside of the core area of focus listed
+above might not be accepted. If in doubt post a message to the
+[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
+
 ## Contributor License Agreement
 
 Contributions to this project must be accompanied by a Contributor License

+ 6 - 0
contrib/libs/snappy/NEWS

@@ -1,3 +1,9 @@
+Snappy v1.1.9, May 4th 2021:
+
+  * Performance improvements.
+
+  * Google Test and Google Benchmark are now bundled in third_party/.
+
 Snappy v1.1.8, January 15th 2020:
 
   * Small performance improvements.

+ 23 - 31
contrib/libs/snappy/README.md

@@ -1,5 +1,7 @@
 Snappy, a fast compressor/decompressor.
 
+[![Build Status](https://travis-ci.org/google/snappy.svg?branch=master)](https://travis-ci.org/google/snappy)
+[![Build status](https://ci.appveyor.com/api/projects/status/t9nubcqkwo8rw8yn/branch/master?svg=true)](https://ci.appveyor.com/project/pwnall/leveldb)
 
 Introduction
 ============
@@ -69,6 +71,7 @@ You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt)
 or later to build:
 
 ```bash
+git submodule update --init
 mkdir build
 cd build && cmake ../ && make
 ```
@@ -107,42 +110,31 @@ information.
 Tests and benchmarks
 ====================
 
-When you compile Snappy, snappy_unittest is compiled in addition to the
-library itself. You do not need it to use the compressor from your own library,
-but it contains several useful components for Snappy development.
+When you compile Snappy, the following binaries are compiled in addition to the
+library itself. You do not need them to use the compressor from your own
+library, but they are useful for Snappy development.
 
-First of all, it contains unit tests, verifying correctness on your machine in
-various scenarios. If you want to change or optimize Snappy, please run the
-tests to verify you have not broken anything. Note that if you have the
-Google Test library installed, unit test behavior (especially failures) will be
-significantly more user-friendly. You can find Google Test at
+* `snappy_benchmark` contains microbenchmarks used to tune compression and
+  decompression performance.
+* `snappy_unittests` contains unit tests, verifying correctness on your machine
+  in various scenarios.
+* `snappy_test_tool` can benchmark Snappy against a few other compression
+  libraries (zlib, LZO, LZF, and QuickLZ), if they were detected at configure
+  time. To benchmark using a given file, give the compression algorithm you want
+  to test Snappy against (e.g. --zlib) and then a list of one or more file names
+  on the command line.
 
-  https://github.com/google/googletest
+If you want to change or optimize Snappy, please run the tests and benchmarks to
+verify you have not broken anything.
 
-You probably also want the gflags library for handling of command-line flags;
-you can find it at
-
-  https://gflags.github.io/gflags/
-
-In addition to the unit tests, snappy contains microbenchmarks used to
-tune compression and decompression performance. These are automatically run
-before the unit tests, but you can disable them using the flag
---run_microbenchmarks=false if you have gflags installed (otherwise you will
-need to edit the source).
-
-Finally, snappy can benchmark Snappy against a few other compression libraries
-(zlib, LZO, LZF, and QuickLZ), if they were detected at configure time.
-To benchmark using a given file, give the compression algorithm you want to test
-Snappy against (e.g. --zlib) and then a list of one or more file names on the
-command line. The testdata/ directory contains the files used by the
-microbenchmark, which should provide a reasonably balanced starting point for
-benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they
-are used to verify correctness in the presence of corrupted data in the unit
-test.)
+The testdata/ directory contains the files used by the microbenchmarks, which
+should provide a reasonably balanced starting point for benchmarking. (Note that
+baddata[1-3].snappy are not intended as benchmarks; they are used to verify
+correctness in the presence of corrupted data in the unit test.)
 
 
 Contact
 =======
 
-Snappy is distributed through GitHub. For the latest version, a bug tracker,
-and other information, see https://github.com/google/snappy.
+Snappy is distributed through GitHub. For the latest version and other
+information, see https://github.com/google/snappy.

+ 5 - 11
contrib/libs/snappy/config-linux.h

@@ -1,35 +1,29 @@
 #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
 #define THIRD_PARTY_SNAPPY_OPENSOURCE_CMAKE_CONFIG_H_
 
+/* Define to 1 if the compiler supports __attribute__((always_inline)). */
+/* #undef HAVE_ATTRIBUTE_ALWAYS_INLINE */
+
 /* Define to 1 if the compiler supports __builtin_ctz and friends. */
 #define HAVE_BUILTIN_CTZ 1
 
 /* Define to 1 if the compiler supports __builtin_expect. */
 #define HAVE_BUILTIN_EXPECT 1
 
-/* Define to 1 if you have the <byteswap.h> header file. */
-#define HAVE_BYTESWAP_H 1
-
 /* Define to 1 if you have a definition for mmap() in <sys/mman.h>. */
 #define HAVE_FUNC_MMAP 1
 
 /* Define to 1 if you have a definition for sysconf() in <unistd.h>. */
 #define HAVE_FUNC_SYSCONF 1
 
-/* Define to 1 to use the gflags package for command-line parsing. */
-/* #undef HAVE_GFLAGS */
-
-/* Define to 1 if you have Google Test. */
-/* #undef HAVE_GTEST */
-
 /* Define to 1 if you have the `lzo2' library (-llzo2). */
 /* #undef HAVE_LIBLZO2 */
 
 /* Define to 1 if you have the `z' library (-lz). */
 /* #undef HAVE_LIBZ */
 
-/* Define to 1 if you have the <sys/endian.h> header file. */
-/* #undef HAVE_SYS_ENDIAN_H */
+/* Define to 1 if you have the `lz4' library (-llz4). */
+/* #undef HAVE_LIBLZ4 */
 
 /* Define to 1 if you have the <sys/mman.h> header file. */
 #define HAVE_SYS_MMAN_H 1

+ 110 - 24
contrib/libs/snappy/snappy-internal.h

@@ -46,16 +46,16 @@ class WorkingMemory {
   // Allocates and clears a hash table using memory in "*this",
   // stores the number of buckets in "*table_size" and returns a pointer to
   // the base of the hash table.
-  uint16* GetHashTable(size_t fragment_size, int* table_size) const;
+  uint16_t* GetHashTable(size_t fragment_size, int* table_size) const;
   char* GetScratchInput() const { return input_; }
   char* GetScratchOutput() const { return output_; }
 
  private:
-  char* mem_;      // the allocated memory, never nullptr
-  size_t size_;    // the size of the allocated memory, never 0
-  uint16* table_;  // the pointer to the hashtable
-  char* input_;    // the pointer to the input scratch buffer
-  char* output_;   // the pointer to the output scratch buffer
+  char* mem_;        // the allocated memory, never nullptr
+  size_t size_;      // the size of the allocated memory, never 0
+  uint16_t* table_;  // the pointer to the hashtable
+  char* input_;      // the pointer to the input scratch buffer
+  char* output_;     // the pointer to the output scratch buffer
 
   // No copying
   WorkingMemory(const WorkingMemory&);
@@ -76,7 +76,7 @@ class WorkingMemory {
 char* CompressFragment(const char* input,
                        size_t input_length,
                        char* op,
-                       uint16* table,
+                       uint16_t* table,
                        const int table_size);
 
 // Find the largest n such that
@@ -89,12 +89,18 @@ char* CompressFragment(const char* input,
 // Does not read *(s1 + (s2_limit - s2)) or beyond.
 // Requires that s2_limit >= s2.
 //
+// In addition populate *data with the next 5 bytes from the end of the match.
+// This is only done if 8 bytes are available (s2_limit - s2 >= 8). The point is
+// that on some arch's this can be done faster in this routine than subsequent
+// loading from s2 + n.
+//
 // Separate implementation for 64-bit, little-endian cpus.
 #if !defined(SNAPPY_IS_BIG_ENDIAN) && \
-    (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM))
+    (defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || defined(ARCH_ARM))
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
-                                                      const char* s2_limit) {
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
   assert(s2_limit >= s2);
   size_t matched = 0;
 
@@ -103,12 +109,71 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
   // uncommon code paths that determine, without extra effort, whether the match
   // length is less than 8.  In short, we are hoping to avoid a conditional
   // branch, and perhaps get better code layout from the C++ compiler.
-  if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
-    uint64 a1 = UNALIGNED_LOAD64(s1);
-    uint64 a2 = UNALIGNED_LOAD64(s2);
-    if (a1 != a2) {
-      return std::pair<size_t, bool>(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3,
-                                     true);
+  if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (SNAPPY_PREDICT_TRUE(a1 != a2)) {
+      // This code is critical for performance. The reason is that it determines
+      // how much to advance `ip` (s2). This obviously depends on both the loads
+      // from the `candidate` (s1) and `ip`. Furthermore the next `candidate`
+      // depends on the advanced `ip` calculated here through a load, hash and
+      // new candidate hash lookup (a lot of cycles). This makes s1 (ie.
+      // `candidate`) the variable that limits throughput. This is the reason we
+      // go through hoops to have this function update `data` for the next iter.
+      // The straightforward code would use *data, given by
+      //
+      // *data = UNALIGNED_LOAD64(s2 + matched_bytes) (Latency of 5 cycles),
+      //
+      // as input for the hash table lookup to find next candidate. However
+      // this forces the load on the data dependency chain of s1, because
+      // matched_bytes directly depends on s1. However matched_bytes is 0..7, so
+      // we can also calculate *data by
+      //
+      // *data = AlignRight(UNALIGNED_LOAD64(s2), UNALIGNED_LOAD64(s2 + 8),
+      //                    matched_bytes);
+      //
+      // The loads do not depend on s1 anymore and are thus off the bottleneck.
+      // The straightforward implementation on x86_64 would be to use
+      //
+      // shrd rax, rdx, cl  (cl being matched_bytes * 8)
+      //
+      // unfortunately shrd with a variable shift has a 4 cycle latency. So this
+      // only wins 1 cycle. The BMI2 shrx instruction is a 1 cycle variable
+      // shift instruction but can only shift 64 bits. If we focus on just
+      // obtaining the least significant 4 bytes, we can obtain this by
+      //
+      // *data = ConditionalMove(matched_bytes < 4, UNALIGNED_LOAD64(s2),
+      //     UNALIGNED_LOAD64(s2 + 4) >> ((matched_bytes & 3) * 8));
+      //
+      // Written like above this is not a big win, the conditional move would be
+      // a cmp followed by a cmov (2 cycles) followed by a shift (1 cycle).
+      // However matched_bytes < 4 is equal to
+      // static_cast<uint32_t>(xorval) != 0. Written that way, the conditional
+      // move (2 cycles) can execute in parallel with FindLSBSetNonZero64
+      // (tzcnt), which takes 3 cycles.
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+#ifndef __x86_64__
+      *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+#else
+      // Ideally this would just be
+      //
+      // a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
+      //
+      // However clang correctly infers that the above statement participates in
+      // a critical data dependency chain and thus, unfortunately, refuses to
+      // use a conditional move (it's tuned to cut data dependencies). In this
+      // case there is a longer parallel chain anyway AND this will be fairly
+      // unpredictable.
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval));
+      *data = a2 >> (shift & (3 * 8));
+#endif
+      return std::pair<size_t, bool>(matched_bytes, true);
     } else {
       matched = 8;
       s2 += 8;
@@ -119,14 +184,27 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
   // time until we find a 64-bit block that doesn't match; then we find
   // the first non-matching bit and use that to calculate the total
   // length of the match.
-  while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) {
-    if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
+  while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 16)) {
+    uint64_t a1 = UNALIGNED_LOAD64(s1 + matched);
+    uint64_t a2 = UNALIGNED_LOAD64(s2);
+    if (a1 == a2) {
       s2 += 8;
       matched += 8;
     } else {
-      uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
-      int matching_bits = Bits::FindLSBSetNonZero64(x);
-      matched += matching_bits >> 3;
+      uint64_t xorval = a1 ^ a2;
+      int shift = Bits::FindLSBSetNonZero64(xorval);
+      size_t matched_bytes = shift >> 3;
+#ifndef __x86_64__
+      *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+#else
+      uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
+      asm("testl %k2, %k2\n\t"
+          "cmovzq %1, %0\n\t"
+          : "+r"(a2)
+          : "r"(a3), "r"(xorval));
+      *data = a2 >> (shift & (3 * 8));
+#endif
+      matched += matched_bytes;
       assert(matched >= 8);
       return std::pair<size_t, bool>(matched, false);
     }
@@ -136,6 +214,9 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
       ++s2;
       ++matched;
     } else {
+      if (s2 <= s2_limit - 8) {
+        *data = UNALIGNED_LOAD64(s2);
+      }
       return std::pair<size_t, bool>(matched, matched < 8);
     }
   }
@@ -144,7 +225,8 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
 #else
 static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
                                                       const char* s2,
-                                                      const char* s2_limit) {
+                                                      const char* s2_limit,
+                                                      uint64_t* data) {
   // Implementation based on the x86-64 version, above.
   assert(s2_limit >= s2);
   int matched = 0;
@@ -155,15 +237,17 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
     matched += 4;
   }
   if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
-    uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+    uint32_t x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
     int matching_bits = Bits::FindLSBSetNonZero(x);
     matched += matching_bits >> 3;
+    s2 += matching_bits >> 3;
   } else {
     while ((s2 < s2_limit) && (s1[matched] == *s2)) {
       ++s2;
       ++matched;
     }
   }
+  if (s2 <= s2_limit - 8) *data = LittleEndian::Load64(s2);
   return std::pair<size_t, bool>(matched, matched < 8);
 }
 #endif
@@ -190,7 +274,8 @@ static const int kMaximumTagLength = 5;  // COPY_4_BYTE_OFFSET plus the actual o
 // because of efficiency reasons:
 //      (1) Extracting a byte is faster than a bit-field
 //      (2) It properly aligns copy offset so we do not need a <<8
-static const uint16 char_table[256] = {
+static constexpr uint16_t char_table[256] = {
+    // clang-format off
   0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
   0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
   0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
@@ -222,7 +307,8 @@ static const uint16 char_table[256] = {
   0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
   0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
   0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
-  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040,
+    // clang-format on
 };
 
 }  // end namespace internal
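
A minimal sketch of the selection the `testl` + `cmovzq` inline assembly above performs, written as portable C++; the helper names `LoadU64` and `NextData` are illustrative only and are not part of snappy:

```cpp
#include <cstdint>
#include <cstring>

// Illustrative unaligned 64-bit load via memcpy (mirrors UNALIGNED_LOAD64).
static inline uint64_t LoadU64(const void* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

// Given a non-zero xorval = a1 ^ a2 and shift = index of its lowest set bit,
// return a value whose low four bytes equal the bytes at s2 + matched_bytes
// (matched_bytes == shift >> 3), without loading from s2 + matched_bytes and
// thereby without putting the load on the s1-dependent chain.
static inline uint64_t NextData(const char* s2, uint64_t xorval, int shift) {
  uint64_t a2 = LoadU64(s2);
  uint64_t a3 = LoadU64(s2 + 4);
  // matched_bytes < 4 exactly when the low 32 bits of xorval are non-zero;
  // this is the condition the cmovzq implements.
  uint64_t base = (static_cast<uint32_t>(xorval) != 0) ? a2 : a3;
  return base >> (shift & (3 * 8));  // shift right by (matched_bytes & 3) * 8
}
```

Only the low four bytes of the result are guaranteed to match `UNALIGNED_LOAD64(s2 + matched_bytes)`, which the comment above indicates is all the next hash lookup needs.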

+ 26 - 9
contrib/libs/snappy/snappy-sinksource.cc

@@ -26,23 +26,31 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-#include <string.h>
+#include <stddef.h>
+#include <cstring>
 
 #include "snappy-sinksource.h"
 
 namespace snappy {
 
-Source::~Source() { }
+Source::~Source() = default;
 
-Sink::~Sink() { }
+Sink::~Sink() = default;
 
 char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)length;
+
   return scratch;
 }
 
 char* Sink::GetAppendBufferVariable(
       size_t min_size, size_t desired_size_hint, char* scratch,
       size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)desired_size_hint;
+
   *allocated_size = scratch_size;
   return scratch;
 }
@@ -55,7 +63,7 @@ void Sink::AppendAndTakeOwnership(
   (*deleter)(deleter_arg, bytes, n);
 }
 
-ByteArraySource::~ByteArraySource() { }
+ByteArraySource::~ByteArraySource() = default;
 
 size_t ByteArraySource::Available() const { return left_; }
 
@@ -74,22 +82,26 @@ UncheckedByteArraySink::~UncheckedByteArraySink() { }
 void UncheckedByteArraySink::Append(const char* data, size_t n) {
   // Do no copying if the caller filled in the result of GetAppendBuffer()
   if (data != dest_) {
-    memcpy(dest_, data, n);
+    std::memcpy(dest_, data, n);
   }
   dest_ += n;
 }
 
 char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)len;
+  (void)scratch;
+
   return dest_;
 }
 
 void UncheckedByteArraySink::AppendAndTakeOwnership(
-    char* data, size_t n,
+    char* bytes, size_t n,
     void (*deleter)(void*, const char*, size_t),
     void *deleter_arg) {
-  if (data != dest_) {
-    memcpy(dest_, data, n);
-    (*deleter)(deleter_arg, data, n);
+  if (bytes != dest_) {
+    std::memcpy(dest_, bytes, n);
+    (*deleter)(deleter_arg, bytes, n);
   }
   dest_ += n;
 }
@@ -97,6 +109,11 @@ void UncheckedByteArraySink::AppendAndTakeOwnership(
 char* UncheckedByteArraySink::GetAppendBufferVariable(
       size_t min_size, size_t desired_size_hint, char* scratch,
       size_t scratch_size, size_t* allocated_size) {
+  // TODO: Switch to [[maybe_unused]] when we can assume C++17.
+  (void)min_size;
+  (void)scratch;
+  (void)scratch_size;
+
   *allocated_size = desired_size_hint;
   return dest_;
 }
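
The TODO comments added above point at the C++17 `[[maybe_unused]]` attribute; a hedged sketch of that form (a standalone function used purely for illustration, not snappy's actual signature change) would be:

```cpp
#include <cstddef>

// With C++17, the attribute replaces the (void)length cast used above.
char* GetAppendBufferSketch([[maybe_unused]] std::size_t length, char* scratch) {
  return scratch;
}
```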

+ 11 - 11
contrib/libs/snappy/snappy-sinksource.h

@@ -146,10 +146,10 @@ class Source {
 class ByteArraySource : public Source {
  public:
   ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
-  virtual ~ByteArraySource();
-  virtual size_t Available() const;
-  virtual const char* Peek(size_t* len);
-  virtual void Skip(size_t n);
+  ~ByteArraySource() override;
+  size_t Available() const override;
+  const char* Peek(size_t* len) override;
+  void Skip(size_t n) override;
  private:
   const char* ptr_;
   size_t left_;
@@ -159,15 +159,15 @@ class ByteArraySource : public Source {
 class UncheckedByteArraySink : public Sink {
  public:
   explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
-  virtual ~UncheckedByteArraySink();
-  virtual void Append(const char* data, size_t n);
-  virtual char* GetAppendBuffer(size_t len, char* scratch);
-  virtual char* GetAppendBufferVariable(
+  ~UncheckedByteArraySink() override;
+  void Append(const char* data, size_t n) override;
+  char* GetAppendBuffer(size_t len, char* scratch) override;
+  char* GetAppendBufferVariable(
       size_t min_size, size_t desired_size_hint, char* scratch,
-      size_t scratch_size, size_t* allocated_size);
-  virtual void AppendAndTakeOwnership(
+      size_t scratch_size, size_t* allocated_size) override;
+  void AppendAndTakeOwnership(
       char* bytes, size_t n, void (*deleter)(void*, const char*, size_t),
-      void *deleter_arg);
+      void *deleter_arg) override;
 
   // Return the current output pointer so that a caller can see how
   // many bytes were produced.

+ 1 - 1
contrib/libs/snappy/snappy-stubs-internal.cc

@@ -33,7 +33,7 @@
 
 namespace snappy {
 
-void Varint::Append32(std::string* s, uint32 value) {
+void Varint::Append32(std::string* s, uint32_t value) {
   char buf[Varint::kMax32];
   const char* p = Varint::Encode32(buf, value);
   s->append(buf, p - buf);

+ 185 - 299
contrib/libs/snappy/snappy-stubs-internal.h

@@ -35,11 +35,13 @@
 #include "config.h"
 #endif
 
-#include <string>
+#include <stdint.h>
 
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <string>
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
@@ -67,19 +69,11 @@
 
 #include "snappy-stubs-public.h"
 
-#if defined(__x86_64__)
-
-// Enable 64-bit optimized versions of some routines.
-#define ARCH_K8 1
-
-#elif defined(__ppc64__)
-
+// Used to enable 64-bit optimized versions of some routines.
+#if defined(__PPC64__) || defined(__powerpc64__)
 #define ARCH_PPC 1
-
-#elif defined(__aarch64__)
-
+#elif defined(__aarch64__) || defined(_M_ARM64)
 #define ARCH_ARM 1
-
 #endif
 
 // Needed by OS X, among others.
@@ -93,7 +87,7 @@
 #ifdef ARRAYSIZE
 #undef ARRAYSIZE
 #endif
-#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
 
 // Static prediction hints.
 #ifdef HAVE_BUILTIN_EXPECT
@@ -104,212 +98,66 @@
 #define SNAPPY_PREDICT_TRUE(x) x
 #endif
 
-// This is only used for recomputing the tag byte table used during
-// decompression; for simplicity we just remove it from the open-source
-// version (anyone who wants to regenerate it can just do the call
-// themselves within main()).
-#define DEFINE_bool(flag_name, default_value, description) \
-  bool FLAGS_ ## flag_name = default_value
-#define DECLARE_bool(flag_name) \
-  extern bool FLAGS_ ## flag_name
-
-namespace snappy {
-
-static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
-static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
-
-// Potentially unaligned loads and stores.
-
-// x86, PowerPC, and ARM64 can simply do these loads and stores native.
-
-#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
-    defined(__aarch64__)
-
-#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
-#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
-#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
-
-#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
-#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
-#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
-
-// ARMv7 and newer support native unaligned accesses, but only of 16-bit
-// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
-// do an unaligned read and rotate the words around a bit, or do the reads very
-// slowly (trip through kernel mode). There's no simple #define that says just
-// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
-// sub-architectures.
-//
-// This is a mess, but there's not much we can do about it.
-//
-// To further complicate matters, only LDR instructions (single reads) are
-// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
-// explicitly tell the compiler that these accesses can be unaligned, it can and
-// will combine accesses. On armcc, the way to signal this is done by accessing
-// through the type (uint32 __packed *), but GCC has no such attribute
-// (it ignores __attribute__((packed)) on individual variables). However,
-// we can tell it that a _struct_ is unaligned, which has the same effect,
-// so we do that.
-
-#elif defined(__arm__) && \
-      !defined(__ARM_ARCH_4__) && \
-      !defined(__ARM_ARCH_4T__) && \
-      !defined(__ARM_ARCH_5__) && \
-      !defined(__ARM_ARCH_5T__) && \
-      !defined(__ARM_ARCH_5TE__) && \
-      !defined(__ARM_ARCH_5TEJ__) && \
-      !defined(__ARM_ARCH_6__) && \
-      !defined(__ARM_ARCH_6J__) && \
-      !defined(__ARM_ARCH_6K__) && \
-      !defined(__ARM_ARCH_6Z__) && \
-      !defined(__ARM_ARCH_6ZK__) && \
-      !defined(__ARM_ARCH_6T2__)
-
-#if __GNUC__
-#define ATTRIBUTE_PACKED __attribute__((__packed__))
+// Inlining hints.
+#ifdef HAVE_ATTRIBUTE_ALWAYS_INLINE
+#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
 #else
-#define ATTRIBUTE_PACKED
+#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
 #endif
 
-namespace base {
-namespace internal {
-
-struct Unaligned16Struct {
-  uint16 value;
-  uint8 dummy;  // To make the size non-power-of-two.
-} ATTRIBUTE_PACKED;
-
-struct Unaligned32Struct {
-  uint32 value;
-  uint8 dummy;  // To make the size non-power-of-two.
-} ATTRIBUTE_PACKED;
-
-}  // namespace internal
-}  // namespace base
-
-#define UNALIGNED_LOAD16(_p) \
-    ((reinterpret_cast<const ::snappy::base::internal::Unaligned16Struct *>(_p))->value)
-#define UNALIGNED_LOAD32(_p) \
-    ((reinterpret_cast<const ::snappy::base::internal::Unaligned32Struct *>(_p))->value)
-
-#define UNALIGNED_STORE16(_p, _val) \
-    ((reinterpret_cast< ::snappy::base::internal::Unaligned16Struct *>(_p))->value = \
-         (_val))
-#define UNALIGNED_STORE32(_p, _val) \
-    ((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \
-         (_val))
-
-// TODO: NEON supports unaligned 64-bit loads and stores.
-// See if that would be more efficient on platforms supporting it,
-// at least for copies.
-
-inline uint64 UNALIGNED_LOAD64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UNALIGNED_STORE64(void *p, uint64 v) {
-  memcpy(p, &v, sizeof v);
-}
+// Stubbed version of ABSL_FLAG.
+//
+// In the open source version, flags can only be changed at compile time.
+#define SNAPPY_FLAG(flag_type, flag_name, default_value, help) \
+  flag_type FLAGS_ ## flag_name = default_value
 
-#else
+namespace snappy {
 
-// These functions are provided for architectures that don't support
-// unaligned loads and stores.
+// Stubbed version of absl::GetFlag().
+template <typename T>
+inline T GetFlag(T flag) { return flag; }
 
-inline uint16 UNALIGNED_LOAD16(const void *p) {
-  uint16 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
+static const uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
+static const int64_t kint64max = std::numeric_limits<int64_t>::max();
 
-inline uint32 UNALIGNED_LOAD32(const void *p) {
-  uint32 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint64 UNALIGNED_LOAD64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
+// Potentially unaligned loads and stores.
 
-inline void UNALIGNED_STORE16(void *p, uint16 v) {
-  memcpy(p, &v, sizeof v);
+inline uint16_t UNALIGNED_LOAD16(const void *p) {
+  // Compiles to a single movzx/ldrh on clang/gcc/msvc.
+  uint16_t v;
+  std::memcpy(&v, p, sizeof(v));
+  return v;
 }
 
-inline void UNALIGNED_STORE32(void *p, uint32 v) {
-  memcpy(p, &v, sizeof v);
+inline uint32_t UNALIGNED_LOAD32(const void *p) {
+  // Compiles to a single mov/ldr on clang/gcc/msvc.
+  uint32_t v;
+  std::memcpy(&v, p, sizeof(v));
+  return v;
 }
 
-inline void UNALIGNED_STORE64(void *p, uint64 v) {
-  memcpy(p, &v, sizeof v);
+inline uint64_t UNALIGNED_LOAD64(const void *p) {
+  // Compiles to a single mov/ldr on clang/gcc/msvc.
+  uint64_t v;
+  std::memcpy(&v, p, sizeof(v));
+  return v;
 }
 
-#endif
-
-// The following guarantees declaration of the byte swap functions.
-#if defined(SNAPPY_IS_BIG_ENDIAN)
-
-#ifdef HAVE_SYS_BYTEORDER_H
-#include <sys/byteorder.h>
-#endif
-
-#ifdef HAVE_SYS_ENDIAN_H
-#include <sys/endian.h>
-#endif
-
-#ifdef _MSC_VER
-#include <stdlib.h>
-#define bswap_16(x) _byteswap_ushort(x)
-#define bswap_32(x) _byteswap_ulong(x)
-#define bswap_64(x) _byteswap_uint64(x)
-
-#elif defined(__APPLE__)
-// Mac OS X / Darwin features
-#include <libkern/OSByteOrder.h>
-#define bswap_16(x) OSSwapInt16(x)
-#define bswap_32(x) OSSwapInt32(x)
-#define bswap_64(x) OSSwapInt64(x)
-
-#elif defined(HAVE_BYTESWAP_H)
-#include <byteswap.h>
-
-#elif defined(bswap32)
-// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
-#define bswap_16(x) bswap16(x)
-#define bswap_32(x) bswap32(x)
-#define bswap_64(x) bswap64(x)
-
-#elif defined(BSWAP_64)
-// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
-#define bswap_16(x) BSWAP_16(x)
-#define bswap_32(x) BSWAP_32(x)
-#define bswap_64(x) BSWAP_64(x)
-
-#else
-
-inline uint16 bswap_16(uint16 x) {
-  return (x << 8) | (x >> 8);
+inline void UNALIGNED_STORE16(void *p, uint16_t v) {
+  // Compiles to a single mov/strh on clang/gcc/msvc.
+  std::memcpy(p, &v, sizeof(v));
 }
 
-inline uint32 bswap_32(uint32 x) {
-  x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
-  return (x >> 16) | (x << 16);
+inline void UNALIGNED_STORE32(void *p, uint32_t v) {
+  // Compiles to a single mov/str on clang/gcc/msvc.
+  std::memcpy(p, &v, sizeof(v));
 }
 
-inline uint64 bswap_64(uint64 x) {
-  x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
-  x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
-  return (x >> 32) | (x << 32);
+inline void UNALIGNED_STORE64(void *p, uint64_t v) {
+  // Compiles to a single mov/str on clang/gcc/msvc.
+  std::memcpy(p, &v, sizeof(v));
 }
 
-#endif
-
-#endif  // defined(SNAPPY_IS_BIG_ENDIAN)
-
 // Convert to little-endian storage, opposite of network format.
 // Convert x from host to little endian: x = LittleEndian.FromHost(x);
 // convert x from little endian to host: x = LittleEndian.ToHost(x);
@@ -321,44 +169,77 @@ inline uint64 bswap_64(uint64 x) {
 //    x = LittleEndian.Load16(p);
 class LittleEndian {
  public:
-  // Conversion functions.
-#if defined(SNAPPY_IS_BIG_ENDIAN)
-
-  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
-  static uint16 ToHost16(uint16 x) { return bswap_16(x); }
-
-  static uint32 FromHost32(uint32 x) { return bswap_32(x); }
-  static uint32 ToHost32(uint32 x) { return bswap_32(x); }
-
-  static bool IsLittleEndian() { return false; }
+  // Functions to do unaligned loads and stores in little-endian order.
+  static inline uint16_t Load16(const void *ptr) {
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
 
-#else  // !defined(SNAPPY_IS_BIG_ENDIAN)
+    // Compiles to a single mov/str on recent clang and gcc.
+    return (static_cast<uint16_t>(buffer[0])) |
+            (static_cast<uint16_t>(buffer[1]) << 8);
+  }
 
-  static uint16 FromHost16(uint16 x) { return x; }
-  static uint16 ToHost16(uint16 x) { return x; }
+  static inline uint32_t Load32(const void *ptr) {
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
 
-  static uint32 FromHost32(uint32 x) { return x; }
-  static uint32 ToHost32(uint32 x) { return x; }
+    // Compiles to a single mov/str on recent clang and gcc.
+    return (static_cast<uint32_t>(buffer[0])) |
+            (static_cast<uint32_t>(buffer[1]) << 8) |
+            (static_cast<uint32_t>(buffer[2]) << 16) |
+            (static_cast<uint32_t>(buffer[3]) << 24);
+  }
 
-  static bool IsLittleEndian() { return true; }
+  static inline uint64_t Load64(const void *ptr) {
+    const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
+
+    // Compiles to a single mov/str on recent clang and gcc.
+    return (static_cast<uint64_t>(buffer[0])) |
+            (static_cast<uint64_t>(buffer[1]) << 8) |
+            (static_cast<uint64_t>(buffer[2]) << 16) |
+            (static_cast<uint64_t>(buffer[3]) << 24) |
+            (static_cast<uint64_t>(buffer[4]) << 32) |
+            (static_cast<uint64_t>(buffer[5]) << 40) |
+            (static_cast<uint64_t>(buffer[6]) << 48) |
+            (static_cast<uint64_t>(buffer[7]) << 56);
+  }
 
-#endif  // !defined(SNAPPY_IS_BIG_ENDIAN)
+  static inline void Store16(void *dst, uint16_t value) {
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
 
-  // Functions to do unaligned loads and stores in little-endian order.
-  static uint16 Load16(const void *p) {
-    return ToHost16(UNALIGNED_LOAD16(p));
+    // Compiles to a single mov/str on recent clang and gcc.
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
   }
 
-  static void Store16(void *p, uint16 v) {
-    UNALIGNED_STORE16(p, FromHost16(v));
+  static void Store32(void *dst, uint32_t value) {
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+
+    // Compiles to a single mov/str on recent clang and gcc.
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
+    buffer[2] = static_cast<uint8_t>(value >> 16);
+    buffer[3] = static_cast<uint8_t>(value >> 24);
   }
 
-  static uint32 Load32(const void *p) {
-    return ToHost32(UNALIGNED_LOAD32(p));
+  static void Store64(void* dst, uint64_t value) {
+    uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
+
+    // Compiles to a single mov/str on recent clang and gcc.
+    buffer[0] = static_cast<uint8_t>(value);
+    buffer[1] = static_cast<uint8_t>(value >> 8);
+    buffer[2] = static_cast<uint8_t>(value >> 16);
+    buffer[3] = static_cast<uint8_t>(value >> 24);
+    buffer[4] = static_cast<uint8_t>(value >> 32);
+    buffer[5] = static_cast<uint8_t>(value >> 40);
+    buffer[6] = static_cast<uint8_t>(value >> 48);
+    buffer[7] = static_cast<uint8_t>(value >> 56);
   }
 
-  static void Store32(void *p, uint32 v) {
-    UNALIGNED_STORE32(p, FromHost32(v));
+  static inline constexpr bool IsLittleEndian() {
+#if defined(SNAPPY_IS_BIG_ENDIAN)
+    return false;
+#else
+    return true;
+#endif  // defined(SNAPPY_IS_BIG_ENDIAN)
   }
 };
 
@@ -366,19 +247,17 @@ class LittleEndian {
 class Bits {
  public:
   // Return floor(log2(n)) for positive integer n.
-  static int Log2FloorNonZero(uint32 n);
+  static int Log2FloorNonZero(uint32_t n);
 
   // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
-  static int Log2Floor(uint32 n);
+  static int Log2Floor(uint32_t n);
 
   // Return the first set least / most significant bit, 0-indexed.  Returns an
   // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
   // that it's 0-indexed.
-  static int FindLSBSetNonZero(uint32 n);
+  static int FindLSBSetNonZero(uint32_t n);
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-  static int FindLSBSetNonZero64(uint64 n);
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+  static int FindLSBSetNonZero64(uint64_t n);
 
  private:
   // No copying
@@ -386,9 +265,9 @@ class Bits {
   void operator=(const Bits&);
 };
 
-#ifdef HAVE_BUILTIN_CTZ
+#if defined(HAVE_BUILTIN_CTZ)
 
-inline int Bits::Log2FloorNonZero(uint32 n) {
+inline int Bits::Log2FloorNonZero(uint32_t n) {
   assert(n != 0);
   // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof
   // represents subtraction in base 2 and observes that there's no carry.
@@ -399,66 +278,52 @@ inline int Bits::Log2FloorNonZero(uint32 n) {
   return 31 ^ __builtin_clz(n);
 }
 
-inline int Bits::Log2Floor(uint32 n) {
+inline int Bits::Log2Floor(uint32_t n) {
   return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
 }
 
-inline int Bits::FindLSBSetNonZero(uint32 n) {
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
   assert(n != 0);
   return __builtin_ctz(n);
 }
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-inline int Bits::FindLSBSetNonZero64(uint64 n) {
-  assert(n != 0);
-  return __builtin_ctzll(n);
-}
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-
 #elif defined(_MSC_VER)
 
-inline int Bits::Log2FloorNonZero(uint32 n) {
+inline int Bits::Log2FloorNonZero(uint32_t n) {
   assert(n != 0);
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
   unsigned long where;
   _BitScanReverse(&where, n);
   return static_cast<int>(where);
 }
 
-inline int Bits::Log2Floor(uint32 n) {
+inline int Bits::Log2Floor(uint32_t n) {
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
   unsigned long where;
   if (_BitScanReverse(&where, n))
     return static_cast<int>(where);
   return -1;
 }
 
-inline int Bits::FindLSBSetNonZero(uint32 n) {
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
   assert(n != 0);
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
   unsigned long where;
   if (_BitScanForward(&where, n))
     return static_cast<int>(where);
   return 32;
 }
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-inline int Bits::FindLSBSetNonZero64(uint64 n) {
-  assert(n != 0);
-  unsigned long where;
-  if (_BitScanForward64(&where, n))
-    return static_cast<int>(where);
-  return 64;
-}
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
-
 #else  // Portable versions.
 
-inline int Bits::Log2FloorNonZero(uint32 n) {
+inline int Bits::Log2FloorNonZero(uint32_t n) {
   assert(n != 0);
 
   int log = 0;
-  uint32 value = n;
+  uint32_t value = n;
   for (int i = 4; i >= 0; --i) {
     int shift = (1 << i);
-    uint32 x = value >> shift;
+    uint32_t x = value >> shift;
     if (x != 0) {
       value = x;
       log += shift;
@@ -468,16 +333,16 @@ inline int Bits::Log2FloorNonZero(uint32 n) {
   return log;
 }
 
-inline int Bits::Log2Floor(uint32 n) {
+inline int Bits::Log2Floor(uint32_t n) {
   return (n == 0) ? -1 : Bits::Log2FloorNonZero(n);
 }
 
-inline int Bits::FindLSBSetNonZero(uint32 n) {
+inline int Bits::FindLSBSetNonZero(uint32_t n) {
   assert(n != 0);
 
   int rc = 31;
   for (int i = 4, shift = 1 << 4; i >= 0; --i) {
-    const uint32 x = n << shift;
+    const uint32_t x = n << shift;
     if (x != 0) {
       n = x;
       rc -= shift;
@@ -487,27 +352,48 @@ inline int Bits::FindLSBSetNonZero(uint32 n) {
   return rc;
 }
 
-#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
+#endif  // End portable versions.
+
+#if defined(HAVE_BUILTIN_CTZ)
+
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
+  assert(n != 0);
+  return __builtin_ctzll(n);
+}
+
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
+// _BitScanForward64() is only available on x64 and ARM64.
+
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
+  assert(n != 0);
+  // NOLINTNEXTLINE(runtime/int): The MSVC intrinsic demands unsigned long.
+  unsigned long where;
+  if (_BitScanForward64(&where, n))
+    return static_cast<int>(where);
+  return 64;
+}
+
+#else  // Portable version.
+
 // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
-inline int Bits::FindLSBSetNonZero64(uint64 n) {
+inline int Bits::FindLSBSetNonZero64(uint64_t n) {
   assert(n != 0);
 
-  const uint32 bottombits = static_cast<uint32>(n);
+  const uint32_t bottombits = static_cast<uint32_t>(n);
   if (bottombits == 0) {
-    // Bottom bits are zero, so scan in top bits
-    return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
+    // Bottom bits are zero, so scan the top bits.
+    return 32 + FindLSBSetNonZero(static_cast<uint32_t>(n >> 32));
   } else {
     return FindLSBSetNonZero(bottombits);
   }
 }
-#endif  // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)
 
-#endif  // End portable versions.
+#endif  // End portable version.
 
 // Variable-length integer encoding.
 class Varint {
  public:
-  // Maximum lengths of varint encoding of uint32.
+  // Maximum lengths of varint encoding of uint32_t.
   static const int kMax32 = 5;
 
   // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
@@ -516,23 +402,23 @@ class Varint {
   // past the last byte of the varint32. Else returns NULL.  On success,
   // "result <= limit".
   static const char* Parse32WithLimit(const char* ptr, const char* limit,
-                                      uint32* OUTPUT);
+                                      uint32_t* OUTPUT);
 
   // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
   // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
   //            byte just past the last encoded byte.
-  static char* Encode32(char* ptr, uint32 v);
+  static char* Encode32(char* ptr, uint32_t v);
 
   // EFFECTS    Appends the varint representation of "value" to "*s".
-  static void Append32(std::string* s, uint32 value);
+  static void Append32(std::string* s, uint32_t value);
 };
 
 inline const char* Varint::Parse32WithLimit(const char* p,
                                             const char* l,
-                                            uint32* OUTPUT) {
+                                            uint32_t* OUTPUT) {
   const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
   const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
-  uint32 b, result;
+  uint32_t b, result;
   if (ptr >= limit) return NULL;
   b = *(ptr++); result = b & 127;          if (b < 128) goto done;
   if (ptr >= limit) return NULL;
@@ -549,30 +435,30 @@ inline const char* Varint::Parse32WithLimit(const char* p,
   return reinterpret_cast<const char*>(ptr);
 }
 
-inline char* Varint::Encode32(char* sptr, uint32 v) {
+inline char* Varint::Encode32(char* sptr, uint32_t v) {
   // Operate on characters as unsigneds
-  unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
-  static const int B = 128;
-  if (v < (1<<7)) {
-    *(ptr++) = v;
-  } else if (v < (1<<14)) {
-    *(ptr++) = v | B;
-    *(ptr++) = v>>7;
-  } else if (v < (1<<21)) {
-    *(ptr++) = v | B;
-    *(ptr++) = (v>>7) | B;
-    *(ptr++) = v>>14;
-  } else if (v < (1<<28)) {
-    *(ptr++) = v | B;
-    *(ptr++) = (v>>7) | B;
-    *(ptr++) = (v>>14) | B;
-    *(ptr++) = v>>21;
+  uint8_t* ptr = reinterpret_cast<uint8_t*>(sptr);
+  static const uint8_t B = 128;
+  if (v < (1 << 7)) {
+    *(ptr++) = static_cast<uint8_t>(v);
+  } else if (v < (1 << 14)) {
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 7);
+  } else if (v < (1 << 21)) {
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 14);
+  } else if (v < (1 << 28)) {
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 7) | B);
+    *(ptr++) = static_cast<uint8_t>((v >> 14) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 21);
   } else {
-    *(ptr++) = v | B;
-    *(ptr++) = (v>>7) | B;
-    *(ptr++) = (v>>14) | B;
-    *(ptr++) = (v>>21) | B;
-    *(ptr++) = v>>28;
+    *(ptr++) = static_cast<uint8_t>(v | B);
+    *(ptr++) = static_cast<uint8_t>((v>>7) | B);
+    *(ptr++) = static_cast<uint8_t>((v>>14) | B);
+    *(ptr++) = static_cast<uint8_t>((v>>21) | B);
+    *(ptr++) = static_cast<uint8_t>(v >> 28);
   }
   return reinterpret_cast<char*>(ptr);
 }
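
Earlier in this file the gflags-style DEFINE_bool/DECLARE_bool macros are replaced by the stubbed `SNAPPY_FLAG` macro and `snappy::GetFlag()` helper; a short usage sketch, assuming `snappy-stubs-internal.h` has been included and using a purely hypothetical flag name, is:

```cpp
// Hypothetical flag for illustration only; in the open-source stub its value
// can only be changed at compile time.
SNAPPY_FLAG(bool, snappy_example_verbose, false, "Illustrative flag.");

bool ExampleVerbose() {
  // The stubbed GetFlag() simply returns its argument.
  return snappy::GetFlag(FLAGS_snappy_example_verbose);
}
```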

Some files were not shown because too many files changed in this diff