Browse Source

Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 2 of 2.

mowgli 3 years ago
parent
commit
56c39b3cf9

+ 12 - 12
library/cpp/accurate_accumulate/accurate_accumulate.h

@@ -11,7 +11,7 @@ public:
     using TValueType = TAccumulateType;
 
     template <typename TFloatType>
-    explicit TKahanAccumulator(const TFloatType x) 
+    explicit TKahanAccumulator(const TFloatType x)
         : Sum_(x)
         , Compensation_()
     {
@@ -30,12 +30,12 @@ public:
         return *this;
     }
 
-    TValueType Get() const { 
+    TValueType Get() const {
         return Sum_ + Compensation_;
     }
 
     template <typename TFloatType>
-    inline operator TFloatType() const { 
+    inline operator TFloatType() const {
         return Get();
     }
 
@@ -91,31 +91,31 @@ private:
 };
 
 template <typename TAccumulateType, typename TFloatType>
-inline const TKahanAccumulator<TAccumulateType> 
+inline const TKahanAccumulator<TAccumulateType>
 operator+(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
     return lhs += rhs;
 }
 
 template <typename TAccumulateType, typename TFloatType>
-inline const TKahanAccumulator<TAccumulateType> 
+inline const TKahanAccumulator<TAccumulateType>
 operator-(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
     return lhs -= rhs;
 }
 
 template <typename TAccumulateType, typename TFloatType>
-inline const TKahanAccumulator<TAccumulateType> 
+inline const TKahanAccumulator<TAccumulateType>
 operator*(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
     return lhs *= rhs;
 }
 
 template <typename TAccumulateType, typename TFloatType>
-inline const TKahanAccumulator<TAccumulateType> 
+inline const TKahanAccumulator<TAccumulateType>
 operator/(TKahanAccumulator<TAccumulateType> lhs, const TFloatType rhs) {
     return lhs /= rhs;
 }
 
 template <typename TAccumulatorType, typename It>
-static inline TAccumulatorType TypedFastAccumulate(It begin, It end) { 
+static inline TAccumulatorType TypedFastAccumulate(It begin, It end) {
     TAccumulatorType accumulator = TAccumulatorType();
 
     for (; begin + 15 < end; begin += 16) {
@@ -179,7 +179,7 @@ static inline TAccumulatorType TypedFastInnerProduct(It1 begin1, It1 end1, It2 b
 }
 
 template <typename It>
-static inline double FastAccumulate(It begin, It end) { 
+static inline double FastAccumulate(It begin, It end) {
     return TypedFastAccumulate<double>(begin, end);
 }
 
@@ -189,7 +189,7 @@ static inline double FastAccumulate(const TVector<T>& sequence) {
 }
 
 template <typename It>
-static inline double FastKahanAccumulate(It begin, It end) { 
+static inline double FastKahanAccumulate(It begin, It end) {
     return TypedFastAccumulate<TKahanAccumulator<double>>(begin, end);
 }
 
@@ -199,7 +199,7 @@ static inline double FastKahanAccumulate(const TVector<T>& sequence) {
 }
 
 template <typename It1, typename It2>
-static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) { 
+static inline double FastInnerProduct(It1 begin1, It1 end1, It2 begin2) {
     return TypedFastInnerProduct<double>(begin1, end1, begin2);
 }
 
@@ -210,7 +210,7 @@ static inline double FastInnerProduct(const TVector<T>& lhs, const TVector<T>& r
 }
 
 template <typename It1, typename It2>
-static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) { 
+static inline double FastKahanInnerProduct(It1 begin1, It1 end1, It2 begin2) {
     return TypedFastInnerProduct<TKahanAccumulator<double>>(begin1, end1, begin2);
 }
 

+ 1 - 1
library/cpp/accurate_accumulate/ya.make

@@ -1,6 +1,6 @@
 LIBRARY()
 
-OWNER(alex-sh) 
+OWNER(alex-sh)
 
 SRCS(
     accurate_accumulate.h

+ 2 - 2
library/cpp/charset/codepage.h

@@ -199,7 +199,7 @@ struct Encoder {
             return 0;
         return (unsigned char)Table[(ch >> 8) & 255][ch & 255];
     }
- 
+
     inline char Tr(wchar32 ch) const {
         char code = Code(ch);
         if (code == 0 && ch != 0)
@@ -211,7 +211,7 @@ struct Encoder {
     inline unsigned char operator[](wchar32 ch) const {
         return Tr(ch);
     }
- 
+
     void Tr(const wchar32* in, char* out, size_t len) const;
     void Tr(const wchar32* in, char* out) const;
     char* DefaultPlane;

+ 67 - 67
library/cpp/charset/codepage_ut.cpp

@@ -53,8 +53,8 @@ public:
     void TestToLower();
     void TestToUpper();
 
-    void TestCanEncode(); 
- 
+    void TestCanEncode();
+
     inline void TestUpperLower() {
         const CodePage* cp = CodePageByCharset(CODES_ASCII);
         char tmp[100];
@@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() {
     ToUpper(data, n - 1);
     UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0);
 }
- 
-static void TestCanEncodeEmpty() { 
-    TWtringBuf empty; 
-    UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); 
-    UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); 
-    UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); 
-} 
- 
-static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { 
-    // char by char 
-    for (size_t i = 0; i < text.size(); ++i) { 
-        if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) 
-            ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) 
+
+static void TestCanEncodeEmpty() {
+    TWtringBuf empty;
+    UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN));
+    UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX));
+    UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8));
+}
+
+static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) {
+    // char by char
+    for (size_t i = 0; i < text.size(); ++i) {
+        if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult)
+            ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding)
                                 << " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")";
-    } 
-    // whole text 
-    UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); 
-} 
- 
-void TCodepageTest::TestCanEncode() { 
-    TestCanEncodeEmpty(); 
- 
+    }
+    // whole text
+    UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult);
+}
+
+void TCodepageTest::TestCanEncode() {
+    TestCanEncodeEmpty();
+
     const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
-    TestCanEncodeEach(lat, CODES_WIN, true); 
-    TestCanEncodeEach(lat, CODES_YANDEX, true); 
-    TestCanEncodeEach(lat, CODES_UTF8, true); 
- 
+    TestCanEncodeEach(lat, CODES_WIN, true);
+    TestCanEncodeEach(lat, CODES_YANDEX, true);
+    TestCanEncodeEach(lat, CODES_UTF8, true);
+
     const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя";
-    TestCanEncodeEach(rus, CODES_WIN, true); 
-    TestCanEncodeEach(rus, CODES_YANDEX, true); 
-    TestCanEncodeEach(rus, CODES_UTF8, true); 
- 
+    TestCanEncodeEach(rus, CODES_WIN, true);
+    TestCanEncodeEach(rus, CODES_YANDEX, true);
+    TestCanEncodeEach(rus, CODES_UTF8, true);
+
     const TUtf16String ukr = u"ҐґЄєІіЇї";
-    TestCanEncodeEach(ukr, CODES_WIN, true); 
-    TestCanEncodeEach(ukr, CODES_YANDEX, true); 
-    TestCanEncodeEach(ukr, CODES_UTF8, true); 
- 
+    TestCanEncodeEach(ukr, CODES_WIN, true);
+    TestCanEncodeEach(ukr, CODES_YANDEX, true);
+    TestCanEncodeEach(ukr, CODES_UTF8, true);
+
     const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż";
-    TestCanEncodeEach(pol, CODES_WIN, false); 
-    TestCanEncodeEach(pol, CODES_YANDEX, true); 
-    TestCanEncodeEach(pol, CODES_UTF_16BE, true); 
- 
+    TestCanEncodeEach(pol, CODES_WIN, false);
+    TestCanEncodeEach(pol, CODES_YANDEX, true);
+    TestCanEncodeEach(pol, CODES_UTF_16BE, true);
+
     const TUtf16String ger = u"ÄäÖöÜüß";
-    TestCanEncodeEach(ger, CODES_WIN, false); 
-    TestCanEncodeEach(ger, CODES_YANDEX, true); 
-    TestCanEncodeEach(ger, CODES_UTF_16LE, true); 
- 
+    TestCanEncodeEach(ger, CODES_WIN, false);
+    TestCanEncodeEach(ger, CODES_YANDEX, true);
+    TestCanEncodeEach(ger, CODES_UTF_16LE, true);
+
     const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp
     const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ";
     const TUtf16String fra3 = u"Æ挜";
-    TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); 
-    TestCanEncodeEach(fra1, CODES_YANDEX, true); 
-    TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); 
-    TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); 
- 
+    TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false);
+    TestCanEncodeEach(fra1, CODES_YANDEX, true);
+    TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false);
+    TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true);
+
     const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ";
-    TestCanEncodeEach(kaz, CODES_WIN, false); 
-    TestCanEncodeEach(kaz, CODES_YANDEX, false); 
-    TestCanEncodeEach(kaz, CODES_UTF8, true); 
-    TestCanEncodeEach(kaz, CODES_KAZWIN, true); 
- 
+    TestCanEncodeEach(kaz, CODES_WIN, false);
+    TestCanEncodeEach(kaz, CODES_YANDEX, false);
+    TestCanEncodeEach(kaz, CODES_UTF8, true);
+    TestCanEncodeEach(kaz, CODES_KAZWIN, true);
+
     const TUtf16String tur1 = u"ĞİŞğş";
     const TUtf16String tur = tur1 + u"ı";
-    TestCanEncodeEach(tur, CODES_WIN, false); 
-    TestCanEncodeEach(tur, CODES_YANDEX, false); 
-    TestCanEncodeEach(tur, CODES_UTF8, true); 
- 
+    TestCanEncodeEach(tur, CODES_WIN, false);
+    TestCanEncodeEach(tur, CODES_YANDEX, false);
+    TestCanEncodeEach(tur, CODES_UTF8, true);
+
     const TUtf16String chi = u"新隶体新隸體";
-    TestCanEncodeEach(chi, CODES_WIN, false); 
-    TestCanEncodeEach(chi, CODES_YANDEX, false); 
-    TestCanEncodeEach(chi, CODES_UTF8, true); 
-    TestCanEncodeEach(chi, CODES_UTF_16LE, true); 
- 
+    TestCanEncodeEach(chi, CODES_WIN, false);
+    TestCanEncodeEach(chi, CODES_YANDEX, false);
+    TestCanEncodeEach(chi, CODES_UTF8, true);
+    TestCanEncodeEach(chi, CODES_UTF_16LE, true);
+
     const TUtf16String jap = u"漢字仮字交じり文";
-    TestCanEncodeEach(jap, CODES_WIN, false); 
-    TestCanEncodeEach(jap, CODES_YANDEX, false); 
-    TestCanEncodeEach(jap, CODES_UTF8, true); 
-    TestCanEncodeEach(jap, CODES_UTF_16BE, true); 
-} 
+    TestCanEncodeEach(jap, CODES_WIN, false);
+    TestCanEncodeEach(jap, CODES_YANDEX, false);
+    TestCanEncodeEach(jap, CODES_UTF8, true);
+    TestCanEncodeEach(jap, CODES_UTF_16BE, true);
+}

+ 16 - 16
library/cpp/charset/wide.cpp

@@ -1,18 +1,18 @@
 #include "wide.h"
 
-bool CanBeEncoded(TWtringBuf text, ECharset encoding) { 
-    const size_t LEN = 16; 
-    const size_t BUFSIZE = LEN * 4; 
-    char encodeBuf[BUFSIZE]; 
-    wchar16 decodeBuf[BUFSIZE]; 
- 
-    while (!text.empty()) { 
-        TWtringBuf src = text.NextTokAt(LEN); 
-        TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding); 
-        TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding); 
-        if (decoded != src) 
-            return false; 
-    } 
- 
-    return true; 
-} 
+bool CanBeEncoded(TWtringBuf text, ECharset encoding) {
+    const size_t LEN = 16;
+    const size_t BUFSIZE = LEN * 4;
+    char encodeBuf[BUFSIZE];
+    wchar16 decodeBuf[BUFSIZE];
+
+    while (!text.empty()) {
+        TWtringBuf src = text.NextTokAt(LEN);
+        TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding);
+        TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding);
+        if (decoded != src)
+            return false;
+    }
+
+    return true;
+}

+ 93 - 93
library/cpp/charset/wide.h

@@ -47,61 +47,61 @@ inline void CharToWide(const char* text, size_t len, TCharType* dest, const Code
     }
 }
 
-namespace NDetail { 
-    namespace NBaseOps { 
-        // Template interface base recoding drivers, do not perform any memory management, 
-        // do not care about buffer size, so supplied @dst 
-        // should have enough room for the result (with proper reserve for the worst case) 
- 
-        // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. 
- 
+namespace NDetail {
+    namespace NBaseOps {
+        // Template interface base recoding drivers, do not perform any memory management,
+        // do not care about buffer size, so supplied @dst
+        // should have enough room for the result (with proper reserve for the worst case)
+
+        // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
+
         template <typename TCharType>
         inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) {
             Y_ASSERT(cp.SingleByteCodepage());
             ::CharToWide(src.data(), src.size(), dst, cp);
             return TBasicStringBuf<TCharType>(dst, src.size());
-        } 
- 
+        }
+
         template <typename TCharType>
         inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) {
             Y_ASSERT(cp.SingleByteCodepage());
             ::WideToChar(src.data(), src.size(), dst, cp.CPEnum);
             return TStringBuf(dst, src.size());
-        } 
- 
+        }
+
         template <typename TCharType>
         inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) {
             Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
-            size_t read = 0; 
-            size_t written = 0; 
+            size_t read = 0;
+            size_t written = 0;
             ::NICONVPrivate::RecodeToUnicode(encoding, src.data(), dst, src.size(), src.size(), read, written);
             return TBasicStringBuf<TCharType>(dst, written);
-        } 
- 
+        }
+
         template <typename TCharType>
         inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) {
             Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
-            size_t read = 0; 
-            size_t written = 0; 
+            size_t read = 0;
+            size_t written = 0;
             ::NICONVPrivate::RecodeFromUnicode(encoding, src.data(), dst, src.size(), src.size() * 3, read, written);
-            return TStringBuf(dst, written); 
-        } 
- 
+            return TStringBuf(dst, written);
+        }
+
         template <typename TCharType>
         inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) {
-            size_t len = 0; 
+            size_t len = 0;
             if (!::UTF8ToWide(src.data(), src.size(), dst, len))
-                ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\""); 
+                ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\"");
             return TBasicStringBuf<TCharType>(dst, len);
-        } 
- 
+        }
+
         template <typename TCharType>
         inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) {
-            size_t len = 0; 
+            size_t len = 0;
             ::WideToUTF8(src.data(), src.size(), dst, len);
-            return TStringBuf(dst, len); 
-        } 
- 
+            return TStringBuf(dst, len);
+        }
+
         // Select one of re-coding methods from above, based on provided @encoding
 
         template <typename TCharFrom, typename TCharTo>
@@ -115,73 +115,73 @@ namespace NDetail {
         }
 
     }
- 
-    template <typename TCharFrom> 
-    struct TRecodeTraits; 
- 
-    template <> 
-    struct TRecodeTraits<char> { 
+
+    template <typename TCharFrom>
+    struct TRecodeTraits;
+
+    template <>
+    struct TRecodeTraits<char> {
         using TCharTo = wchar16;
         using TStringBufTo = TWtringBuf;
         using TStringTo = TUtf16String;
         enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case
                                   // Here an unicode character can be converted up to 4 bytes of UTF8
-    }; 
- 
-    template <> 
-    struct TRecodeTraits<wchar16> { 
+    };
+
+    template <>
+    struct TRecodeTraits<wchar16> {
         using TCharTo = char;
         using TStringBufTo = TStringBuf;
         using TStringTo = TString;
         enum { ReserveSize = 2 }; // possible surrogate pairs ?
-    }; 
- 
-    // Operations with destination buffer where recoded string will be written 
-    template <typename TResult> 
-    struct TRecodeResultOps { 
+    };
+
+    // Operations with destination buffer where recoded string will be written
+    template <typename TResult>
+    struct TRecodeResultOps {
         // default implementation will work with TString and TUtf16String - 99% of usage
         using TResultChar = typename TResult::char_type;
- 
-        static inline size_t Size(const TResult& dst) { 
-            return dst.size(); 
-        } 
- 
-        static inline TResultChar* Reserve(TResult& dst, size_t len) { 
-            dst.ReserveAndResize(len); 
-            return dst.begin(); 
-        } 
- 
-        static inline void Truncate(TResult& dst, size_t len) { 
-            dst.resize(len); 
-        } 
-    }; 
- 
-    // Main template interface for recoding in both directions 
- 
-    template <typename TCharFrom, typename TResult> 
+
+        static inline size_t Size(const TResult& dst) {
+            return dst.size();
+        }
+
+        static inline TResultChar* Reserve(TResult& dst, size_t len) {
+            dst.ReserveAndResize(len);
+            return dst.begin();
+        }
+
+        static inline void Truncate(TResult& dst, size_t len) {
+            dst.resize(len);
+        }
+    };
+
+    // Main template interface for recoding in both directions
+
+    template <typename TCharFrom, typename TResult>
     typename TRecodeTraits<TCharFrom>::TStringBufTo Recode(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
         using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
-        // make enough room for re-coded string 
-        TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize); 
-        // do re-coding 
+        // make enough room for re-coded string
+        TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize);
+        // do re-coding
         TBasicStringBuf<TCharTo> res = NBaseOps::Recode(src, dstbuf, encoding);
-        // truncate result back to proper size 
-        TRecodeResultOps<TResult>::Truncate(dst, res.size()); 
-        return res; 
-    } 
- 
-    // appending version of Recode() 
-    template <typename TCharFrom, typename TResult> 
+        // truncate result back to proper size
+        TRecodeResultOps<TResult>::Truncate(dst, res.size());
+        return res;
+    }
+
+    // appending version of Recode()
+    template <typename TCharFrom, typename TResult>
     typename TRecodeTraits<TCharFrom>::TStringBufTo RecodeAppend(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
         using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
-        size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst); 
-        TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize); 
+        size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst);
+        TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize);
         TBasicStringBuf<TCharTo> appended = NBaseOps::Recode(src, dstbuf + dstOrigSize, encoding);
-        size_t dstFinalSize = dstOrigSize + appended.size(); 
-        TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize); 
+        size_t dstFinalSize = dstOrigSize + appended.size();
+        TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize);
         return TBasicStringBuf<TCharTo>(dstbuf, dstFinalSize);
-    } 
- 
+    }
+
     // special implementation for robust utf8 functions
     template <typename TResult>
     TWtringBuf RecodeUTF8Robust(const TStringBuf src, TResult& dst) {
@@ -197,31 +197,31 @@ namespace NDetail {
         return TWtringBuf(dstbuf, written);
     }
 
-    template <typename TCharFrom> 
+    template <typename TCharFrom>
     inline typename TRecodeTraits<TCharFrom>::TStringTo Recode(const TBasicStringBuf<TCharFrom> src, ECharset encoding) {
-        typename TRecodeTraits<TCharFrom>::TStringTo res; 
-        Recode<TCharFrom>(src, res, encoding); 
-        return res; 
-    } 
+        typename TRecodeTraits<TCharFrom>::TStringTo res;
+        Recode<TCharFrom>(src, res, encoding);
+        return res;
+    }
 }
- 
-// Write result into @dst. Return string-buffer pointing to re-coded content of @dst. 
- 
+
+// Write result into @dst. Return string-buffer pointing to re-coded content of @dst.
+
 template <bool robust>
 inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
     if (robust && CODES_UTF8 == encoding)
         return ::NDetail::RecodeUTF8Robust(src, dst);
     return ::NDetail::Recode<char>(src, dst, encoding);
-} 
- 
+}
+
 inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
     return ::NDetail::Recode<char>(src, dst, encoding);
 }
 
 inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encoding) {
     return ::NDetail::Recode<wchar16>(src, dst, encoding);
-} 
- 
+}
+
 //! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type
 inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) {
     if (NCodepagePrivate::NativeCodepage(enc)) {
@@ -301,6 +301,6 @@ inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) {
     return CharToWide(s.data(), s.size(), cp);
 }
 
-// true if @text can be fully encoded to specified @encoding, 
-// with possibility to recover exact original text after decoding 
-bool CanBeEncoded(TWtringBuf text, ECharset encoding); 
+// true if @text can be fully encoded to specified @encoding,
+// with possibility to recover exact original text after decoding
+bool CanBeEncoded(TWtringBuf text, ECharset encoding);

+ 69 - 69
library/cpp/charset/wide_ut.cpp

@@ -9,7 +9,7 @@
 #include <util/generic/hash_set.h>
 
 #include <algorithm>
- 
+
 namespace {
     //! three UTF8 encoded russian letters (A, B, V)
     const char yandexCyrillicAlphabet[] =
@@ -143,8 +143,8 @@ public:
     void TestCharToWide();
     void TestWideToChar();
     void TestYandexEncoding();
-    void TestRecodeIntoString(); 
-    void TestRecodeAppend(); 
+    void TestRecodeIntoString();
+    void TestRecodeAppend();
     void TestRecode();
     void TestUnicodeLimit();
 };
@@ -228,114 +228,114 @@ void TConversionTest::TestYandexEncoding() {
     }
 }
 
-void TConversionTest::TestRecodeIntoString() { 
+void TConversionTest::TestRecodeIntoString() {
     TString sYandex(UnicodeText.size() * 4, 'x');
     const char* sdata = sYandex.data();
-    TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); 
+    TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX);
     UNIT_ASSERT(sYandex == YandexText); // same content
     UNIT_ASSERT(sYandex.data() == sdata);     // reserved buffer reused
     UNIT_ASSERT(sYandex.data() == sres.data());     // same buffer
     UNIT_ASSERT(sYandex.size() == sres.size());     // same size
     TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX);
- 
+
     TUtf16String sUnicode;
-    sUnicode.reserve(YandexText.size() * 4); 
+    sUnicode.reserve(YandexText.size() * 4);
     const wchar16* wdata = sUnicode.data();
-    TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); 
+    TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX);
     UNIT_ASSERT(sUnicode == UnicodeText); // same content
     UNIT_ASSERT(sUnicode.data() == wdata);      // reserved buffer reused
     UNIT_ASSERT(sUnicode.data() == wres.data());      // same buffer
     UNIT_ASSERT(sUnicode.size() == wres.size());      // same size
- 
+
     TString sUtf8 = " ";
-    size_t scap = sUtf8.capacity(); 
-    sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); 
+    size_t scap = sUtf8.capacity();
+    sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8);
     UNIT_ASSERT(sUtf8 == UTF8Text);       // same content
     UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small)
     UNIT_ASSERT(sUtf8.data() == sres.data());         // same buffer
     UNIT_ASSERT(sUtf8.size() == sres.size());         // same size
     TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8);
- 
-    sUnicode.clear(); 
+
+    sUnicode.clear();
     wdata = sUnicode.data();
     TUtf16String copy = sUnicode; // increase ref-counter
-    wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); 
+    wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8);
     UNIT_ASSERT(sUnicode == UnicodeText); // same content
 #ifndef TSTRING_IS_STD_STRING
     UNIT_ASSERT(sUnicode.data() != wdata);      // re-allocated (shared buffer supplied)
     UNIT_ASSERT(sUnicode.data() == wres.data());      // same buffer
 #endif
     UNIT_ASSERT(sUnicode.size() == wres.size());      // same content
-} 
- 
+}
+
 static TString GenerateJunk(size_t seed) {
     TString res;
-    size_t hash = NumericHash(seed); 
-    size_t size = hash % 1024; 
-    res.reserve(size); 
-    for (size_t i = 0; i < size; ++i) 
-        res += static_cast<char>(NumericHash(hash + i) % 256); 
-    return res; 
-} 
- 
-void TConversionTest::TestRecodeAppend() { 
-    { 
+    size_t hash = NumericHash(seed);
+    size_t size = hash % 1024;
+    res.reserve(size);
+    for (size_t i = 0; i < size; ++i)
+        res += static_cast<char>(NumericHash(hash + i) % 256);
+    return res;
+}
+
+void TConversionTest::TestRecodeAppend() {
+    {
         TString s1, s2;
         NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX);
-        UNIT_ASSERT(s1.empty()); 
- 
-        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); 
-        s2 += WideToChar(UnicodeText, CODES_WIN); 
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
-        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX); 
-        s2 += WideToChar(UnicodeText, CODES_YANDEX); 
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
+        UNIT_ASSERT(s1.empty());
+
+        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN);
+        s2 += WideToChar(UnicodeText, CODES_WIN);
+        UNIT_ASSERT_EQUAL(s1, s2);
+
+        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX);
+        s2 += WideToChar(UnicodeText, CODES_YANDEX);
+        UNIT_ASSERT_EQUAL(s1, s2);
+
         NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX);
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
-        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); 
+        UNIT_ASSERT_EQUAL(s1, s2);
+
+        NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8);
         s2 += WideToUTF8(UnicodeText);
-        UNIT_ASSERT_EQUAL(s1, s2); 
+        UNIT_ASSERT_EQUAL(s1, s2);
 
-        for (size_t i = 0; i < 100; ++i) { 
+        for (size_t i = 0; i < 100; ++i) {
             TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX);
-            NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); 
+            NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8);
             s2 += WideToUTF8(junk);
-            UNIT_ASSERT_EQUAL(s1, s2); 
-        } 
-    } 
- 
-    { 
+            UNIT_ASSERT_EQUAL(s1, s2);
+        }
+    }
+
+    {
         TUtf16String s1, s2;
         NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX);
-        UNIT_ASSERT(s1.empty()); 
- 
-        NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); 
-        s2 += CharToWide(YandexText, CODES_WIN); 
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
-        NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX); 
-        s2 += CharToWide(YandexText, CODES_YANDEX); 
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
+        UNIT_ASSERT(s1.empty());
+
+        NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN);
+        s2 += CharToWide(YandexText, CODES_WIN);
+        UNIT_ASSERT_EQUAL(s1, s2);
+
+        NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX);
+        s2 += CharToWide(YandexText, CODES_YANDEX);
+        UNIT_ASSERT_EQUAL(s1, s2);
+
         NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX);
-        UNIT_ASSERT_EQUAL(s1, s2); 
+        UNIT_ASSERT_EQUAL(s1, s2);
 
-        NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); 
+        NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8);
         s2 += UTF8ToWide(UTF8Text);
-        UNIT_ASSERT_EQUAL(s1, s2); 
- 
-        for (size_t i = 0; i < 100; ++i) { 
+        UNIT_ASSERT_EQUAL(s1, s2);
+
+        for (size_t i = 0; i < 100; ++i) {
             TString junk = GenerateJunk(i);
-            NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); 
-            s2 += CharToWide(junk, CODES_YANDEX); 
-            UNIT_ASSERT_EQUAL(s1, s2); 
-        } 
-    } 
-} 
- 
+            NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX);
+            s2 += CharToWide(junk, CODES_YANDEX);
+            UNIT_ASSERT_EQUAL(s1, s2);
+        }
+    }
+}
+
 template <>
 void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) {
     out << int(val);

+ 84 - 84
library/cpp/containers/comptrie/comptrie_impl.h

@@ -26,10 +26,10 @@ namespace NCompactTrie {
         return (sizeof(T) - 1) * 8;
     }
 
-    static inline bool IsEpsilonLink(const char flags) { 
-        return !(flags & (MT_FINAL | MT_NEXT)); 
-    } 
- 
+    static inline bool IsEpsilonLink(const char flags) {
+        return !(flags & (MT_FINAL | MT_NEXT));
+    }
+
     static inline void TraverseEpsilon(const char*& datapos) {
         const char flags = *datapos;
         if (!IsEpsilonLink(flags)) {
@@ -41,14 +41,14 @@ namespace NCompactTrie {
         datapos += offset;
     }
 
-    static inline size_t LeftOffsetLen(const char flags) { 
-        return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK; 
-    } 
- 
-    static inline size_t RightOffsetLen(const char flags) { 
-        return flags & MT_SIZEMASK; 
-    } 
- 
+    static inline size_t LeftOffsetLen(const char flags) {
+        return (flags >> MT_LEFTSHIFT) & MT_SIZEMASK;
+    }
+
+    static inline size_t RightOffsetLen(const char flags) {
+        return flags & MT_SIZEMASK;
+    }
+
     void ShowProgress(size_t n); // just print dots
 }
 
@@ -100,82 +100,82 @@ namespace NCompactTrie {
         os.Write(buf, len);
         return len;
     }
- 
-    // Unpack the offset to the next node. The encoding scheme can store offsets 
-    // up to 7 bytes; whether they fit into size_t is another issue. 
+
+    // Unpack the offset to the next node. The encoding scheme can store offsets
+    // up to 7 bytes; whether they fit into size_t is another issue.
     Y_FORCE_INLINE size_t UnpackOffset(const char* p, size_t len) {
-        size_t result = 0; 
- 
-        while (len--) 
-            result = ((result << 8) | (*(p++) & 0xFF)); 
- 
-        return result; 
-    } 
- 
-    // Auxiliary function: consumes one character from the input. Advances the data pointer 
-    // to the position immediately preceding the value for the link just traversed (if any); 
-    // returns flags associated with the link. If no arc with the required label is present, 
-    // zeroes the data pointer. 
+        size_t result = 0;
+
+        while (len--)
+            result = ((result << 8) | (*(p++) & 0xFF));
+
+        return result;
+    }
+
+    // Auxiliary function: consumes one character from the input. Advances the data pointer
+    // to the position immediately preceding the value for the link just traversed (if any);
+    // returns flags associated with the link. If no arc with the required label is present,
+    // zeroes the data pointer.
     Y_FORCE_INLINE char LeapByte(const char*& datapos, const char* dataend, char label) {
-        while (datapos < dataend) { 
-            size_t offsetlength, offset; 
-            const char* startpos = datapos; 
-            char flags = *(datapos++); 
- 
-            if (IsEpsilonLink(flags)) { 
-                // Epsilon link - jump to the specified offset without further checks. 
-                // These links are created during minimization: original uncompressed 
-                // tree does not need them. (If we find a way to package 3 offset lengths 
-                // into 1 byte, we could get rid of them; but it looks like they do no harm. 
+        while (datapos < dataend) {
+            size_t offsetlength, offset;
+            const char* startpos = datapos;
+            char flags = *(datapos++);
+
+            if (IsEpsilonLink(flags)) {
+                // Epsilon link - jump to the specified offset without further checks.
+                // These links are created during minimization: original uncompressed
+                // tree does not need them. (If we find a way to package 3 offset lengths
+                // into 1 byte, we could get rid of them; but it looks like they do no harm.
                 Y_ASSERT(datapos < dataend);
-                offsetlength = flags & MT_SIZEMASK; 
-                offset = UnpackOffset(datapos, offsetlength); 
-                if (!offset) 
-                    break; 
-                datapos = startpos + offset; 
- 
-                continue; 
-            } 
- 
-            char ch = *(datapos++); 
- 
-            // Left branch 
-            offsetlength = LeftOffsetLen(flags); 
-            if ((unsigned char)label < (unsigned char)ch) { 
-                offset = UnpackOffset(datapos, offsetlength); 
-                if (!offset) 
-                    break; 
- 
-                datapos = startpos + offset; 
- 
-                continue; 
-            } 
- 
-            datapos += offsetlength; 
- 
-            // Right branch 
-            offsetlength = RightOffsetLen(flags); 
-            if ((unsigned char)label > (unsigned char)ch) { 
-                offset = UnpackOffset(datapos, offsetlength); 
- 
-                if (!offset) 
-                    break; 
- 
-                datapos = startpos + offset; 
- 
-                continue; 
-            } 
- 
-            // Got a match; return position right before the contents for the label 
-            datapos += offsetlength; 
-            return flags; 
-        } 
- 
-        // if we got here, we're past the dataend - bail out ASAP 
+                offsetlength = flags & MT_SIZEMASK;
+                offset = UnpackOffset(datapos, offsetlength);
+                if (!offset)
+                    break;
+                datapos = startpos + offset;
+
+                continue;
+            }
+
+            char ch = *(datapos++);
+
+            // Left branch
+            offsetlength = LeftOffsetLen(flags);
+            if ((unsigned char)label < (unsigned char)ch) {
+                offset = UnpackOffset(datapos, offsetlength);
+                if (!offset)
+                    break;
+
+                datapos = startpos + offset;
+
+                continue;
+            }
+
+            datapos += offsetlength;
+
+            // Right branch
+            offsetlength = RightOffsetLen(flags);
+            if ((unsigned char)label > (unsigned char)ch) {
+                offset = UnpackOffset(datapos, offsetlength);
+
+                if (!offset)
+                    break;
+
+                datapos = startpos + offset;
+
+                continue;
+            }
+
+            // Got a match; return position right before the contents for the label
+            datapos += offsetlength;
+            return flags;
+        }
+
+        // if we got here, we're past the dataend - bail out ASAP
         datapos = nullptr;
-        return 0; 
-    } 
- 
+        return 0;
+    }
+
     // Auxiliary function: consumes one (multibyte) symbol from the input.
     // Advances the data pointer to the root of the subtrie beginning after the symbol,
     // zeroes it if this subtrie is empty.

+ 37 - 37
library/cpp/containers/comptrie/comptrie_trie.h

@@ -127,7 +127,7 @@ public:
         return FindLongestPrefix(key.data(), key.size(), prefixLen, value, hasNext);
     }
 
-    // Return trie, containing all tails for the given key 
+    // Return trie, containing all tails for the given key
     inline TCompactTrie<T, D, S> FindTails(const TSymbol* key, size_t keylen) const;
     TCompactTrie<T, D, S> FindTails(const TKeyBuf& key) const {
         return FindTails(key.data(), key.size());
@@ -137,10 +137,10 @@ public:
         return FindTails(key.data(), key.size(), res);
     }
 
-    // same as FindTails(&key, 1), a bit faster 
-    // return false, if no arc with @label exists 
+    // same as FindTails(&key, 1), a bit faster
+    // return false, if no arc with @label exists
     inline bool FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const;
- 
+
     class TConstIterator {
     private:
         typedef NCompactTrie::TOpaqueTrieIterator TOpaqueTrieIterator;
@@ -343,10 +343,10 @@ void TCompactTrie<T, D, S>::FindPhrases(const TSymbol* key, size_t keylen, TPhra
 template <class T, class D, class S>
 inline TCompactTrie<T, D, S> TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen) const {
     TCompactTrie<T, D, S> ret;
-    FindTails(key, keylen, ret); 
-    return ret; 
-} 
- 
+    FindTails(key, keylen, ret);
+    return ret;
+}
+
 template <class T, class D, class S>
 bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompactTrie<T, D, S>& res) const {
     using namespace NCompactTrie;
@@ -354,11 +354,11 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac
     size_t len = DataHolder.Length();
 
     if (!key || !len)
-        return false; 
+        return false;
 
     if (!keylen) {
-        res = *this; 
-        return true; 
+        res = *this;
+        return true;
     }
 
     const char* datastart = DataHolder.AsCharPtr();
@@ -386,35 +386,35 @@ bool TCompactTrie<T, D, S>::FindTails(const TSymbol* key, size_t keylen, TCompac
         }
     }
 
-    return false; 
+    return false;
 }
 
 template <class T, class D, class S>
 inline bool TCompactTrie<T, D, S>::FindTails(TSymbol label, TCompactTrie<T, D, S>& res) const {
-    using namespace NCompactTrie; 
- 
+    using namespace NCompactTrie;
+
     const size_t len = DataHolder.Length();
-    if (!len) 
-        return false; 
- 
+    if (!len)
+        return false;
+
     const char* datastart = DataHolder.AsCharPtr();
-    const char* dataend = datastart + len; 
-    const char* datapos = datastart; 
+    const char* dataend = datastart + len;
+    const char* datapos = datastart;
     const char* value = nullptr;
 
     if (!NCompactTrie::Advance(datapos, dataend, value, label, Packer))
         return false;
- 
+
     if (datapos) {
         Y_ASSERT(datapos >= datastart);
         res = TCompactTrie<T, D, S>(TBlob::NoCopy(datapos, dataend - datapos), value);
     } else {
         res = TCompactTrie<T, D, S>(value);
-    } 
- 
+    }
+
     return true;
-} 
- 
+}
+
 template <class T, class D, class S>
 typename TCompactTrie<T, D, S>::TConstIterator TCompactTrie<T, D, S>::Begin() const {
     NCompactTrie::TOpaqueTrie self(DataHolder.AsCharPtr(), DataHolder.Length(), Skipper);
@@ -495,30 +495,30 @@ bool TCompactTrie<T, D, S>::LookupLongestPrefix(const TSymbol* key, size_t keyle
 
     const char* const dataend = datapos + len;
 
-    const T* keyend = key + keylen; 
+    const T* keyend = key + keylen;
     while (key != keyend) {
         T label = *(key++);
-        for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) { 
+        for (i64 i = (i64)ExtraBits<TSymbol>(); i >= 0; i -= 8) {
             const char flags = LeapByte(datapos, dataend, (char)(label >> i));
-            if (!datapos) { 
-                return found; // no such arc 
-            } 
+            if (!datapos) {
+                return found; // no such arc
+            }
 
             Y_ASSERT(datapos <= dataend);
-            if ((flags & MT_FINAL)) { 
+            if ((flags & MT_FINAL)) {
                 prefixLen = keylen - (keyend - key) - (i ? 1 : 0);
                 valuepos = datapos;
                 hasNext = flags & MT_NEXT;
                 found = true;
 
-                if (!i && key == keyend) { // last byte, and got a match 
-                    return found; 
-                } 
-                datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes 
-            } 
+                if (!i && key == keyend) { // last byte, and got a match
+                    return found;
+                }
+                datapos += Packer.SkipLeaf(datapos); // skip intermediate leaf nodes
+            }
 
-            if (!(flags & MT_NEXT)) { 
-                return found; // no further way 
+            if (!(flags & MT_NEXT)) {
+                return found; // no further way
             }
         }
     }

+ 45 - 45
library/cpp/containers/comptrie/comptrie_ut.cpp

@@ -21,7 +21,7 @@
 #include <util/string/cast.h>
 
 #include "comptrie.h"
-#include "set.h" 
+#include "set.h"
 #include "first_symbol_iterator.h"
 #include "search_iterator.h"
 #include "pattern_searcher.h"
@@ -74,7 +74,7 @@ private:
     UNIT_TEST(TestIterateEmptyKey);
 
     UNIT_TEST(TestTrieSet);
- 
+
     UNIT_TEST(TestTrieForVectorInt64);
     UNIT_TEST(TestTrieForListInt64);
     UNIT_TEST(TestTrieForSetInt64);
@@ -209,8 +209,8 @@ public:
     void TestClear();
 
     void TestIterateEmptyKey();
- 
-    void TestTrieSet(); 
+
+    void TestTrieSet();
 
     void TestTrieForVectorInt64();
     void TestTrieForListInt64();
@@ -1060,48 +1060,48 @@ void TCompactTrieTest::TestIterateEmptyKey() {
     UNIT_ASSERT(it.GetValue() == 1);
 }
 
-void TCompactTrieTest::TestTrieSet() { 
-    TBuffer buffer; 
-    { 
-        TCompactTrieSet<char>::TBuilder builder; 
-        UNIT_ASSERT(builder.Add("a", 0)); 
-        UNIT_ASSERT(builder.Add("ab", 1)); 
-        UNIT_ASSERT(builder.Add("abc", 1)); 
-        UNIT_ASSERT(builder.Add("abcd", 0)); 
-        UNIT_ASSERT(!builder.Add("abcd", 1)); 
- 
-        TBufferStream stream(buffer); 
-        builder.Save(stream); 
-    } 
- 
-    TCompactTrieSet<char> set(TBlob::FromBuffer(buffer)); 
-    UNIT_ASSERT(set.Has("a")); 
-    UNIT_ASSERT(set.Has("ab")); 
-    UNIT_ASSERT(set.Has("abc")); 
-    UNIT_ASSERT(set.Has("abcd")); 
-    UNIT_ASSERT(!set.Has("abcde")); 
-    UNIT_ASSERT(!set.Has("aa")); 
-    UNIT_ASSERT(!set.Has("b")); 
-    UNIT_ASSERT(!set.Has("")); 
- 
-    TCompactTrieSet<char> tails; 
-    UNIT_ASSERT(set.FindTails("a", tails)); 
-    UNIT_ASSERT(tails.Has("b")); 
-    UNIT_ASSERT(tails.Has("bcd")); 
-    UNIT_ASSERT(!tails.Has("ab")); 
-    UNIT_ASSERT(!set.Has("")); 
- 
-    TCompactTrieSet<char> empty; 
-    UNIT_ASSERT(set.FindTails("abcd", empty)); 
-    UNIT_ASSERT(!empty.Has("a")); 
-    UNIT_ASSERT(!empty.Has("b")); 
-    UNIT_ASSERT(!empty.Has("c")); 
-    UNIT_ASSERT(!empty.Has("d")); 
-    UNIT_ASSERT(!empty.Has("d")); 
- 
+void TCompactTrieTest::TestTrieSet() {
+    TBuffer buffer;
+    {
+        TCompactTrieSet<char>::TBuilder builder;
+        UNIT_ASSERT(builder.Add("a", 0));
+        UNIT_ASSERT(builder.Add("ab", 1));
+        UNIT_ASSERT(builder.Add("abc", 1));
+        UNIT_ASSERT(builder.Add("abcd", 0));
+        UNIT_ASSERT(!builder.Add("abcd", 1));
+
+        TBufferStream stream(buffer);
+        builder.Save(stream);
+    }
+
+    TCompactTrieSet<char> set(TBlob::FromBuffer(buffer));
+    UNIT_ASSERT(set.Has("a"));
+    UNIT_ASSERT(set.Has("ab"));
+    UNIT_ASSERT(set.Has("abc"));
+    UNIT_ASSERT(set.Has("abcd"));
+    UNIT_ASSERT(!set.Has("abcde"));
+    UNIT_ASSERT(!set.Has("aa"));
+    UNIT_ASSERT(!set.Has("b"));
+    UNIT_ASSERT(!set.Has(""));
+
+    TCompactTrieSet<char> tails;
+    UNIT_ASSERT(set.FindTails("a", tails));
+    UNIT_ASSERT(tails.Has("b"));
+    UNIT_ASSERT(tails.Has("bcd"));
+    UNIT_ASSERT(!tails.Has("ab"));
+    UNIT_ASSERT(!set.Has(""));
+
+    TCompactTrieSet<char> empty;
+    UNIT_ASSERT(set.FindTails("abcd", empty));
+    UNIT_ASSERT(!empty.Has("a"));
+    UNIT_ASSERT(!empty.Has("b"));
+    UNIT_ASSERT(!empty.Has("c"));
+    UNIT_ASSERT(!empty.Has("d"));
+    UNIT_ASSERT(!empty.Has("d"));
+
     UNIT_ASSERT(empty.Has("")); // contains only empty string
-} 
- 
+}
+
 // Tests for trie with vector (list, set) values
 
 TVector<TUtf16String> TCompactTrieTest::GetSampleKeys(size_t nKeys) const {

Some files were not shown because too many files changed in this diff