
Restoring authorship annotation for <cobat@yandex-team.ru>. Commit 2 of 2.

cobat committed 3 years ago
commit 1d2e8a8e99

+ 2 - 2
build/scripts/merge_coverage_data.py

@@ -7,8 +7,8 @@ import uuid
 
 def main(args):
     output_file, args = args[0], args[1:]
-    # heretic@: Splits files on which could be merged( files ) and which should not be merged( expendables ) 
-    # expendables will be in output_file in form {name}{ordinal number of archive in args[]}.{extension} 
+    # heretic@: Splits files on which could be merged( files ) and which should not be merged( expendables )
+    # expendables will be in output_file in form {name}{ordinal number of archive in args[]}.{extension}
     try:
         split_i = args.index('-no-merge')
     except ValueError:

+ 2 - 2
library/README.md

@@ -36,11 +36,11 @@ library
 12. All language specific aspects are defined by `<language>` committee: see `library/<language>/README.md`.
 
 13. The library **MUST** satisfy `<language>` style-guide.
- 
+
 14. The existing library **SHOULD** be improved instead of creating a new one - if it is possible.
 
     Please do not create yet another library for the same thing: just improve existing one.
- 
+
 Contacts
 ===
 If you have any language-specific questions, please contact `<language>` [committee](https://wiki.yandex-team.ru/devrules/#profilnyekomitety).

+ 1 - 1
library/cpp/binsaver/mem_io.cpp

@@ -1 +1 @@
-#include "mem_io.h" 
+#include "mem_io.h"

+ 195 - 195
library/cpp/binsaver/mem_io.h

@@ -1,212 +1,212 @@
 #pragma once
 
-#include "bin_saver.h" 
-
-namespace NMemIoInternals { 
-    class TMemoryStream: public IBinaryStream { 
-        TVector<char>& Data; 
-        ui64 Pos; 
-
-    public: 
-        TMemoryStream(TVector<char>* data, ui64 pos = 0) 
-            : Data(*data) 
-            , Pos(pos) 
-        { 
-        } 
-        ~TMemoryStream() override { 
-        } // keep gcc happy 
-
-        bool IsValid() const override { 
-            return true; 
-        } 
-        bool IsFailed() const override { 
-            return false; 
-        } 
-
-    private: 
-        int WriteImpl(const void* userBuffer, int size) override { 
-            if (size == 0) 
-                return 0; 
-            Y_ASSERT(size > 0); 
-            if (Pos + size > Data.size()) 
-                Data.yresize(Pos + size); 
-            memcpy(&Data[Pos], userBuffer, size); 
-            Pos += size; 
-            return size; 
-        } 
-        int ReadImpl(void* userBuffer, int size) override { 
-            if (size == 0) 
-                return 0; 
-            Y_ASSERT(size > 0); 
-            int res = Min(Data.size() - Pos, (ui64)size); 
-            if (res) 
-                memcpy(userBuffer, &Data[Pos], res); 
-            Pos += res; 
-            return res; 
-        } 
-    }; 
- 
-    template <class T> 
-    inline void SerializeMem(bool bRead, TVector<char>* data, T& c, bool stableOutput = false) { 
-        if (IBinSaver::HasNonTrivialSerializer<T>(0u)) { 
-            TMemoryStream f(data); 
-            { 
-                IBinSaver bs(f, bRead, stableOutput); 
-                bs.Add(1, &c); 
-            } 
+#include "bin_saver.h"
+
+namespace NMemIoInternals {
+    class TMemoryStream: public IBinaryStream {
+        TVector<char>& Data;
+        ui64 Pos;
+
+    public:
+        TMemoryStream(TVector<char>* data, ui64 pos = 0)
+            : Data(*data)
+            , Pos(pos)
+        {
+        }
+        ~TMemoryStream() override {
+        } // keep gcc happy
+
+        bool IsValid() const override {
+            return true;
+        }
+        bool IsFailed() const override {
+            return false;
+        }
+
+    private:
+        int WriteImpl(const void* userBuffer, int size) override {
+            if (size == 0)
+                return 0;
+            Y_ASSERT(size > 0);
+            if (Pos + size > Data.size())
+                Data.yresize(Pos + size);
+            memcpy(&Data[Pos], userBuffer, size);
+            Pos += size;
+            return size;
+        }
+        int ReadImpl(void* userBuffer, int size) override {
+            if (size == 0)
+                return 0;
+            Y_ASSERT(size > 0);
+            int res = Min(Data.size() - Pos, (ui64)size);
+            if (res)
+                memcpy(userBuffer, &Data[Pos], res);
+            Pos += res;
+            return res;
+        }
+    };
+
+    template <class T>
+    inline void SerializeMem(bool bRead, TVector<char>* data, T& c, bool stableOutput = false) {
+        if (IBinSaver::HasNonTrivialSerializer<T>(0u)) {
+            TMemoryStream f(data);
+            {
+                IBinSaver bs(f, bRead, stableOutput);
+                bs.Add(1, &c);
+            }
         } else {
-            if (bRead) { 
-                Y_ASSERT(data->size() == sizeof(T)); 
-                c = *reinterpret_cast<T*>(&(*data)[0]); 
-            } else { 
-                data->yresize(sizeof(T)); 
-                *reinterpret_cast<T*>(&(*data)[0]) = c; 
-            } 
+            if (bRead) {
+                Y_ASSERT(data->size() == sizeof(T));
+                c = *reinterpret_cast<T*>(&(*data)[0]);
+            } else {
+                data->yresize(sizeof(T));
+                *reinterpret_cast<T*>(&(*data)[0]) = c;
+            }
         }
     }
 
-    //////////////////////////////////////////////////////////////////////////// 
-    class THugeMemoryStream: public IBinaryStream { 
-        TVector<TVector<char>>& Data; 
-        i64 Block, Pos; 
-        bool ShrinkOnRead; 
-
-        enum { 
-            MAX_BLOCK_SIZE = 1024 * 1024 // Aligned with cache size 
-        }; 
-
-    public: 
-        THugeMemoryStream(TVector<TVector<char>>* data, bool shrinkOnRead = false) 
-            : Data(*data) 
-            , Block(0) 
-            , Pos(0) 
-            , ShrinkOnRead(shrinkOnRead) 
-        { 
-            Y_ASSERT(!data->empty()); 
+    ////////////////////////////////////////////////////////////////////////////
+    class THugeMemoryStream: public IBinaryStream {
+        TVector<TVector<char>>& Data;
+        i64 Block, Pos;
+        bool ShrinkOnRead;
+
+        enum {
+            MAX_BLOCK_SIZE = 1024 * 1024 // Aligned with cache size
+        };
+
+    public:
+        THugeMemoryStream(TVector<TVector<char>>* data, bool shrinkOnRead = false)
+            : Data(*data)
+            , Block(0)
+            , Pos(0)
+            , ShrinkOnRead(shrinkOnRead)
+        {
+            Y_ASSERT(!data->empty());
+        }
+
+        ~THugeMemoryStream() override {
+        } // keep gcc happy
+
+        bool IsValid() const override {
+            return true;
         }
- 
-        ~THugeMemoryStream() override { 
-        } // keep gcc happy 
- 
-        bool IsValid() const override { 
-            return true; 
+        bool IsFailed() const override {
+            return false;
         }
-        bool IsFailed() const override { 
-            return false; 
-        } 
-
-    private: 
-        int WriteImpl(const void* userDataArg, int sizeArg) override { 
-            if (sizeArg == 0) 
-                return 0; 
-            const char* userData = (const char*)userDataArg; 
-            i64 size = sizeArg; 
-            i64 newSize = Pos + size; 
-            if (newSize > Data[Block].ysize()) { 
-                while (newSize > MAX_BLOCK_SIZE) { 
-                    int maxWrite = MAX_BLOCK_SIZE - Pos; 
-                    Data[Block].yresize(MAX_BLOCK_SIZE); 
-                    if (maxWrite) { 
-                        memcpy(&Data[Block][Pos], userData, maxWrite); 
-                        userData += maxWrite; 
-                        size -= maxWrite; 
-                    } 
-                    ++Block; 
-                    Pos = 0; 
-                    Data.resize(Block + 1); 
-                    newSize = Pos + size; 
+
+    private:
+        int WriteImpl(const void* userDataArg, int sizeArg) override {
+            if (sizeArg == 0)
+                return 0;
+            const char* userData = (const char*)userDataArg;
+            i64 size = sizeArg;
+            i64 newSize = Pos + size;
+            if (newSize > Data[Block].ysize()) {
+                while (newSize > MAX_BLOCK_SIZE) {
+                    int maxWrite = MAX_BLOCK_SIZE - Pos;
+                    Data[Block].yresize(MAX_BLOCK_SIZE);
+                    if (maxWrite) {
+                        memcpy(&Data[Block][Pos], userData, maxWrite);
+                        userData += maxWrite;
+                        size -= maxWrite;
+                    }
+                    ++Block;
+                    Pos = 0;
+                    Data.resize(Block + 1);
+                    newSize = Pos + size;
                 }
-                Data[Block].yresize(newSize); 
+                Data[Block].yresize(newSize);
+            }
+            if (size) {
+                memcpy(&Data[Block][Pos], userData, size);
             }
-            if (size) { 
-                memcpy(&Data[Block][Pos], userData, size); 
-            } 
-            Pos += size; 
-            return sizeArg; 
+            Pos += size;
+            return sizeArg;
         }
-        int ReadImpl(void* userDataArg, int sizeArg) override { 
-            if (sizeArg == 0) 
-                return 0; 
-
-            char* userData = (char*)userDataArg; 
-            i64 size = sizeArg; 
-            i64 rv = 0; 
-            while (size > 0) { 
-                int curBlockSize = Data[Block].ysize(); 
-                int maxRead = 0; 
-                if (Pos + size > curBlockSize) { 
-                    maxRead = curBlockSize - Pos; 
-                    if (maxRead) { 
-                        memcpy(userData, &Data[Block][Pos], maxRead); 
-                        userData += maxRead; 
-                        size -= maxRead; 
-                        rv += maxRead; 
-                    } 
-                    if (Block + 1 == Data.ysize()) { 
-                        memset(userData, 0, size); 
-                        return rv; 
-                    } 
-                    if (ShrinkOnRead) { 
-                        TVector<char>().swap(Data[Block]); 
-                    } 
-                    ++Block; 
-                    Pos = 0; 
-                } else { 
-                    memcpy(userData, &Data[Block][Pos], size); 
-                    Pos += size; 
-                    rv += size; 
+        int ReadImpl(void* userDataArg, int sizeArg) override {
+            if (sizeArg == 0)
+                return 0;
+
+            char* userData = (char*)userDataArg;
+            i64 size = sizeArg;
+            i64 rv = 0;
+            while (size > 0) {
+                int curBlockSize = Data[Block].ysize();
+                int maxRead = 0;
+                if (Pos + size > curBlockSize) {
+                    maxRead = curBlockSize - Pos;
+                    if (maxRead) {
+                        memcpy(userData, &Data[Block][Pos], maxRead);
+                        userData += maxRead;
+                        size -= maxRead;
+                        rv += maxRead;
+                    }
+                    if (Block + 1 == Data.ysize()) {
+                        memset(userData, 0, size);
+                        return rv;
+                    }
+                    if (ShrinkOnRead) {
+                        TVector<char>().swap(Data[Block]);
+                    }
+                    ++Block;
+                    Pos = 0;
+                } else {
+                    memcpy(userData, &Data[Block][Pos], size);
+                    Pos += size;
+                    rv += size;
                     return rv;
                 }
             }
-            return rv; 
+            return rv;
+        }
+    };
+
+    template <class T>
+    inline void SerializeMem(bool bRead, TVector<TVector<char>>* data, T& c, bool stableOutput = false) {
+        if (data->empty()) {
+            data->resize(1);
         }
-    }; 
- 
-    template <class T> 
-    inline void SerializeMem(bool bRead, TVector<TVector<char>>* data, T& c, bool stableOutput = false) { 
-        if (data->empty()) { 
-            data->resize(1); 
-        } 
-        THugeMemoryStream f(data); 
-        { 
-            IBinSaver bs(f, bRead, stableOutput); 
-            bs.Add(1, &c); 
-        } 
-    } 
-} 
-
-template <class T> 
-inline void SerializeMem(const TVector<char>& data, T& c) { 
-    if (IBinSaver::HasNonTrivialSerializer<T>(0u)) { 
-        TVector<char> tmp(data); 
-        SerializeFromMem(&tmp, c); 
-    } else { 
-        Y_ASSERT(data.size() == sizeof(T)); 
-        c = *reinterpret_cast<const T*>(&data[0]); 
+        THugeMemoryStream f(data);
+        {
+            IBinSaver bs(f, bRead, stableOutput);
+            bs.Add(1, &c);
+        }
+    }
+}
+
+template <class T>
+inline void SerializeMem(const TVector<char>& data, T& c) {
+    if (IBinSaver::HasNonTrivialSerializer<T>(0u)) {
+        TVector<char> tmp(data);
+        SerializeFromMem(&tmp, c);
+    } else {
+        Y_ASSERT(data.size() == sizeof(T));
+        c = *reinterpret_cast<const T*>(&data[0]);
+    }
+}
+
+template <class T, class D>
+inline void SerializeToMem(D* data, T& c, bool stableOutput = false) {
+    NMemIoInternals::SerializeMem(false, data, c, stableOutput);
+}
+
+template <class T, class D>
+inline void SerializeFromMem(D* data, T& c, bool stableOutput = false) {
+    NMemIoInternals::SerializeMem(true, data, c, stableOutput);
+}
+
+// Frees memory in (*data)[i] immediately upon it's deserialization, thus keeps low overall memory consumption for data + object.
+template <class T>
+inline void SerializeFromMemShrinkInput(TVector<TVector<char>>* data, T& c) {
+    if (data->empty()) {
+        data->resize(1);
+    }
+    NMemIoInternals::THugeMemoryStream f(data, true);
+    {
+        IBinSaver bs(f, true, false);
+        bs.Add(1, &c);
     }
+    data->resize(0);
+    data->shrink_to_fit();
 }
- 
-template <class T, class D> 
-inline void SerializeToMem(D* data, T& c, bool stableOutput = false) { 
-    NMemIoInternals::SerializeMem(false, data, c, stableOutput); 
-} 
- 
-template <class T, class D> 
-inline void SerializeFromMem(D* data, T& c, bool stableOutput = false) { 
-    NMemIoInternals::SerializeMem(true, data, c, stableOutput); 
-} 
- 
-// Frees memory in (*data)[i] immediately upon it's deserialization, thus keeps low overall memory consumption for data + object. 
-template <class T> 
-inline void SerializeFromMemShrinkInput(TVector<TVector<char>>* data, T& c) { 
-    if (data->empty()) { 
-        data->resize(1); 
-    } 
-    NMemIoInternals::THugeMemoryStream f(data, true); 
-    { 
-        IBinSaver bs(f, true, false); 
-        bs.Add(1, &c); 
-    } 
-    data->resize(0); 
-    data->shrink_to_fit(); 
-} 
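
For reference, a minimal, hypothetical usage sketch of the `SerializeToMem`/`SerializeFromMem` helpers declared in this header. Only the free-function signatures come from the file above; `TMyStruct` and its `SAVELOAD` member are illustrative assumptions about a binsaver-serializable type.

```cpp
// Sketch only: round-trips an object through a TVector<char> using the helpers
// declared in mem_io.h. TMyStruct and SAVELOAD usage are assumptions.
#include <library/cpp/binsaver/mem_io.h>

struct TMyStruct {
    ui32 Id = 0;
    TString Name;
    SAVELOAD(Id, Name); // assumed binsaver-style serialization macro from bin_saver.h
};

void RoundTripExample() {
    TMyStruct src;
    src.Id = 42;
    src.Name = "example";

    TVector<char> buffer;
    SerializeToMem(&buffer, src);   // write src into the in-memory buffer

    TMyStruct dst;
    SerializeFromMem(&buffer, dst); // read it back from the same buffer
}
```

For very large objects the header also provides the `TVector<TVector<char>>` overloads and `SerializeFromMemShrinkInput`, which frees each input block as soon as it has been consumed.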

+ 1 - 1
library/cpp/binsaver/ya.make

@@ -7,7 +7,7 @@ SRCS(
     bin_saver.cpp
     blob_io.cpp
     buffered_io.cpp
-    mem_io.cpp 
+    mem_io.cpp
     util_stream_io.cpp
 )
 

+ 1 - 1
library/cpp/blockcodecs/core/stream.cpp

@@ -100,7 +100,7 @@ void TCodedOutput::DoWrite(const void* buf, size_t len) {
         in += avail;
         len -= avail;
 
-        Y_VERIFY(FlushImpl(), "flush on writing failed"); 
+        Y_VERIFY(FlushImpl(), "flush on writing failed");
     }
 }
 

+ 4 - 4
library/cpp/codecs/README.md

@@ -1,4 +1,4 @@
-This is a library of compression algorithms with a unified interface and serialization. 
+This is a library of compression algorithms with a unified interface and serialization.
 See also library/cpp/codecs/static, where a support for statically compiled dictionaries is implemented.
 
 All algorithms have a common `ICodec` interface (described in codecs.h).
@@ -26,9 +26,9 @@ The `ICodec` interface has the following methods:\
             - The name of the codec. It is required for registration of the codec in the system of serialization/deserialization.\
                     For example, it allows you to save information about which combination of codecs was in use (see below).\
     `virtual void Learn(ISequenceReader*);`\
-            - The interface for teaching codecs that use information about the distribution of data. 
+            - The interface for teaching codecs that use information about the distribution of data.
 
-In addition, the library has a number of utilities that allow a more flexible use of it. 
+In addition, the library has a number of utilities that allow a more flexible use of it.
 
 In the `ICodec` class the following methods are available:\
     `static TCodecPtr GetInstance(const TString& name);`\
@@ -43,4 +43,4 @@ In the `ICodec` class the following methods are available:\
     `static TCodecPtr RestoreFromString(TStringBuf data);`\
             - Loads the codec instance from the string\
     `static TVector<TString> GetCodecsList();`\
-            - The list of registered codecs 
+            - The list of registered codecs
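
For reference, a minimal sketch of how the static lookup methods listed in this README might be used. The `NCodecs` namespace, the header path, and the codec name are assumptions; only `GetCodecsList`, `GetInstance`, and `GetName` are taken from the text above.

```cpp
// Sketch only: enumerate registered codecs and fetch one instance by name.
#include <library/cpp/codecs/codecs.h>

#include <util/stream/output.h>

void ShowRegisteredCodecs() {
    using namespace NCodecs; // assumed namespace of the library

    for (const TString& name : ICodec::GetCodecsList()) {
        Cout << name << Endl;                          // every registered codec
    }

    TCodecPtr codec = ICodec::GetInstance("lz4fast");  // codec name is illustrative
    Cout << codec->GetName() << Endl;                  // name used for (de)serialization
}
```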

+ 2 - 2
library/cpp/codecs/static/tools/static_codec_checker/README

@@ -1,4 +1,4 @@
-This is a viewer for generated codec and utility for verification of the compression quality on a new data. 
+This is a viewer for generated codec and utility for verification of the compression quality on a new data.
 
-Usage: 
+Usage:
 static_codec_checker -t -c 029b29ff64a74927.codec_info -f plain samples.txt

+ 2 - 2
library/cpp/codecs/static/tools/static_codec_generator/README

@@ -1,4 +1,4 @@
-This is a utility for reproducible  teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource. 
+This is a utility for reproducible  teaching of a codec. And also for saving it into a file with a unique name for a static compilation as a resource.
 
-Usage: 
+Usage:
 static_codec_generator -t -m 'the training data description' -f plain samples.txt

+ 46 - 46
library/cpp/containers/comptrie/README.md

@@ -1,17 +1,17 @@
-Compact trie 
-============= 
+Compact trie
+=============
 
 The comptrie library is a fast and very tightly packed
-implementation of a prefix tree (Sedgewick's T-trie, that is a ternary tree, 
-see https://www.cs.princeton.edu/~rs/strings/paper.pdf, 
+implementation of a prefix tree (Sedgewick's T-trie, that is a ternary tree,
+see https://www.cs.princeton.edu/~rs/strings/paper.pdf,
 https://www.cs.upc.edu/~ps/downloads/tst/tst.html). It contains tools for creating, optimizing, and serializing trees, accessing by key, and performing 
 various searches. Because it is template-based and performance-oriented, a significant
-part of the library consists of inline functions, so if you don't need all the 
+part of the library consists of inline functions, so if you don't need all the
 features of the library, consider including a more specific header file instead of the top-level
 comptrie.h file.
 
-Description of the data structure 
---------------------------------- 
+Description of the data structure
+---------------------------------
 
 A prefix tree is an implementation of the map data structure
 for cases when keys are sequences of characters. The nodes on this tree
@@ -29,9 +29,9 @@ The library implements tree optimization by merging identical subtrees, which me
 the tree becomes a DAG (Directed Acyclic Graph –
 an oriented graph without oriented cycles).
 
-The main class TCompactTrie is defined in comptrie_trie.h and is templatized: 
+The main class TCompactTrie is defined in comptrie_trie.h and is templatized:
 - The first parameter of the template is the character type. It should be an
-integer type, which means that arithmetical operations must be defined for it. 
+integer type, which means that arithmetical operations must be defined for it.
 - The second parameter of the template is the value type.
 - The third parameter is the packer class, which packs values in order to quickly and compactly
 serialize the value type to a continuous memory buffer, deserialize it
@@ -40,29 +40,29 @@ memory buffer. Good packers have already been written for most types, and they a
 library/cpp/packers. For more information, please refer to the documentation for these packers.
 
 The set.h file defines a modification for cases when keys must be stored
-without values. 
+without values.
 
 When a tree is built from scratch, the value corresponding to an empty key is
 assigned to a single-character key '\0'. So in a tree with the 'char' character type,
 the empty key and the '\0' key are bound together. For a subtree received from
 a call to FindTails, this restriction no longer exists.
 
-Creating trees 
--------------- 
+Creating trees
+--------------
 
-Building a tree from a list of key-value pairs is performed by the 
+Building a tree from a list of key-value pairs is performed by the
 TCompactTrieBuilder class described in the comptrie_builder.h file.
 
 This class allows you to add words to a tree one at a time, merge a complete
-subtree, and also use an unfinished tree as a map. 
+subtree, and also use an unfinished tree as a map.
 
-An important optimization is the prefix-grouped mode when you need to add keys 
+An important optimization is the prefix-grouped mode when you need to add keys
 in a certain order (for details, see the comments in the header file). The resulting tree is compactly packed while keys are being added, and the memory consumption is approximately the same as for
 the completed tree. For the default mode, compact stacking is turned on at the
 very end, and the data consumes quite a lot of memory up until that point.
 
-Optimizing trees 
----------------- 
+Optimizing trees
+----------------
 
 After a tree is created, there are two optimizing operations that can be applied:
  - Minimization to a DAG by merging equal subtrees.
@@ -70,7 +70,7 @@ After a tree is created, there are two optimizing operations that can be applied
 The functions that implement these operations are declared in the comptrie_builder.h file. The first
 optimization is implemented by the CompactTrieMinimize function, and the second is implemented by
 CompactTrieMakeFastLayout. You can perform both at once by calling the
-CompactTrieMinimizeAndMakeFastLayout function. 
+CompactTrieMinimizeAndMakeFastLayout function.
 
 ### Minimization ###
 
@@ -86,21 +86,21 @@ won't have any effect on the tree.
 The second optimization function results in fewer cache misses, but it causes the
 tree to grow in size. Our experience has shown a 5% gain
 in speed for some tries. The algorithm consumes about three times more memory than
-the amount required for the source tree. So if the machine has enough memory to 
+the amount required for the source tree. So if the machine has enough memory to
 assemble a tree, it does not neccessarily mean that it has enough memory to run
 the algorithm. To learn about the theory behind this algorithm, read the comments before the declaration of the CompactTrieMinimize function.
 
-Serializing trees 
------------------ 
+Serializing trees
+-----------------
 
 The tree resides in memory as a sequence of nodes. Links to other nodes are always
-counted relative to the position of the current node. This allows you to save a 
+counted relative to the position of the current node. This allows you to save a
 tree to disk as it is and then re-load it using mmap(). The TCompactTrie class has the
 TBlob constructor for reading a tree from disk. The TCompactTrieBuilder class has
-Save/SaveToFile methods for writing a built tree to a stream or a file. 
+Save/SaveToFile methods for writing a built tree to a stream or a file.
 
 Accessing trees
---------------- 
+---------------
 
 As a rule, all methods that accept a key as input have two variants:
 - One takes the key in the format: pointer to the beginning of the key, length.
@@ -115,51 +115,51 @@ An important operation that distinguishes a tree from a simple map is implemente
 which allows you to obtain a subtree consisting of all possible extensions of the
 given prefix.
 
-Iterators for trees 
-------------------- 
+Iterators for trees
+-------------------
 
 First of all, there is a typical map iterator over all key-value pairs called
 TConstIterator. A tree has three methods that return it: Begin, End, and
-UpperBound. The latter takes a key as input and returns an iterator to the 
+UpperBound. The latter takes a key as input and returns an iterator to the
 smallest key that is not smaller than the input key.
 
-The rest of the iterators are not so widely used, and thus are located in 
-separate files. 
+The rest of the iterators are not so widely used, and thus are located in
+separate files.
 
 TPrefixIterator is defined in the prefix_iterator.h file. It allows
-iterations over all the prefixes of this key available in the tree. 
+iterations over all the prefixes of this key available in the tree.
 
 TSearchIterator is defined in the search_iterator.h file. It allows you to enter
 a key in a tree one character at a time and see where it ends up. The following character can
-be selected depending on the current result. You can also copy the iterator and 
+be selected depending on the current result. You can also copy the iterator and
 proceed on two different paths. You can actually achieve the same result with
 repeated use of the FindTails method, but the authors of this iterator claim
 that they obtained a performance gain with it.
 
 Appendix. Memory implementation details
---------------------------------------- 
+---------------------------------------
 
-*If you are not going to modify the library, then you do not need to read further.* 
+*If you are not going to modify the library, then you do not need to read further.*
 
 First, if the character type has a size larger than 1 byte, then all keys that use these characters are converted to byte strings in the big-endian way. This
 means that character bytes are written in a string from the most significant
-to the least significant from left to right. Thus it is reduced to the case when 
+to the least significant from left to right. Thus it is reduced to the case when
 the character in use is 'char'.
 
 The tree resides in memory as a series of consecutive nodes. The nodes can have different
 sizes, so the only way to identify the boundaries of nodes is by passing the entire
-tree. 
+tree.
 
-### Node structure ### 
+### Node structure ###
 
 The structure of a node, as can be understood from thoughtfully reading the
 LeapByte function in Comptrie_impl.h, is the following:
-- The first byte is for service flags. 
+- The first byte is for service flags.
 - The second byte is a character (unless it is the ε-link type of node
   described below, which has from 1 to 7 bytes of offset distance from the
   beginning of this node to the content node, and nothing else).
 
-Thus, the size of any node is at least 2 bytes. All other elements of a node 
+Thus, the size of any node is at least 2 bytes. All other elements of a node
 are optional. Next there is from 0 to 7 bytes of the packed offset from the beginning
 of this node to the beginning of the root node of a subtree with the younger
 siblings. It is followed by 0 to 7 bytes of the packed offset from the beginning of this
@@ -171,15 +171,15 @@ that the tree has children, there is a root node of the subtree of children.
 
 The packed offset is restricted to 7 bytes, and this gives us a limit on the largest 
 possible size of a tree. You need to study the packer code to understand
-the exact limit. 
+the exact limit.
 
 All packed offsets are nonnegative, meaning that roots of subtrees with
 siblings and the node pointed to by the ε-link must be located
 strictly to the right of the current node in memory. This does not allow placement of
 finite state machines with oriented cycles in the comptrie. But it does allow you to
-effectively stack the comptrie from right to left. 
+effectively stack the comptrie from right to left.
 
-### Service flags ### 
+### Service flags ###
 
 The byte of service flags contains (as shown by the constants at the beginning of
 the comptrie_impl.h file):
@@ -189,14 +189,14 @@ the comptrie_impl.h file):
   with elder siblings.
 - 3 bits of MT_SIZEMASK << MT_LEFTSHIFT, indicating the size of the packed
   offset to a subtree with younger siblings.
-If one of these subtrees is not present, then the size of the corresponding 
-packed offset is 0, and vice versa. 
+If one of these subtrees is not present, then the size of the corresponding
+packed offset is 0, and vice versa.
 
-### ε-links ### 
+### ε-links ###
 
 These nodes only occur if we optimized a tree into a DAG and got two nodes with
 merged subtrees of children. Since the offset to the subtree of children can't be
-specified and the root of this subtree should lie just after the value, we have 
+specified and the root of this subtree should lie just after the value, we have
 to add a node of the ε-link type, which contains the offset to the root subtree of
 children and nothing more. This applies to all nodes that have equal subtrees of children,
 except the rightmost node. The size of this offset is set in 3 bits of MT_SIZEMASK
@@ -206,7 +206,7 @@ As the implementation of the IsEpsilonLink function in
 comptrie_impl.h demonstrates, the ε-link differs from other nodes in that it does not have the MT_NEXT flag or the MT_FINAL
  flag, so it can always be
 identified by the flags. Of course, the best programming practice is to call the
-function itself instead of examining the flags. 
+function itself instead of examining the flags.
 
 Note that the ε-link flags do not use the MT_SIZEMASK <<
 MT_LEFTSHIFT` bits, which allows us to start using ε-links for some other purpose.
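
For reference, a minimal sketch of the build/save/load/lookup cycle this README describes. `TCompactTrieBuilder`, `TCompactTrie`, `Save`, and the `TBlob` constructor are named in the text above; the exact `Add`/`Find` signatures and the `TBufferOutput`/`TBlob::FromBuffer` plumbing are assumptions about the surrounding util API.

```cpp
// Sketch only: build a trie, serialize it to a buffer, reopen it, and look up a key.
#include <library/cpp/containers/comptrie/comptrie.h>

#include <util/memory/blob.h>
#include <util/stream/buffer.h>

void BuildAndQuery() {
    TCompactTrieBuilder<char, ui32> builder;
    builder.Add("apple", 1);            // keys can be added one at a time
    builder.Add("apricot", 2);

    TBufferOutput out;
    builder.Save(out);                  // serialize the finished trie to a stream

    // Re-open the serialized image; the same bytes could be mmap()ed from disk.
    TCompactTrie<char, ui32> trie(TBlob::FromBuffer(out.Buffer()));

    ui32 value = 0;
    if (trie.Find("apricot", &value)) { // plain map-style lookup
        // value == 2
    }
}
```

After building, `CompactTrieMinimize` and `CompactTrieMakeFastLayout` (or the combined `CompactTrieMinimizeAndMakeFastLayout`) from comptrie_builder.h can be applied to the serialized image, as described in the "Optimizing trees" section.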

Some files were not shown because too many files changed in this diff