Browse Source

Restoring authorship annotation for <snow@yandex-team.ru>. Commit 2 of 2.

snow 3 years ago
parent
commit
ec31dbabb2

+ 70 - 70
library/cpp/comptable/comptable.cpp

@@ -1,24 +1,24 @@
-#include <util/system/defaults.h> 
-#include <util/system/filemap.h> 
+#include <util/system/defaults.h>
+#include <util/system/filemap.h>
 #include <util/generic/string.h>
-#include <util/generic/vector.h> 
-#include <util/string/cast.h> 
-#include <util/stream/file.h> 
+#include <util/generic/vector.h>
+#include <util/string/cast.h>
+#include <util/stream/file.h>
 #include <library/cpp/compproto/huff.h>
- 
-#include "comptable.h" 
- 
+
+#include "comptable.h"
+
 #include <cstdlib>
 
-namespace NCompTable { 
+namespace NCompTable {
     static const ui32 magicHashMul = 0xd077cd1f;
     static const size_t hashSizeLog = 18;
     static const size_t hashSize = 1 << hashSizeLog;
- 
+
     size_t HashIndex(ui32 value, ui32 hashMul) {
         return (value * hashMul) >> (32 - hashSizeLog);
     }
- 
+
     void TCompressorTable::GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const {
         ui64 code = value - entry.MinValue;
         bitCode = (code << entry.PrefixBits) | entry.Prefix;
@@ -34,19 +34,19 @@ namespace NCompTable {
                 GetHuffCode(huffCode, value, bitCode, bitLength);
                 return;
             }
-        } 
+        }
         abort();
-    } 
+    }
     ui8 TCompressorTable::GetHuffIndex(ui8 prefix) {
         for (size_t i = 0; i < 10; ++i) {
             if ((prefix & ((1 << HuffCodes[i].PrefixBits) - 1)) == HuffCodes[i].Prefix) {
                 return i;
             }
-        } 
+        }
         abort();
         return 0;
-    } 
- 
+    }
+
     void TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536]) {
         i64 add = 1;
         while (1) {
@@ -54,9 +54,9 @@ namespace NCompTable {
                 return;
             }
             add = add * 2;
-        } 
-    } 
- 
+        }
+    }
+
     bool TCompressorTable::BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add) {
         TVector<NCompProto::TCode> codes;
         size_t bits[] = {0, 1, 2, 4, 8, 10, 12, 14, 16, 32};
@@ -71,7 +71,7 @@ namespace NCompTable {
             codes.push_back(NCompProto::TCode(weight + add, ui32(off), bits[i]));
             total = total > weight ? total - weight : 0;
             off += size;
-        } 
+        }
         codes.push_back(NCompProto::TCode(total + add, 0, 32));
         i64 ret = NCompProto::BuildHuff(codes);
         Y_UNUSED(ret);
@@ -84,13 +84,13 @@ namespace NCompTable {
             if (code.PrefixBits > 8) {
                 return false;
             }
-        } 
+        }
         for (size_t i = 0; i < 256; ++i) {
             HuffIndices[i] = GetHuffIndex(ui8(i));
         }
         return true;
-    } 
- 
+    }
+
     template <class TIterator>
     void Iterate(const TStringBuf& data, TIterator& iterator) {
         size_t i = 0;
@@ -103,8 +103,8 @@ namespace NCompTable {
             memcpy(buffer, data.data() + i, data.size() - i);
             iterator.Visit(buffer[0]);
         }
-    } 
- 
+    }
+
     class TDataCompressor {
         ui32 Keys[hashSize];
         ui32 Vals[hashSize];
@@ -128,7 +128,7 @@ namespace NCompTable {
                 Vals[index] = i;
                 table.GetHuffCode(ui32(i), BitCodes[index], BitLengths[index]);
             }
-        } 
+        }
         void GetStat(ui32 val, ui32& stat) const {
             size_t index = HashIndex(val, HashMul);
             if (Keys[index] == val) {
@@ -136,7 +136,7 @@ namespace NCompTable {
             } else {
                 stat = ui32(-1);
             }
-        } 
+        }
         void GetStat(ui32 val, ui64& bitCode, ui8& bitLength) const {
             size_t index = HashIndex(val, HashMul);
             if (Keys[index] == val) {
@@ -145,7 +145,7 @@ namespace NCompTable {
             } else {
                 Table.GetLastHuffCode(val, bitCode, bitLength);
             }
-        } 
+        }
         size_t Compress4(const ui32* data4, ui8* dataBuff) const {
             ui8* oldBuff = dataBuff;
             ++dataBuff;
@@ -174,7 +174,7 @@ namespace NCompTable {
             }
             oldBuff[0] = ui8(status);
             return dataBuff - oldBuff;
-        } 
+        }
         struct TCompressorIterator {
             TVector<char>& Result;
             const TDataCompressor& Compressor;
@@ -187,7 +187,7 @@ namespace NCompTable {
                 , Index(0)
                 , RealSize(0)
             {
-            } 
+            }
             void Flush() {
                 Result.yresize(RealSize + 32);
                 RealSize += Compressor.Compress4(Cached, reinterpret_cast<ui8*>(Result.data()) + RealSize);
@@ -246,11 +246,11 @@ namespace NCompTable {
             if (!HQ) {
                 TCompressorIterator iterator(rslt, *this);
                 Iterate(stringBuf, iterator);
-            } else { 
+            } else {
                 THQCompressorIterator iterator(rslt, *this);
                 Iterate(stringBuf, iterator);
-            } 
-        } 
+            }
+        }
     };
 
     class TDataDecompressor {
@@ -259,8 +259,8 @@ namespace NCompTable {
     public:
         TDataDecompressor(const TCompressorTable& table)
             : Table(table)
-        { 
-        } 
+        {
+        }
         size_t Decompress(ui32* data4, const ui8* dataBuff, ui32 type) const {
             if (type == 0) {
                 data4[0] = 0;
@@ -280,7 +280,7 @@ namespace NCompTable {
                 memcpy(data4, dataBuff, sizeof(*data4));
                 return 4;
             }
-        } 
+        }
         size_t Decompress(ui32* data4, const ui8* dataBuff) const {
             ui32 status = *dataBuff;
             const ui8* oldData = dataBuff;
@@ -298,7 +298,7 @@ namespace NCompTable {
                 memcpy(&val, dataBuff + off, sizeof(val));
             } else {
                 memcpy(&val, dataBuff + off, border - off);
-            } 
+            }
             val >>= (offset & 7);
             ui8 index = Table.HuffIndices[ui8(val)];
             const NCompProto::TCoderEntry& entry = Table.HuffCodes[index];
@@ -307,7 +307,7 @@ namespace NCompTable {
             val = val + entry.MinValue;
             data[0] = (index == 9) ? val : Table.Table[val];
             offset += allBits;
-        } 
+        }
         size_t GetJunkSize() const {
             return 8;
         }
@@ -331,7 +331,7 @@ namespace NCompTable {
                 len = length;
                 for (size_t i = 0; i < length32; ++i) {
                     Decompress(&result[i], src, offset, border);
-                } 
+                }
             } else {
                 ui32 data[4];
                 src += Decompress(data, src);
@@ -345,39 +345,39 @@ namespace NCompTable {
                 for (size_t j = 1; j < length32x4; ++j) {
                     src += Decompress(&result[j * 4 - 1], src);
                 }
-            } 
+            }
             rslt.resize(len);
-        } 
-    }; 
+        }
+    };
 
     struct TSamplerIterator {
         TDataSampler& Sampler;
         TSamplerIterator(TDataSampler& sampler)
             : Sampler(sampler)
-        { 
-        } 
-        void Visit(const ui32 data) { 
+        {
+        }
+        void Visit(const ui32 data) {
             Sampler.AddStat(data);
-        } 
+        }
     };
 
     struct TGreaterComparer {
         //std::greater in arcadia???
         bool operator()(ui64 a, ui64 b) const {
             return a > b;
-        } 
-    }; 
+        }
+    };
 
     TDataSampler::TDataSampler() {
         memset(this, 0, sizeof(*this));
-    } 
- 
+    }
+
     void TDataSampler::BuildTable(TCompressorTable& table) const {
         std::vector<ui64> sorted;
         for (size_t i = 0; i < Size; ++i) {
             ui64 res = (ui64(EntryHit[i]) << 32) + ui32(i);
             sorted.push_back(res);
-        } 
+        }
         std::vector<ui32> entryTbl(Size);
         std::sort(sorted.begin(), sorted.end(), TGreaterComparer());
         table.HashMul = magicHashMul;
@@ -386,9 +386,9 @@ namespace NCompTable {
             ui32 ind = ui32(sorted[i]);
             table.Table[i] = EntryVal[ind];
             freqs[i] = EntryHit[ind];
-        } 
+        }
         table.BuildHuffCodes(Counter, freqs);
-    } 
+    }
 
     void TDataSampler::AddStat(ui32 val) {
         ++Counter;
@@ -397,47 +397,47 @@ namespace NCompTable {
             ++EntryHit[hashInd];
         } else if (EntryHit[hashInd] > 1) {
             --EntryHit[hashInd];
-        } else { 
+        } else {
             EntryHit[hashInd] = 1;
             EntryVal[hashInd] = val;
-        } 
-    } 
+        }
+    }
 
     void TDataSampler::AddStat(const TStringBuf& stringBuf) {
         TSamplerIterator iterator(*this);
         Iterate(stringBuf, iterator);
-    } 
- 
+    }
+
     TChunkCompressor::TChunkCompressor(bool highQuality, const TCompressorTable& table)
         : HighQuality(highQuality)
-    { 
+    {
         Compressor.Reset(new TDataCompressor(table));
-    } 
- 
+    }
+
     void TChunkCompressor::Compress(TStringBuf data, TVector<char>* rslt) const {
         if (HighQuality) {
             Compressor->Compress<1>(data, *rslt);
         } else {
             Compressor->Compress<0>(data, *rslt);
         }
-    } 
- 
+    }
+
     TChunkCompressor::~TChunkCompressor() = default;
- 
+
     TChunkDecompressor::TChunkDecompressor(bool highQuality, const TCompressorTable& table)
         : HighQuality(highQuality)
     {
         Decompressor.Reset(new TDataDecompressor(table));
-    } 
- 
+    }
+
     void TChunkDecompressor::Decompress(TStringBuf data, TVector<char>* rslt) const {
         if (HighQuality) {
             Decompressor->Decompress<1>(data, *rslt);
         } else {
             Decompressor->Decompress<0>(data, *rslt);
         }
-    } 
- 
+    }
+
     TChunkDecompressor::~TChunkDecompressor() = default;
- 
-} 
+
+}

+ 25 - 25
library/cpp/comptable/comptable.h

@@ -1,18 +1,18 @@
-#pragma once 
+#pragma once
+
+#include <util/generic/vector.h>
+#include <util/memory/blob.h>
+#include <util/ysaveload.h>
 
-#include <util/generic/vector.h> 
-#include <util/memory/blob.h> 
-#include <util/ysaveload.h> 
- 
 #include <library/cpp/compproto/huff.h>
- 
-namespace NCompTable { 
+
+namespace NCompTable {
     struct TCompressorTable {
         ui32 Table[65536];
         ui32 HashMul;
         NCompProto::TCoderEntry HuffCodes[10];
         ui8 HuffIndices[256];
- 
+
         void GetHuffCode(const NCompProto::TCoderEntry& entry, ui32 value, ui64& bitCode, ui8& bitLength) const;
         void GetLastHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const;
         void GetHuffCode(ui32 value, ui64& bitCode, ui8& bitLength) const;
@@ -20,7 +20,7 @@ namespace NCompTable {
         void BuildHuffCodes(i64 totalFreq, i64 freqs[65536]);
         bool BuildHuffCodes(i64 totalFreq, i64 freqs[65536], i64 add);
     };
- 
+
     struct TDataSampler {
         enum {
             Size = 1 << 18,
@@ -28,48 +28,48 @@ namespace NCompTable {
         ui32 EntryVal[Size];
         i64 EntryHit[Size];
         i64 Counter;
- 
+
     public:
         TDataSampler();
         void BuildTable(TCompressorTable& table) const;
         void AddStat(ui32 val);
         void AddStat(const TStringBuf& stringBuf);
-    }; 
- 
+    };
+
     class TDataCompressor;
     class TDataDecompressor;
- 
+
     class TChunkCompressor {
     public:
         TChunkCompressor(bool highQuality, const TCompressorTable& table);
         void Compress(TStringBuf data, TVector<char>* result) const;
         ~TChunkCompressor();
- 
+
     private:
         bool HighQuality;
         THolder<TDataCompressor> Compressor;
     };
- 
+
     class TChunkDecompressor {
     public:
         TChunkDecompressor(bool highQuality, const TCompressorTable& table);
         void Decompress(TStringBuf data, TVector<char>* result) const;
         ~TChunkDecompressor();
- 
+
     private:
         bool HighQuality;
         THolder<TDataDecompressor> Decompressor;
     };
- 
+
 }
 
-template <> 
-class TSerializer<NCompTable::TCompressorTable> { 
-public: 
+template <>
+class TSerializer<NCompTable::TCompressorTable> {
+public:
     static inline void Save(IOutputStream* out, const NCompTable::TCompressorTable& entry) {
-        SavePodType(out, entry); 
-    } 
+        SavePodType(out, entry);
+    }
     static inline void Load(IInputStream* in, NCompTable::TCompressorTable& entry) {
-        LoadPodType(in, entry); 
-    } 
-}; 
+        LoadPodType(in, entry);
+    }
+};

+ 47 - 47
library/cpp/comptable/usage/usage.cpp

@@ -1,75 +1,75 @@
 #include <library/cpp/comptable/comptable.h>
- 
+
 #include <util/random/random.h>
 #include <util/random/fast.h>
 
 #include <time.h>
 #include <stdlib.h>
 
-using namespace NCompTable; 
- 
+using namespace NCompTable;
+
 template <bool HQ>
 void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {
     TVector<char> compressed;
     TVector<char> decompressed;
- 
-    TChunkCompressor compressor(HQ, table); 
-    TChunkDecompressor deCompressor(HQ, table); 
- 
-    size_t origSize = 0; 
-    size_t compSize = 0; 
-    float cl1 = clock(); 
+
+    TChunkCompressor compressor(HQ, table);
+    TChunkDecompressor deCompressor(HQ, table);
+
+    size_t origSize = 0;
+    size_t compSize = 0;
+    float cl1 = clock();
     for (size_t i = 0; i < lines.size(); ++i) {
         const TString& line = lines[i];
-        compressor.Compress(line, &compressed); 
-        origSize += line.size(); 
-        compSize += compressed.size(); 
-        TStringBuf in(compressed.data(), compressed.size()); 
-        deCompressor.Decompress(in, &decompressed); 
+        compressor.Compress(line, &compressed);
+        origSize += line.size();
+        compSize += compressed.size();
+        TStringBuf in(compressed.data(), compressed.size());
+        deCompressor.Decompress(in, &decompressed);
         if (decompressed.size() != line.size() || memcmp(decompressed.data(), line.data(), decompressed.size())) {
-            Cout << i << "\n"; 
+            Cout << i << "\n";
             Cout << line << "\n"
                  << TString(decompressed.data(), decompressed.size()) << "\n";
-            abort(); 
-        } 
-    } 
-    float cl2 = clock(); 
-    float secs = (cl2 - cl1) / CLOCKS_PER_SEC; 
-    Cout << "origSize: " << origSize << "\tcompSize: " << compSize << Endl; 
+            abort();
+        }
+    }
+    float cl2 = clock();
+    float secs = (cl2 - cl1) / CLOCKS_PER_SEC;
+    Cout << "origSize: " << origSize << "\tcompSize: " << compSize << Endl;
     Cout << "yep! compression + decompression speed " << origSize / 1024.0f / 1024.0f / secs << " mbps\n";
-    Cout << "yep! compression ratio " << double(origSize) / double(compSize + 1) << "\n"; 
-} 
- 
+    Cout << "yep! compression ratio " << double(origSize) / double(compSize + 1) << "\n";
+}
+
 int main(int argc, const char* argv[]) {
     TReallyFastRng32 rr(17);
     TVector<TString> lines;
-    /*FILE *fp = fopen("res", "rb"); 
-    while (!feof(fp)) { 
-        char buff[4096]; 
-        fscanf(fp, "%s", buff); 
+    /*FILE *fp = fopen("res", "rb");
+    while (!feof(fp)) {
+        char buff[4096];
+        fscanf(fp, "%s", buff);
         lines.push_back(TString(buff));
-    }*/ 
-    //for (size_t i = 0; i < 10000000; ++i) { 
-    //for (size_t i = 0; i < 1000000; ++i) { 
+    }*/
+    //for (size_t i = 0; i < 10000000; ++i) {
+    //for (size_t i = 0; i < 1000000; ++i) {
     for (size_t i = 0; i < 1000000; ++i) {
         size_t size = rr.Uniform(32);
         TString res = "www.yandex.ru/yandsearch?text=";
-        for (size_t j = 0; j < size; ++j) { 
+        for (size_t j = 0; j < size; ++j) {
             res += "qwer"[rr.Uniform(4)];
-        } 
-        lines.push_back(res); 
-    } 
-    THolder<TDataSampler> sampler(new TDataSampler); 
-    for (size_t i = 0; i < lines.size(); ++i) { 
+        }
+        lines.push_back(res);
+    }
+    THolder<TDataSampler> sampler(new TDataSampler);
+    for (size_t i = 0; i < lines.size(); ++i) {
         sampler->AddStat(lines[i]);
-    } 
-    TCompressorTable table; 
-    sampler->BuildTable(table); 
- 
-    DoTest<true>(table, lines); 
-    DoTest<false>(table, lines); 
- 
+    }
+    TCompressorTable table;
+    sampler->BuildTable(table);
+
+    DoTest<true>(table, lines);
+    DoTest<false>(table, lines);
+
     Y_UNUSED(argc);
     Y_UNUSED(argv);
-    return 0; 
-} 
+    return 0;
+}

+ 5 - 5
library/cpp/comptable/usage/ya.make

@@ -1,13 +1,13 @@
-PROGRAM() 
- 
+PROGRAM()
+
 OWNER(ironpeter)
- 
+
 SRCS(
     usage.cpp
 )
- 
+
 PEERDIR(
     library/cpp/comptable
 )
 
-END() 
+END()

+ 41 - 41
library/cpp/comptable/ut/comptable_ut.cpp

@@ -1,64 +1,64 @@
 #include <library/cpp/comptable/comptable.h>
 #include <library/cpp/testing/unittest/registar.h>
- 
+
 #include <util/random/random.h>
 #include <util/random/fast.h>
 
-using namespace NCompTable; 
- 
+using namespace NCompTable;
+
 template <bool HQ>
 void DoTest(const TCompressorTable& table, const TVector<TString>& lines) {
     TVector<char> compressed;
     TVector<char> decompressed;
- 
-    TChunkCompressor compressor(HQ, table); 
-    TStringStream tmp; 
-    Save(&tmp, table); 
-    TCompressorTable tableLoaded; 
-    Load(&tmp, tableLoaded); 
-    UNIT_ASSERT(memcmp(&table, &tableLoaded, sizeof(table)) == 0); 
-    TChunkDecompressor deCompressor(HQ, tableLoaded); 
- 
-    size_t origSize = 0; 
-    size_t compSize = 0; 
+
+    TChunkCompressor compressor(HQ, table);
+    TStringStream tmp;
+    Save(&tmp, table);
+    TCompressorTable tableLoaded;
+    Load(&tmp, tableLoaded);
+    UNIT_ASSERT(memcmp(&table, &tableLoaded, sizeof(table)) == 0);
+    TChunkDecompressor deCompressor(HQ, tableLoaded);
+
+    size_t origSize = 0;
+    size_t compSize = 0;
     for (size_t i = 0; i < lines.size(); ++i) {
         const TString& line = lines[i];
-        compressor.Compress(line, &compressed); 
-        origSize += line.size(); 
-        compSize += compressed.size(); 
-        TStringBuf in(compressed.data(), compressed.size()); 
-        deCompressor.Decompress(in, &decompressed); 
+        compressor.Compress(line, &compressed);
+        origSize += line.size();
+        compSize += compressed.size();
+        TStringBuf in(compressed.data(), compressed.size());
+        deCompressor.Decompress(in, &decompressed);
         UNIT_ASSERT(decompressed.size() == line.size() && memcmp(decompressed.data(), line.data(), decompressed.size()) == 0);
-    } 
+    }
     UNIT_ASSERT_EQUAL(origSize, 45491584);
-    if (HQ) { 
+    if (HQ) {
         UNIT_ASSERT_EQUAL(compSize, 11074583);
-    } else { 
+    } else {
         UNIT_ASSERT_EQUAL(compSize, 17459336);
-    } 
-    UNIT_ASSERT(compSize < origSize); 
-} 
- 
+    }
+    UNIT_ASSERT(compSize < origSize);
+}
+
 Y_UNIT_TEST_SUITE(TestComptable) {
     Y_UNIT_TEST(TestComptableCompressDecompress) {
         TReallyFastRng32 rr(17);
         TVector<TString> lines;
-        for (size_t i = 0; i < 1000000; ++i) { 
+        for (size_t i = 0; i < 1000000; ++i) {
             size_t size = rr.Uniform(32);
             TString res = "www.yandex.ru/yandsearch?text=";
-            for (size_t j = 0; j < size; ++j) { 
+            for (size_t j = 0; j < size; ++j) {
                 res += "qwer"[rr.Uniform(4)];
-            } 
-            lines.push_back(res); 
-        } 
-        THolder<TDataSampler> sampler(new TDataSampler); 
-        for (size_t i = 0; i < lines.size(); ++i) { 
+            }
+            lines.push_back(res);
+        }
+        THolder<TDataSampler> sampler(new TDataSampler);
+        for (size_t i = 0; i < lines.size(); ++i) {
             sampler->AddStat(lines[i]);
-        } 
-        TCompressorTable table; 
-        sampler->BuildTable(table); 
- 
-        DoTest<true>(table, lines); 
-        DoTest<false>(table, lines); 
-    } 
-} 
+        }
+        TCompressorTable table;
+        sampler->BuildTable(table);
+
+        DoTest<true>(table, lines);
+        DoTest<false>(table, lines);
+    }
+}

+ 4 - 4
library/cpp/comptable/ut/ya.make

@@ -1,9 +1,9 @@
 UNITTEST_FOR(library/cpp/comptable)
- 
+
 OWNER(ironpeter)
- 
+
 SRCS(
     comptable_ut.cpp
 )
- 
-END() 
+
+END()

+ 6 - 6
library/cpp/comptable/ya.make

@@ -1,13 +1,13 @@
-LIBRARY() 
- 
+LIBRARY()
+
 OWNER(ironpeter)
- 
+
 SRCS(
     comptable.cpp
 )
- 
+
 PEERDIR(
     library/cpp/compproto
 )
- 
-END() 
+
+END()

+ 8 - 8
util/system/filemap.h

@@ -168,14 +168,14 @@ public:
 
     void Precharge(size_t pos = 0, size_t size = (size_t)-1) const;
 
-    void SetSequential() { 
-        Map_.SetSequential(); 
-    } 
- 
-    void Evict() { 
-        Map_.Evict(); 
-    } 
- 
+    void SetSequential() {
+        Map_.SetSequential();
+    }
+
+    void Evict() {
+        Map_.Evict();
+    }
+
 private:
     void Flush(void* ptr, size_t size, bool sync);