123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791 |
- #include <util/random/shuffle.h>
- #include <library/cpp/testing/unittest/registar.h>
- #include <util/stream/output.h>
- #include <utility>
- #include <util/charset/wide.h>
- #include <util/generic/algorithm.h>
- #include <util/generic/buffer.h>
- #include <util/generic/map.h>
- #include <util/generic/vector.h>
- #include <util/generic/ptr.h>
- #include <util/generic/ylimits.h>
- #include <util/folder/dirut.h>
- #include <util/random/random.h>
- #include <util/random/fast.h>
- #include <util/string/hex.h>
- #include <util/string/cast.h>
- #include "comptrie.h"
- #include "set.h"
- #include "first_symbol_iterator.h"
- #include "search_iterator.h"
- #include "pattern_searcher.h"
- #include <array>
- #include <iterator>
- class TCompactTrieTest: public TTestBase {
- private:
- UNIT_TEST_SUITE(TCompactTrieTest);
- UNIT_TEST(TestTrie8);
- UNIT_TEST(TestTrie16);
- UNIT_TEST(TestTrie32);
- UNIT_TEST(TestFastTrie8);
- UNIT_TEST(TestFastTrie16);
- UNIT_TEST(TestFastTrie32);
- UNIT_TEST(TestMinimizedTrie8);
- UNIT_TEST(TestMinimizedTrie16);
- UNIT_TEST(TestMinimizedTrie32);
- UNIT_TEST(TestFastMinimizedTrie8);
- UNIT_TEST(TestFastMinimizedTrie16);
- UNIT_TEST(TestFastMinimizedTrie32);
- UNIT_TEST(TestTrieIterator8);
- UNIT_TEST(TestTrieIterator16);
- UNIT_TEST(TestTrieIterator32);
- UNIT_TEST(TestMinimizedTrieIterator8);
- UNIT_TEST(TestMinimizedTrieIterator16);
- UNIT_TEST(TestMinimizedTrieIterator32);
- UNIT_TEST(TestPhraseSearch);
- UNIT_TEST(TestAddGet);
- UNIT_TEST(TestEmpty);
- UNIT_TEST(TestUninitializedNonEmpty);
- UNIT_TEST(TestRandom);
- UNIT_TEST(TestFindTails);
- UNIT_TEST(TestPrefixGrouped);
- UNIT_TEST(CrashTestPrefixGrouped);
- UNIT_TEST(TestMergeFromFile);
- UNIT_TEST(TestMergeFromBuffer);
- UNIT_TEST(TestUnique);
- UNIT_TEST(TestAddRetValue);
- UNIT_TEST(TestClear);
- UNIT_TEST(TestIterateEmptyKey);
- UNIT_TEST(TestTrieSet);
- UNIT_TEST(TestTrieForVectorInt64);
- UNIT_TEST(TestTrieForListInt64);
- UNIT_TEST(TestTrieForSetInt64);
- UNIT_TEST(TestTrieForVectorStroka);
- UNIT_TEST(TestTrieForListStroka);
- UNIT_TEST(TestTrieForSetStroka);
- UNIT_TEST(TestTrieForVectorWtroka);
- UNIT_TEST(TestTrieForVectorFloat);
- UNIT_TEST(TestTrieForVectorDouble);
- UNIT_TEST(TestTrieForListVectorInt64);
- UNIT_TEST(TestTrieForPairWtrokaVectorInt64);
- UNIT_TEST(TestEmptyValueOutOfOrder);
- UNIT_TEST(TestFindLongestPrefixWithEmptyValue);
- UNIT_TEST(TestSearchIterChar);
- UNIT_TEST(TestSearchIterWchar);
- UNIT_TEST(TestSearchIterWchar32)
- UNIT_TEST(TestCopyAndAssignment);
- UNIT_TEST(TestFirstSymbolIterator8);
- UNIT_TEST(TestFirstSymbolIterator16);
- UNIT_TEST(TestFirstSymbolIterator32);
- UNIT_TEST(TestFirstSymbolIteratorChar32);
- UNIT_TEST(TestArrayPacker);
- UNIT_TEST(TestBuilderFindLongestPrefix);
- UNIT_TEST(TestBuilderFindLongestPrefixWithEmptyValue);
- UNIT_TEST(TestPatternSearcherEmpty);
- UNIT_TEST(TestPatternSearcherSimple);
- UNIT_TEST(TestPatternSearcherRandom);
- UNIT_TEST_SUITE_END();
- static const char* SampleData[];
- template <class T>
- void CreateTrie(IOutputStream& out, bool minimize, bool useFastLayout);
- template <class T>
- void CheckData(const char* src, size_t len);
- template <class T>
- void CheckUpperBound(const char* src, size_t len);
- template <class T>
- void CheckIterator(const char* src, size_t len);
- template <class T>
- void TestTrie(bool minimize, bool useFastLayout);
- template <class T>
- void TestTrieIterator(bool minimize);
- template <class T, bool minimize>
- void TestRandom(const size_t n, const size_t maxKeySize);
- void TestFindTailsImpl(const TString& prefix);
- void TestUniqueImpl(bool isPrefixGrouped);
- TVector<TUtf16String> GetSampleKeys(size_t nKeys) const;
- template <class TContainer>
- TVector<TContainer> GetSampleVectorData(size_t nValues);
- template <class TContainer>
- TVector<TContainer> GetSampleTextVectorData(size_t nValues);
- template <class T>
- void CheckEquality(const T& value1, const T& value2) const;
- template <class TContainer>
- void TestTrieWithContainers(const TVector<TUtf16String>& keys, const TVector<TContainer>& sampleData, TString methodName);
- template <typename TChar>
- void TestSearchIterImpl();
- template <class TTrie>
- void TestFirstSymbolIteratorForTrie(const TTrie& trie, const TStringBuf& narrowAnswers);
- template <typename TSymbol>
- void TestFirstSymbolIterator();
- template <class T>
- class TIntPacker;
- template <class T>
- class TDummyPacker;
- class TStrokaPacker;
- public:
- void TestPackers();
- void TestTrie8();
- void TestTrie16();
- void TestTrie32();
- void TestFastTrie8();
- void TestFastTrie16();
- void TestFastTrie32();
- void TestMinimizedTrie8();
- void TestMinimizedTrie16();
- void TestMinimizedTrie32();
- void TestFastMinimizedTrie8();
- void TestFastMinimizedTrie16();
- void TestFastMinimizedTrie32();
- void TestTrieIterator8();
- void TestTrieIterator16();
- void TestTrieIterator32();
- void TestMinimizedTrieIterator8();
- void TestMinimizedTrieIterator16();
- void TestMinimizedTrieIterator32();
- void TestPhraseSearch();
- void TestAddGet();
- void TestEmpty();
- void TestUninitializedNonEmpty();
- void TestRandom();
- void TestFindTails();
- void TestPrefixGrouped();
- void CrashTestPrefixGrouped();
- void TestMergeFromFile();
- void TestMergeFromBuffer();
- void TestUnique();
- void TestAddRetValue();
- void TestClear();
- void TestIterateEmptyKey();
- void TestTrieSet();
- void TestTrieForVectorInt64();
- void TestTrieForListInt64();
- void TestTrieForSetInt64();
- void TestTrieForVectorStroka();
- void TestTrieForListStroka();
- void TestTrieForSetStroka();
- void TestTrieForVectorWtroka();
- void TestTrieForVectorFloat();
- void TestTrieForVectorDouble();
- void TestTrieForListVectorInt64();
- void TestTrieForPairWtrokaVectorInt64();
- void TestEmptyValueOutOfOrder();
- void TestFindLongestPrefixWithEmptyValue();
- void TestSearchIterChar();
- void TestSearchIterWchar();
- void TestSearchIterWchar32();
- void TestCopyAndAssignment();
- void TestFirstSymbolIterator8();
- void TestFirstSymbolIterator16();
- void TestFirstSymbolIterator32();
- void TestFirstSymbolIteratorChar32();
- void TestArrayPacker();
- void TestBuilderFindLongestPrefix();
- void TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey);
- void TestBuilderFindLongestPrefixWithEmptyValue();
- void TestPatternSearcherOnDataset(
- const TVector<TString>& patterns,
- const TVector<TString>& samples
- );
- void TestPatternSearcherEmpty();
- void TestPatternSearcherSimple();
- void TestPatternSearcherRandom(
- size_t patternsNum,
- size_t patternMaxLength,
- size_t strMaxLength,
- int maxChar,
- TFastRng<ui64>& rng
- );
- void TestPatternSearcherRandom();
- };
- UNIT_TEST_SUITE_REGISTRATION(TCompactTrieTest);
- const char* TCompactTrieTest::SampleData[] = {
- "",
- "a", "b", "c", "d",
- "aa", "ab", "ac", "ad",
- "aaa", "aab", "aac", "aad",
- "aba", "abb", "abc", "abd",
- "fba", "fbb", "fbc", "fbd",
- "fbbaa",
- "c\x85\xA4\xBF" // Just something outside ASCII.
- };
- template <class T>
- typename TCompactTrie<T>::TKey MakeWideKey(const char* str, size_t len) {
- typename TCompactTrie<T>::TKey buffer;
- for (size_t i = 0; i < len; i++) {
- unsigned int ch = (str[i] & 0xFF);
- buffer.push_back((T)(ch | ch << 8 | ch << 16 | ch << 24));
- }
- return buffer;
- }
- template <class T>
- typename TCompactTrie<T>::TKey MakeWideKey(const TString& str) {
- return MakeWideKey<T>(str.c_str(), str.length());
- }
- template <class T>
- typename TCompactTrie<T>::TKey MakeWideKey(const TStringBuf& buf) {
- return MakeWideKey<T>(buf.data(), buf.length());
- }
- template <class T>
- void TCompactTrieTest::CreateTrie(IOutputStream& out, bool minimize, bool useFastLayout) {
- TCompactTrieBuilder<T> builder;
- for (auto& i : SampleData) {
- size_t len = strlen(i);
- builder.Add(MakeWideKey<T>(i, len), len * 2);
- }
- TBufferOutput tmp2;
- IOutputStream& currentOutput = useFastLayout ? tmp2 : out;
- if (minimize) {
- TBufferOutput buftmp;
- builder.Save(buftmp);
- CompactTrieMinimize<TCompactTriePacker<ui64>>(currentOutput, buftmp.Buffer().Data(), buftmp.Buffer().Size(), false);
- } else {
- builder.Save(currentOutput);
- }
- if (useFastLayout) {
- CompactTrieMakeFastLayout<TCompactTriePacker<T>>(out, tmp2.Buffer().Data(), tmp2.Buffer().Size(), false);
- }
- }
- // Iterates over all strings of length <= 4 made of letters a-g.
- static bool LexicographicStep(TString& s) {
- if (s.length() < 4) {
- s += "a";
- return true;
- }
- while (!s.empty() && s.back() == 'g')
- s.pop_back();
- if (s.empty())
- return false;
- char last = s.back();
- last++;
- s.pop_back();
- s.push_back(last);
- return true;
- }
- template <class T>
- void TCompactTrieTest::CheckUpperBound(const char* data, size_t datalen) {
- TCompactTrie<T> trie(data, datalen);
- typedef typename TCompactTrie<T>::TKey TKey;
- typedef typename TCompactTrie<T>::TData TData;
- TString key;
- do {
- const TKey wideKey = MakeWideKey<T>(key);
- typename TCompactTrie<T>::TConstIterator it = trie.UpperBound(wideKey);
- UNIT_ASSERT_C(it == trie.End() || it.GetKey() >= wideKey, "key=" + key);
- TData data;
- const bool found = trie.Find(wideKey, &data);
- if (found)
- UNIT_ASSERT_C(it.GetKey() == wideKey && it.GetValue() == data, "key=" + key);
- if (it != trie.Begin())
- UNIT_ASSERT_C((--it).GetKey() < wideKey, "key=" + key);
- } while (LexicographicStep(key));
- }
- template <class T>
- void TCompactTrieTest::CheckData(const char* data, size_t datalen) {
- TCompactTrie<T> trie(data, datalen);
- UNIT_ASSERT_VALUES_EQUAL(Y_ARRAY_SIZE(SampleData), trie.Size());
- for (auto& i : SampleData) {
- size_t len = strlen(i);
- ui64 value = 0;
- size_t prefixLen = 0;
- typename TCompactTrie<T>::TKey key = MakeWideKey<T>(i, len);
- UNIT_ASSERT(trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(len * 2, value);
- UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
- UNIT_ASSERT_EQUAL(len, prefixLen);
- UNIT_ASSERT_EQUAL(len * 2, value);
- TString badkey("bb");
- badkey += i;
- key = MakeWideKey<T>(badkey);
- UNIT_ASSERT(!trie.Find(key));
- value = 123;
- UNIT_ASSERT(!trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(123, value);
- UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
- UNIT_ASSERT_EQUAL(1, prefixLen);
- UNIT_ASSERT_EQUAL(2, value);
- badkey = i;
- badkey += "x";
- key = MakeWideKey<T>(badkey);
- UNIT_ASSERT(!trie.Find(key));
- value = 1234;
- UNIT_ASSERT(!trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(1234, value);
- UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
- UNIT_ASSERT_EQUAL(len, prefixLen);
- UNIT_ASSERT_EQUAL(len * 2, value);
- UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, nullptr));
- UNIT_ASSERT_EQUAL(len, prefixLen);
- }
- TString testkey("fbbaa");
- typename TCompactTrie<T>::TKey key = MakeWideKey<T>(testkey);
- ui64 value = 0;
- size_t prefixLen = 0;
- UNIT_ASSERT(trie.FindLongestPrefix(key.data(), testkey.length() - 1, &prefixLen, &value));
- UNIT_ASSERT_EQUAL(prefixLen, 3);
- UNIT_ASSERT_EQUAL(6, value);
- testkey = "fbbax";
- key = MakeWideKey<T>(testkey);
- UNIT_ASSERT(trie.FindLongestPrefix(key, &prefixLen, &value));
- UNIT_ASSERT_EQUAL(prefixLen, 3);
- UNIT_ASSERT_EQUAL(6, value);
- value = 12345678;
- UNIT_ASSERT(!trie.Find(key, &value));
- UNIT_ASSERT_EQUAL(12345678, value); //Failed Find() should not change value
- }
- template <class T>
- void TCompactTrieTest::CheckIterator(const char* data, size_t datalen) {
- typedef typename TCompactTrie<T>::TKey TKey;
- typedef typename TCompactTrie<T>::TValueType TValue;
- TMap<TKey, ui64> stored;
- for (auto& i : SampleData) {
- size_t len = strlen(i);
- stored[MakeWideKey<T>(i, len)] = len * 2;
- }
- TCompactTrie<T> trie(data, datalen);
- TVector<TValue> items;
- typename TCompactTrie<T>::TConstIterator it = trie.Begin();
- size_t entry_count = 0;
- TMap<TKey, ui64> received;
- while (it != trie.End()) {
- UNIT_ASSERT_VALUES_EQUAL(it.GetKeySize(), it.GetKey().size());
- received.insert(*it);
- items.push_back(*it);
- entry_count++;
- it++;
- }
- TMap<TKey, ui64> received2;
- for (std::pair<TKey, ui64> x : trie) {
- received2.insert(x);
- }
- UNIT_ASSERT(entry_count == stored.size());
- UNIT_ASSERT(received == stored);
- UNIT_ASSERT(received2 == stored);
- std::reverse(items.begin(), items.end());
- typename TCompactTrie<T>::TConstIterator revIt = trie.End();
- typename TCompactTrie<T>::TConstIterator const begin = trie.Begin();
- typename TCompactTrie<T>::TConstIterator emptyIt;
- size_t pos = 0;
- while (revIt != begin) {
- revIt--;
- UNIT_ASSERT(*revIt == items[pos]);
- pos++;
- }
- // Checking the assignment operator.
- revIt = begin;
- UNIT_ASSERT(revIt == trie.Begin());
- UNIT_ASSERT(!revIt.IsEmpty());
- UNIT_ASSERT(revIt != emptyIt);
- UNIT_ASSERT(revIt != trie.End());
- ++revIt; // Call a method that uses Skipper.
- revIt = emptyIt;
- UNIT_ASSERT(revIt == emptyIt);
- UNIT_ASSERT(revIt.IsEmpty());
- UNIT_ASSERT(revIt != trie.End());
- // Checking the move assignment operator.
- revIt = trie.Begin();
- UNIT_ASSERT(revIt == trie.Begin());
- UNIT_ASSERT(!revIt.IsEmpty());
- UNIT_ASSERT(revIt != emptyIt);
- UNIT_ASSERT(revIt != trie.End());
- ++revIt; // Call a method that uses Skipper.
- revIt = typename TCompactTrie<T>::TConstIterator();
- UNIT_ASSERT(revIt == emptyIt);
- UNIT_ASSERT(revIt.IsEmpty());
- UNIT_ASSERT(revIt != trie.End());
- }
- template <class T>
- void TCompactTrieTest::TestTrie(bool minimize, bool useFastLayout) {
- TBufferOutput bufout;
- CreateTrie<T>(bufout, minimize, useFastLayout);
- CheckData<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
- CheckUpperBound<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
- }
- template <class T>
- void TCompactTrieTest::TestTrieIterator(bool minimize) {
- TBufferOutput bufout;
- CreateTrie<T>(bufout, minimize, false);
- CheckIterator<T>(bufout.Buffer().Data(), bufout.Buffer().Size());
- }
- void TCompactTrieTest::TestTrie8() {
- TestTrie<char>(false, false);
- }
- void TCompactTrieTest::TestTrie16() {
- TestTrie<wchar16>(false, false);
- }
- void TCompactTrieTest::TestTrie32() {
- TestTrie<wchar32>(false, false);
- }
- void TCompactTrieTest::TestFastTrie8() {
- TestTrie<char>(false, true);
- }
- void TCompactTrieTest::TestFastTrie16() {
- TestTrie<wchar16>(false, true);
- }
- void TCompactTrieTest::TestFastTrie32() {
- TestTrie<wchar32>(false, true);
- }
- void TCompactTrieTest::TestMinimizedTrie8() {
- TestTrie<char>(true, false);
- }
- void TCompactTrieTest::TestMinimizedTrie16() {
- TestTrie<wchar16>(true, false);
- }
- void TCompactTrieTest::TestMinimizedTrie32() {
- TestTrie<wchar32>(true, false);
- }
- void TCompactTrieTest::TestFastMinimizedTrie8() {
- TestTrie<char>(true, true);
- }
- void TCompactTrieTest::TestFastMinimizedTrie16() {
- TestTrie<wchar16>(true, true);
- }
- void TCompactTrieTest::TestFastMinimizedTrie32() {
- TestTrie<wchar32>(true, true);
- }
- void TCompactTrieTest::TestTrieIterator8() {
- TestTrieIterator<char>(false);
- }
- void TCompactTrieTest::TestTrieIterator16() {
- TestTrieIterator<wchar16>(false);
- }
- void TCompactTrieTest::TestTrieIterator32() {
- TestTrieIterator<wchar32>(false);
- }
- void TCompactTrieTest::TestMinimizedTrieIterator8() {
- TestTrieIterator<char>(true);
- }
- void TCompactTrieTest::TestMinimizedTrieIterator16() {
- TestTrieIterator<wchar16>(true);
- }
- void TCompactTrieTest::TestMinimizedTrieIterator32() {
- TestTrieIterator<wchar32>(true);
- }
- void TCompactTrieTest::TestPhraseSearch() {
- static const char* phrases[] = {"ab", "ab cd", "ab cd ef"};
- static const char* const goodphrase = "ab cd ef gh";
- static const char* const badphrase = "cd ef gh ab";
- TBufferOutput bufout;
- TCompactTrieBuilder<char> builder;
- for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) {
- builder.Add(phrases[i], strlen(phrases[i]), i);
- }
- builder.Save(bufout);
- TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- TVector<TCompactTrie<char>::TPhraseMatch> matches;
- trie.FindPhrases(goodphrase, strlen(goodphrase), matches);
- UNIT_ASSERT(matches.size() == Y_ARRAY_SIZE(phrases));
- for (size_t i = 0; i < Y_ARRAY_SIZE(phrases); i++) {
- UNIT_ASSERT(matches[i].first == strlen(phrases[i]));
- UNIT_ASSERT(matches[i].second == i);
- }
- trie.FindPhrases(badphrase, strlen(badphrase), matches);
- UNIT_ASSERT(matches.size() == 0);
- }
- void TCompactTrieTest::TestAddGet() {
- TCompactTrieBuilder<char> builder;
- builder.Add("abcd", 4, 1);
- builder.Add("acde", 4, 2);
- ui64 dummy;
- UNIT_ASSERT(builder.Find("abcd", 4, &dummy));
- UNIT_ASSERT(1 == dummy);
- UNIT_ASSERT(builder.Find("acde", 4, &dummy));
- UNIT_ASSERT(2 == dummy);
- UNIT_ASSERT(!builder.Find("fgdgfacde", 9, &dummy));
- UNIT_ASSERT(!builder.Find("ab", 2, &dummy));
- }
- void TCompactTrieTest::TestEmpty() {
- TCompactTrieBuilder<char> builder;
- ui64 dummy = 12345;
- size_t prefixLen;
- UNIT_ASSERT(!builder.Find("abc", 3, &dummy));
- TBufferOutput bufout;
- builder.Save(bufout);
- TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- UNIT_ASSERT(!trie.Find("abc", 3, &dummy));
- UNIT_ASSERT(!trie.Find("", 0, &dummy));
- UNIT_ASSERT(!trie.FindLongestPrefix("abc", 3, &prefixLen, &dummy));
- UNIT_ASSERT(!trie.FindLongestPrefix("", 0, &prefixLen, &dummy));
- UNIT_ASSERT_EQUAL(12345, dummy);
- UNIT_ASSERT(trie.Begin() == trie.End());
- TCompactTrie<> trieNull;
- UNIT_ASSERT(!trieNull.Find(" ", 1));
- TCompactTrie<>::TPhraseMatchVector matches;
- trieNull.FindPhrases(" ", 1, matches); // just to be sure it doesn't crash
- UNIT_ASSERT(trieNull.Begin() == trieNull.End());
- }
- void TCompactTrieTest::TestUninitializedNonEmpty() {
- TBufferOutput bufout;
- CreateTrie<char>(bufout, false, false);
- TCompactTrie<char> trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- typedef TCompactTrie<char>::TKey TKey;
- typedef TCompactTrie<char>::TConstIterator TIter;
- TCompactTrie<char> tails = trie.FindTails("abd", 3); // A trie that has empty value and no data.
- UNIT_ASSERT(!tails.IsEmpty());
- UNIT_ASSERT(!tails.IsInitialized());
- const TKey wideKey = MakeWideKey<char>("c", 1);
- TIter it = tails.UpperBound(wideKey);
- UNIT_ASSERT(it == tails.End());
- UNIT_ASSERT(it != tails.Begin());
- --it;
- UNIT_ASSERT(it == tails.Begin());
- ++it;
- UNIT_ASSERT(it == tails.End());
- }
- static char RandChar() {
- return char(RandomNumber<size_t>() % 256);
- }
- static TString RandStr(const size_t max) {
- size_t len = RandomNumber<size_t>() % max;
- TString key;
- for (size_t j = 0; j < len; ++j)
- key += RandChar();
- return key;
- }
- template <class T, bool minimize>
- void TCompactTrieTest::TestRandom(const size_t n, const size_t maxKeySize) {
- const TStringBuf EMPTY_KEY = TStringBuf("", 1);
- TCompactTrieBuilder<char, typename T::TData, T> builder;
- typedef TMap<TString, typename T::TData> TKeys;
- TKeys keys;
- typename T::TData dummy;
- for (size_t i = 0; i < n; ++i) {
- const TString key = RandStr(maxKeySize);
- if (key != EMPTY_KEY && keys.find(key) == keys.end()) {
- const typename T::TData val = T::Data(key);
- keys[key] = val;
- UNIT_ASSERT_C(!builder.Find(key.data(), key.size(), &dummy), "key = " << HexEncode(TString(key)));
- builder.Add(key.data(), key.size(), val);
- UNIT_ASSERT_C(builder.Find(key.data(), key.size(), &dummy), "key = " << HexEncode(TString(key)));
- UNIT_ASSERT(dummy == val);
- }
- }
- TBufferStream stream;
- size_t len = builder.Save(stream);
- TCompactTrie<char, typename T::TData, T> trie(stream.Buffer().Data(), len);
- TBufferStream buftmp;
- if (minimize) {
- CompactTrieMinimize<T>(buftmp, stream.Buffer().Data(), len, false);
- }
- TCompactTrie<char, typename T::TData, T> trieMin(buftmp.Buffer().Data(), buftmp.Buffer().Size());
- TCompactTrieBuilder<char, typename T::TData, T> prefixGroupedBuilder(CTBF_PREFIX_GROUPED);
- for (typename TKeys::const_iterator i = keys.begin(), mi = keys.end(); i != mi; ++i) {
- UNIT_ASSERT(!prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy));
- UNIT_ASSERT(trie.Find(i->first.c_str(), i->first.size(), &dummy));
- UNIT_ASSERT(dummy == i->second);
- if (minimize) {
- UNIT_ASSERT(trieMin.Find(i->first.c_str(), i->first.size(), &dummy));
- UNIT_ASSERT(dummy == i->second);
- }
- prefixGroupedBuilder.Add(i->first.c_str(), i->first.size(), dummy);
- UNIT_ASSERT(prefixGroupedBuilder.Find(i->first.c_str(), i->first.size(), &dummy));
- for (typename TKeys::const_iterator j = keys.begin(), end = keys.end(); j != end; ++j) {
- typename T::TData valFound;
- if (j->first <= i->first) {
- UNIT_ASSERT(prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound));
- UNIT_ASSERT_VALUES_EQUAL(j->second, valFound);
- } else {
- UNIT_ASSERT(!prefixGroupedBuilder.Find(j->first.c_str(), j->first.size(), &valFound));
- }
- }
- }
- TBufferStream prefixGroupedBuffer;
- prefixGroupedBuilder.Save(prefixGroupedBuffer);
- UNIT_ASSERT_VALUES_EQUAL(stream.Buffer().Size(), prefixGroupedBuffer.Buffer().Size());
- UNIT_ASSERT(0 == memcmp(stream.Buffer().Data(), prefixGroupedBuffer.Buffer().Data(), stream.Buffer().Size()));
- }
- void TCompactTrieTest::TestRandom() {
- TestRandom<TIntPacker<ui64>, true>(1000, 1000);
- TestRandom<TIntPacker<int>, true>(100, 100);
- TestRandom<TDummyPacker<ui64>, true>(0, 0);
- TestRandom<TDummyPacker<ui64>, true>(100, 3);
- TestRandom<TDummyPacker<ui64>, true>(100, 100);
- TestRandom<TStrokaPacker, true>(100, 100);
- }
- void TCompactTrieTest::TestFindTailsImpl(const TString& prefix) {
- TCompactTrieBuilder<> builder;
- TMap<TString, ui64> input;
- for (auto& i : SampleData) {
- TString temp = i;
- ui64 val = temp.size() * 2;
- builder.Add(temp.data(), temp.size(), val);
- if (temp.StartsWith(prefix)) {
- input[temp.substr(prefix.size())] = val;
- }
- }
- typedef TCompactTrie<> TTrie;
- TBufferStream stream;
- size_t len = builder.Save(stream);
- TTrie trie(stream.Buffer().Data(), len);
- TTrie subtrie = trie.FindTails(prefix.data(), prefix.size());
- TMap<TString, ui64> output;
- for (TTrie::TConstIterator i = subtrie.Begin(), mi = subtrie.End(); i != mi; ++i) {
- TTrie::TValueType val = *i;
- output[TString(val.first.data(), val.first.size())] = val.second;
- }
- UNIT_ASSERT(input.size() == output.size());
- UNIT_ASSERT(input == output);
- TBufferStream buftmp;
- CompactTrieMinimize<TTrie::TPacker>(buftmp, stream.Buffer().Data(), len, false);
- TTrie trieMin(buftmp.Buffer().Data(), buftmp.Buffer().Size());
- subtrie = trieMin.FindTails(prefix.data(), prefix.size());
- output.clear();
- for (TTrie::TConstIterator i = subtrie.Begin(), mi = subtrie.End(); i != mi; ++i) {
- TTrie::TValueType val = *i;
- output[TString(val.first.data(), val.first.size())] = val.second;
- }
- UNIT_ASSERT(input.size() == output.size());
- UNIT_ASSERT(input == output);
- }
- void TCompactTrieTest::TestPrefixGrouped() {
- TBuffer b1b;
- TCompactTrieBuilder<char, ui32> b1(CTBF_PREFIX_GROUPED);
- const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Tumen",
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- ui32 val = strlen(data[i]) + 1;
- b1.Add(data[i], strlen(data[i]), val);
- for (size_t j = 0; j < Y_ARRAY_SIZE(data); ++j) {
- ui32 mustHave = strlen(data[j]) + 1;
- ui32 found = 0;
- if (j <= i) {
- UNIT_ASSERT(b1.Find(data[j], strlen(data[j]), &found));
- UNIT_ASSERT_VALUES_EQUAL(mustHave, found);
- } else {
- UNIT_ASSERT(!b1.Find(data[j], strlen(data[j]), &found));
- }
- }
- }
- {
- TBufferOutput b1bo(b1b);
- b1.Save(b1bo);
- }
- TCompactTrie<char, ui32> t1(TBlob::FromBuffer(b1b));
- //t1.Print(Cerr);
- for (auto& i : data) {
- ui32 v;
- UNIT_ASSERT(t1.Find(i, strlen(i), &v));
- UNIT_ASSERT_VALUES_EQUAL(strlen(i) + 1, v);
- }
- }
- void TCompactTrieTest::CrashTestPrefixGrouped() {
- TCompactTrieBuilder<char, ui32> builder(CTBF_PREFIX_GROUPED);
- const char* data[] = {
- "Fryazino",
- "Fryanovo",
- "Monino",
- "",
- "Fryazevo",
- };
- bool wasException = false;
- try {
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- builder.Add(data[i], strlen(data[i]), i + 1);
- }
- } catch (const yexception& e) {
- wasException = true;
- UNIT_ASSERT(strstr(e.what(), "Bad input order - expected input strings to be prefix-grouped."));
- }
- UNIT_ASSERT_C(wasException, "CrashTestPrefixGrouped");
- }
- void TCompactTrieTest::TestMergeFromFile() {
- {
- TCompactTrieBuilder<> b;
- b.Add("yandex", 12);
- b.Add("google", 13);
- b.Add("mail", 14);
- TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru");
- b.Save(out);
- }
- {
- TCompactTrieBuilder<> b;
- b.Add("yandex", 112);
- b.Add("google", 113);
- b.Add("yahoo", 114);
- TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com");
- b.Save(out);
- }
- {
- TCompactTrieBuilder<> b;
- UNIT_ASSERT(b.AddSubtreeInFile("com.", GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com"));
- UNIT_ASSERT(b.Add("org.kernel", 22));
- UNIT_ASSERT(b.AddSubtreeInFile("ru.", GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru"));
- TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res");
- b.Save(out);
- }
- TCompactTrie<> trie(TBlob::FromFileSingleThreaded(GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res"));
- UNIT_ASSERT_VALUES_EQUAL(12u, trie.Get("ru.yandex"));
- UNIT_ASSERT_VALUES_EQUAL(13u, trie.Get("ru.google"));
- UNIT_ASSERT_VALUES_EQUAL(14u, trie.Get("ru.mail"));
- UNIT_ASSERT_VALUES_EQUAL(22u, trie.Get("org.kernel"));
- UNIT_ASSERT_VALUES_EQUAL(112u, trie.Get("com.yandex"));
- UNIT_ASSERT_VALUES_EQUAL(113u, trie.Get("com.google"));
- UNIT_ASSERT_VALUES_EQUAL(114u, trie.Get("com.yahoo"));
- unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-res").data());
- unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-com").data());
- unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMerge-ru").data());
- }
- void TCompactTrieTest::TestMergeFromBuffer() {
- TArrayWithSizeHolder<char> buffer1;
- {
- TCompactTrieBuilder<> b;
- b.Add("aaaaa", 1);
- b.Add("bbbbb", 2);
- b.Add("ccccc", 3);
- buffer1.Resize(b.MeasureByteSize());
- TMemoryOutput out(buffer1.Get(), buffer1.Size());
- b.Save(out);
- }
- TArrayWithSizeHolder<char> buffer2;
- {
- TCompactTrieBuilder<> b;
- b.Add("aaaaa", 10);
- b.Add("bbbbb", 20);
- b.Add("ccccc", 30);
- b.Add("xxxxx", 40);
- b.Add("yyyyy", 50);
- buffer2.Resize(b.MeasureByteSize());
- TMemoryOutput out(buffer2.Get(), buffer2.Size());
- b.Save(out);
- }
- {
- TCompactTrieBuilder<> b;
- UNIT_ASSERT(b.AddSubtreeInBuffer("com.", std::move(buffer1)));
- UNIT_ASSERT(b.Add("org.upyachka", 42));
- UNIT_ASSERT(b.AddSubtreeInBuffer("ru.", std::move(buffer2)));
- TUnbufferedFileOutput out(GetSystemTempDir() + "/TCompactTrieTest-TestMergeFromBuffer-res");
- b.Save(out);
- }
- TCompactTrie<> trie(TBlob::FromFileSingleThreaded(GetSystemTempDir() + "/TCompactTrieTest-TestMergeFromBuffer-res"));
- UNIT_ASSERT_VALUES_EQUAL(10u, trie.Get("ru.aaaaa"));
- UNIT_ASSERT_VALUES_EQUAL(20u, trie.Get("ru.bbbbb"));
- UNIT_ASSERT_VALUES_EQUAL(40u, trie.Get("ru.xxxxx"));
- UNIT_ASSERT_VALUES_EQUAL(42u, trie.Get("org.upyachka"));
- UNIT_ASSERT_VALUES_EQUAL(1u, trie.Get("com.aaaaa"));
- UNIT_ASSERT_VALUES_EQUAL(2u, trie.Get("com.bbbbb"));
- UNIT_ASSERT_VALUES_EQUAL(3u, trie.Get("com.ccccc"));
- unlink((GetSystemTempDir() + "/TCompactTrieTest-TestMergeFromBuffer-res").data());
- }
- void TCompactTrieTest::TestUnique() {
- TestUniqueImpl(false);
- TestUniqueImpl(true);
- }
- void TCompactTrieTest::TestUniqueImpl(bool isPrefixGrouped) {
- TCompactTrieBuilder<char, ui32> builder(CTBF_UNIQUE | (isPrefixGrouped ? CTBF_PREFIX_GROUPED : CTBF_NONE));
- const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Fry",
- "Tumen",
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- UNIT_ASSERT_C(builder.Add(data[i], strlen(data[i]), i + 1), i);
- }
- bool wasException = false;
- try {
- builder.Add(data[4], strlen(data[4]), 20);
- } catch (const yexception& e) {
- wasException = true;
- UNIT_ASSERT(strstr(e.what(), "Duplicate key"));
- }
- UNIT_ASSERT_C(wasException, "TestUnique");
- }
- void TCompactTrieTest::TestAddRetValue() {
- TCompactTrieBuilder<char, ui32> builder;
- const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Fry",
- "Tumen",
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- UNIT_ASSERT(builder.Add(data[i], strlen(data[i]), i + 1));
- UNIT_ASSERT(!builder.Add(data[i], strlen(data[i]), i + 2));
- ui32 value;
- UNIT_ASSERT(builder.Find(data[i], strlen(data[i]), &value));
- UNIT_ASSERT(value == i + 2);
- }
- }
- void TCompactTrieTest::TestClear() {
- TCompactTrieBuilder<char, ui32> builder;
- const char* data[] = {
- "Kazan",
- "Moscow",
- "Monino",
- "Murmansk",
- "Fryanovo",
- "Fryazino",
- "Fryazevo",
- "Fry",
- "Tumen",
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- builder.Add(data[i], strlen(data[i]), i + 1);
- }
- UNIT_ASSERT(builder.GetEntryCount() == Y_ARRAY_SIZE(data));
- builder.Clear();
- UNIT_ASSERT(builder.GetEntryCount() == 0);
- UNIT_ASSERT(builder.GetNodeCount() == 1);
- }
- void TCompactTrieTest::TestFindTails() {
- TestFindTailsImpl("aa");
- TestFindTailsImpl("bb");
- TestFindTailsImpl("fb");
- TestFindTailsImpl("fbc");
- TestFindTailsImpl("fbbaa");
- }
- template <class T>
- class TCompactTrieTest::TDummyPacker: public TNullPacker<T> {
- public:
- static T Data(const TString&) {
- T data;
- TNullPacker<T>().UnpackLeaf(nullptr, data);
- return data;
- }
- typedef T TData;
- };
- class TCompactTrieTest::TStrokaPacker: public TCompactTriePacker<TString> {
- public:
- typedef TString TData;
- static TString Data(const TString& str) {
- return str;
- }
- };
- template <class T>
- class TCompactTrieTest::TIntPacker: public TCompactTriePacker<T> {
- public:
- typedef T TData;
- static TData Data(const TString&) {
- return RandomNumber<std::make_unsigned_t<T>>();
- }
- };
- void TCompactTrieTest::TestIterateEmptyKey() {
- TBuffer trieBuffer;
- {
- TCompactTrieBuilder<char, ui32> builder;
- UNIT_ASSERT(builder.Add("", 1));
- TBufferStream trieBufferO(trieBuffer);
- builder.Save(trieBufferO);
- }
- TCompactTrie<char, ui32> trie(TBlob::FromBuffer(trieBuffer));
- ui32 val;
- UNIT_ASSERT(trie.Find("", &val));
- UNIT_ASSERT(val == 1);
- TCompactTrie<char, ui32>::TConstIterator it = trie.Begin();
- UNIT_ASSERT(it.GetKey().empty());
- UNIT_ASSERT(it.GetValue() == 1);
- }
- void TCompactTrieTest::TestTrieSet() {
- TBuffer buffer;
- {
- TCompactTrieSet<char>::TBuilder builder;
- UNIT_ASSERT(builder.Add("a", 0));
- UNIT_ASSERT(builder.Add("ab", 1));
- UNIT_ASSERT(builder.Add("abc", 1));
- UNIT_ASSERT(builder.Add("abcd", 0));
- UNIT_ASSERT(!builder.Add("abcd", 1));
- TBufferStream stream(buffer);
- builder.Save(stream);
- }
- TCompactTrieSet<char> set(TBlob::FromBuffer(buffer));
- UNIT_ASSERT(set.Has("a"));
- UNIT_ASSERT(set.Has("ab"));
- UNIT_ASSERT(set.Has("abc"));
- UNIT_ASSERT(set.Has("abcd"));
- UNIT_ASSERT(!set.Has("abcde"));
- UNIT_ASSERT(!set.Has("aa"));
- UNIT_ASSERT(!set.Has("b"));
- UNIT_ASSERT(!set.Has(""));
- TCompactTrieSet<char> tails;
- UNIT_ASSERT(set.FindTails("a", tails));
- UNIT_ASSERT(tails.Has("b"));
- UNIT_ASSERT(tails.Has("bcd"));
- UNIT_ASSERT(!tails.Has("ab"));
- UNIT_ASSERT(!set.Has(""));
- TCompactTrieSet<char> empty;
- UNIT_ASSERT(set.FindTails("abcd", empty));
- UNIT_ASSERT(!empty.Has("a"));
- UNIT_ASSERT(!empty.Has("b"));
- UNIT_ASSERT(!empty.Has("c"));
- UNIT_ASSERT(!empty.Has("d"));
- UNIT_ASSERT(!empty.Has("d"));
- UNIT_ASSERT(empty.Has("")); // contains only empty string
- }
- // Tests for trie with vector (list, set) values
- TVector<TUtf16String> TCompactTrieTest::GetSampleKeys(size_t nKeys) const {
- Y_ASSERT(nKeys <= 10);
- TString sampleKeys[] = {"a", "b", "ac", "bd", "abe", "bcf", "deg", "ah", "xy", "abc"};
- TVector<TUtf16String> result;
- for (size_t i = 0; i < nKeys; i++)
- result.push_back(ASCIIToWide(sampleKeys[i]));
- return result;
- }
- template <class TContainer>
- TVector<TContainer> TCompactTrieTest::GetSampleVectorData(size_t nValues) {
- TVector<TContainer> data;
- for (size_t i = 0; i < nValues; i++) {
- data.push_back(TContainer());
- for (size_t j = 0; j < i; j++)
- data[i].insert(data[i].end(), (typename TContainer::value_type)((j == 3) ? 0 : (1 << (j * 5))));
- }
- return data;
- }
- template <class TContainer>
- TVector<TContainer> TCompactTrieTest::GetSampleTextVectorData(size_t nValues) {
- TVector<TContainer> data;
- for (size_t i = 0; i < nValues; i++) {
- data.push_back(TContainer());
- for (size_t j = 0; j < i; j++)
- data[i].insert(data[i].end(), TString("abc") + ToString<size_t>(j));
- }
- return data;
- }
- template <class T>
- void TCompactTrieTest::CheckEquality(const T& value1, const T& value2) const {
- UNIT_ASSERT_VALUES_EQUAL(value1, value2);
- }
- template <>
- void TCompactTrieTest::CheckEquality<TVector<i64>>(const TVector<i64>& value1, const TVector<i64>& value2) const {
- UNIT_ASSERT_VALUES_EQUAL(value1.size(), value2.size());
- for (size_t i = 0; i < value1.size(); i++)
- UNIT_ASSERT_VALUES_EQUAL(value1[i], value2[i]);
- }
- template <class TContainer>
- void TCompactTrieTest::TestTrieWithContainers(const TVector<TUtf16String>& keys, const TVector<TContainer>& sampleData, TString methodName) {
- TString fileName = GetSystemTempDir() + "/TCompactTrieTest-TestTrieWithContainers-" + methodName;
- TCompactTrieBuilder<wchar16, TContainer> b;
- for (size_t i = 0; i < keys.size(); i++) {
- b.Add(keys[i], sampleData[i]);
- }
- TUnbufferedFileOutput out(fileName);
- b.Save(out);
- TCompactTrie<wchar16, TContainer> trie(TBlob::FromFileSingleThreaded(fileName));
- for (size_t i = 0; i < keys.size(); i++) {
- TContainer value = trie.Get(keys[i]);
- UNIT_ASSERT_VALUES_EQUAL(value.size(), sampleData[i].size());
- typename TContainer::const_iterator p = value.begin();
- typename TContainer::const_iterator p1 = sampleData[i].begin();
- for (; p != value.end(); p++, p1++)
- CheckEquality<typename TContainer::value_type>(*p, *p1);
- }
- unlink(fileName.data());
- }
- template <>
- void TCompactTrieTest::TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(const TVector<TUtf16String>& keys, const TVector<std::pair<TUtf16String, TVector<i64>>>& sampleData, TString methodName) {
- typedef std::pair<TUtf16String, TVector<i64>> TContainer;
- TString fileName = GetSystemTempDir() + "/TCompactTrieTest-TestTrieWithContainers-" + methodName;
- TCompactTrieBuilder<wchar16, TContainer> b;
- for (size_t i = 0; i < keys.size(); i++) {
- b.Add(keys[i], sampleData[i]);
- }
- TUnbufferedFileOutput out(fileName);
- b.Save(out);
- TCompactTrie<wchar16, TContainer> trie(TBlob::FromFileSingleThreaded(fileName));
- for (size_t i = 0; i < keys.size(); i++) {
- TContainer value = trie.Get(keys[i]);
- CheckEquality<TContainer::first_type>(value.first, sampleData[i].first);
- CheckEquality<TContainer::second_type>(value.second, sampleData[i].second);
- }
- unlink(fileName.data());
- }
- void TCompactTrieTest::TestTrieForVectorInt64() {
- TestTrieWithContainers<TVector<i64>>(GetSampleKeys(10), GetSampleVectorData<TVector<i64>>(10), "v-i64");
- }
- void TCompactTrieTest::TestTrieForListInt64() {
- TestTrieWithContainers<TList<i64>>(GetSampleKeys(10), GetSampleVectorData<TList<i64>>(10), "l-i64");
- }
- void TCompactTrieTest::TestTrieForSetInt64() {
- TestTrieWithContainers<TSet<i64>>(GetSampleKeys(10), GetSampleVectorData<TSet<i64>>(10), "s-i64");
- }
- void TCompactTrieTest::TestTrieForVectorStroka() {
- TestTrieWithContainers<TVector<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TVector<TString>>(10), "v-str");
- }
- void TCompactTrieTest::TestTrieForListStroka() {
- TestTrieWithContainers<TList<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TList<TString>>(10), "l-str");
- }
- void TCompactTrieTest::TestTrieForSetStroka() {
- TestTrieWithContainers<TSet<TString>>(GetSampleKeys(10), GetSampleTextVectorData<TSet<TString>>(10), "s-str");
- }
- void TCompactTrieTest::TestTrieForVectorWtroka() {
- TVector<TVector<TString>> data = GetSampleTextVectorData<TVector<TString>>(10);
- TVector<TVector<TUtf16String>> wData;
- for (size_t i = 0; i < data.size(); i++) {
- wData.push_back(TVector<TUtf16String>());
- for (size_t j = 0; j < data[i].size(); j++)
- wData[i].push_back(UTF8ToWide(data[i][j]));
- }
- TestTrieWithContainers<TVector<TUtf16String>>(GetSampleKeys(10), wData, "v-wtr");
- }
- void TCompactTrieTest::TestTrieForVectorFloat() {
- TestTrieWithContainers<TVector<float>>(GetSampleKeys(10), GetSampleVectorData<TVector<float>>(10), "v-float");
- }
- void TCompactTrieTest::TestTrieForVectorDouble() {
- TestTrieWithContainers<TVector<double>>(GetSampleKeys(10), GetSampleVectorData<TVector<double>>(10), "v-double");
- }
- void TCompactTrieTest::TestTrieForListVectorInt64() {
- TVector<i64> tmp;
- tmp.push_back(0);
- TList<TVector<i64>> dataElement(5, tmp);
- TVector<TList<TVector<i64>>> data(10, dataElement);
- TestTrieWithContainers<TList<TVector<i64>>>(GetSampleKeys(10), data, "l-v-i64");
- }
- void TCompactTrieTest::TestTrieForPairWtrokaVectorInt64() {
- TVector<TUtf16String> keys = GetSampleKeys(10);
- TVector<TVector<i64>> values = GetSampleVectorData<TVector<i64>>(10);
- TVector<std::pair<TUtf16String, TVector<i64>>> data;
- for (size_t i = 0; i < 10; i++)
- data.push_back(std::pair<TUtf16String, TVector<i64>>(keys[i] + u"_v", values[i]));
- TestTrieWithContainers<std::pair<TUtf16String, TVector<i64>>>(keys, data, "pair-str-v-i64");
- }
- void TCompactTrieTest::TestEmptyValueOutOfOrder() {
- TBufferOutput buffer;
- using TSymbol = ui32;
- {
- TCompactTrieBuilder<TSymbol, ui32> builder;
- TSymbol key = 1;
- builder.Add(&key, 1, 10);
- builder.Add(nullptr, 0, 14);
- builder.Save(buffer);
- }
- {
- TCompactTrie<TSymbol, ui32> trie(buffer.Buffer().Data(), buffer.Buffer().Size());
- UNIT_ASSERT(trie.Find(nullptr, 0));
- }
- }
- void TCompactTrieTest::TestFindLongestPrefixWithEmptyValue() {
- TBufferOutput buffer;
- {
- TCompactTrieBuilder<wchar16, ui32> builder;
- builder.Add(u"", 42);
- builder.Add(u"yandex", 271828);
- builder.Add(u"ya", 31415);
- builder.Save(buffer);
- }
- {
- TCompactTrie<wchar16, ui32> trie(buffer.Buffer().Data(), buffer.Buffer().Size());
- size_t prefixLen = 123;
- ui32 value = 0;
- UNIT_ASSERT(trie.FindLongestPrefix(u"google", &prefixLen, &value));
- UNIT_ASSERT(prefixLen == 0);
- UNIT_ASSERT(value == 42);
- UNIT_ASSERT(trie.FindLongestPrefix(u"yahoo", &prefixLen, &value));
- UNIT_ASSERT(prefixLen == 2);
- UNIT_ASSERT(value == 31415);
- }
- }
- template <typename TChar>
- struct TConvertKey {
- static inline TString Convert(const TStringBuf& key) {
- return ToString(key);
- }
- };
- template <>
- struct TConvertKey<wchar16> {
- static inline TUtf16String Convert(const TStringBuf& key) {
- return UTF8ToWide(key);
- }
- };
- template <>
- struct TConvertKey<wchar32> {
- static inline TUtf32String Convert(const TStringBuf& key) {
- return TUtf32String::FromUtf8(key);
- }
- };
- template <class TSearchIter, class TKeyBuf>
- static void MoveIter(TSearchIter& iter, const TKeyBuf& key) {
- for (size_t i = 0; i < key.length(); ++i) {
- UNIT_ASSERT(iter.Advance(key[i]));
- }
- }
- template <typename TChar>
- void TCompactTrieTest::TestSearchIterImpl() {
- TBufferOutput buffer;
- {
- TCompactTrieBuilder<TChar, ui32> builder;
- TStringBuf data[] = {
- TStringBuf("abaab"),
- TStringBuf("abcdef"),
- TStringBuf("abbbc"),
- TStringBuf("bdfaa"),
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(data); ++i) {
- builder.Add(TConvertKey<TChar>::Convert(data[i]), i + 1);
- }
- builder.Save(buffer);
- }
- TCompactTrie<TChar, ui32> trie(buffer.Buffer().Data(), buffer.Buffer().Size());
- ui32 value = 0;
- auto iter(MakeSearchIterator(trie));
- MoveIter(iter, TConvertKey<TChar>::Convert(TStringBuf("abc")));
- UNIT_ASSERT(!iter.GetValue(&value));
- iter = MakeSearchIterator(trie);
- MoveIter(iter, TConvertKey<TChar>::Convert(TStringBuf("abbbc")));
- UNIT_ASSERT(iter.GetValue(&value));
- UNIT_ASSERT_EQUAL(value, 3);
- iter = MakeSearchIterator(trie);
- UNIT_ASSERT(iter.Advance(TConvertKey<TChar>::Convert(TStringBuf("bdfa"))));
- UNIT_ASSERT(!iter.GetValue(&value));
- iter = MakeSearchIterator(trie);
- UNIT_ASSERT(iter.Advance(TConvertKey<TChar>::Convert(TStringBuf("bdfaa"))));
- UNIT_ASSERT(iter.GetValue(&value));
- UNIT_ASSERT_EQUAL(value, 4);
- UNIT_ASSERT(!MakeSearchIterator(trie).Advance(TChar('z')));
- UNIT_ASSERT(!MakeSearchIterator(trie).Advance(TConvertKey<TChar>::Convert(TStringBuf("cdf"))));
- UNIT_ASSERT(!MakeSearchIterator(trie).Advance(TConvertKey<TChar>::Convert(TStringBuf("abca"))));
- }
- void TCompactTrieTest::TestSearchIterChar() {
- TestSearchIterImpl<char>();
- }
- void TCompactTrieTest::TestSearchIterWchar() {
- TestSearchIterImpl<wchar16>();
- }
- void TCompactTrieTest::TestSearchIterWchar32() {
- TestSearchIterImpl<wchar32>();
- }
- void TCompactTrieTest::TestCopyAndAssignment() {
- TBufferOutput bufout;
- typedef TCompactTrie<> TTrie;
- CreateTrie<char>(bufout, false, false);
- TTrie trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- TTrie copy(trie);
- UNIT_ASSERT(copy.HasCorrectSkipper());
- TTrie assign;
- assign = trie;
- UNIT_ASSERT(assign.HasCorrectSkipper());
- TTrie move(std::move(trie));
- UNIT_ASSERT(move.HasCorrectSkipper());
- TTrie moveAssign;
- moveAssign = TTrie(bufout.Buffer().Data(), bufout.Buffer().Size());
- UNIT_ASSERT(moveAssign.HasCorrectSkipper());
- }
- template <class TTrie>
- void TCompactTrieTest::TestFirstSymbolIteratorForTrie(const TTrie& trie, const TStringBuf& narrowAnswers) {
- NCompactTrie::TFirstSymbolIterator<TTrie> it;
- it.SetTrie(trie, trie.GetSkipper());
- typename TTrie::TKey answers = MakeWideKey<typename TTrie::TSymbol>(narrowAnswers);
- auto answer = answers.begin();
- for (; !it.AtEnd(); it.MakeStep(), ++answer) {
- UNIT_ASSERT(answer != answers.end());
- UNIT_ASSERT(it.GetKey() == *answer);
- }
- UNIT_ASSERT(answer == answers.end());
- }
- template <class TSymbol>
- void TCompactTrieTest::TestFirstSymbolIterator() {
- TBufferOutput bufout;
- typedef TCompactTrie<TSymbol> TTrie;
- CreateTrie<TSymbol>(bufout, false, false);
- TTrie trie(bufout.Buffer().Data(), bufout.Buffer().Size());
- TStringBuf rootAnswers = "abcdf";
- TestFirstSymbolIteratorForTrie(trie, rootAnswers);
- TStringBuf aAnswers = "abcd";
- TestFirstSymbolIteratorForTrie(trie.FindTails(MakeWideKey<TSymbol>("a", 1)), aAnswers);
- }
- void TCompactTrieTest::TestFirstSymbolIterator8() {
- TestFirstSymbolIterator<char>();
- }
- void TCompactTrieTest::TestFirstSymbolIterator16() {
- TestFirstSymbolIterator<wchar16>();
- }
- void TCompactTrieTest::TestFirstSymbolIterator32() {
- TestFirstSymbolIterator<ui32>();
- }
- void TCompactTrieTest::TestFirstSymbolIteratorChar32() {
- TestFirstSymbolIterator<wchar32>();
- }
- void TCompactTrieTest::TestArrayPacker() {
- using TDataInt = std::array<int, 2>;
- const std::pair<TString, TDataInt> dataXxx{"xxx", {{15, 16}}};
- const std::pair<TString, TDataInt> dataYyy{"yyy", {{20, 30}}};
- TCompactTrieBuilder<char, TDataInt> trieBuilderOne;
- trieBuilderOne.Add(dataXxx.first, dataXxx.second);
- trieBuilderOne.Add(dataYyy.first, dataYyy.second);
- TBufferOutput bufferOne;
- trieBuilderOne.Save(bufferOne);
- const TCompactTrie<char, TDataInt> trieOne(bufferOne.Buffer().Data(), bufferOne.Buffer().Size());
- UNIT_ASSERT_VALUES_EQUAL(dataXxx.second, trieOne.Get(dataXxx.first));
- UNIT_ASSERT_VALUES_EQUAL(dataYyy.second, trieOne.Get(dataYyy.first));
- using TDataStroka = std::array<TString, 2>;
- const std::pair<TString, TDataStroka> dataZzz{"zzz", {{"hello", "there"}}};
- const std::pair<TString, TDataStroka> dataWww{"www", {{"half", "life"}}};
- TCompactTrieBuilder<char, TDataStroka> trieBuilderTwo;
- trieBuilderTwo.Add(dataZzz.first, dataZzz.second);
- trieBuilderTwo.Add(dataWww.first, dataWww.second);
- TBufferOutput bufferTwo;
- trieBuilderTwo.Save(bufferTwo);
- const TCompactTrie<char, TDataStroka> trieTwo(bufferTwo.Buffer().Data(), bufferTwo.Buffer().Size());
- UNIT_ASSERT_VALUES_EQUAL(dataZzz.second, trieTwo.Get(dataZzz.first));
- UNIT_ASSERT_VALUES_EQUAL(dataWww.second, trieTwo.Get(dataWww.first));
- }
- void TCompactTrieTest::TestBuilderFindLongestPrefix() {
- const size_t sizes[] = {10, 100};
- const double branchProbabilities[] = {0.01, 0.1, 0.5, 0.9, 0.99};
- for (size_t size : sizes) {
- for (double branchProbability : branchProbabilities) {
- TestBuilderFindLongestPrefix(size, branchProbability, false, false);
- TestBuilderFindLongestPrefix(size, branchProbability, false, true);
- TestBuilderFindLongestPrefix(size, branchProbability, true, false);
- TestBuilderFindLongestPrefix(size, branchProbability, true, true);
- }
- }
- }
- void TCompactTrieTest::TestBuilderFindLongestPrefix(size_t keysCount, double branchProbability, bool isPrefixGrouped, bool hasEmptyKey) {
- TVector<TString> keys;
- TString keyToAdd;
- for (size_t i = 0; i < keysCount; ++i) {
- const size_t prevKeyLen = keyToAdd.Size();
- // add two random chars to prev key
- keyToAdd += RandChar();
- keyToAdd += RandChar();
- const bool changeBranch = prevKeyLen && RandomNumber<double>() < branchProbability;
- if (changeBranch) {
- const size_t branchPlace = RandomNumber<size_t>(prevKeyLen + 1); // random place in [0, prevKeyLen]
- *(keyToAdd.begin() + branchPlace) = RandChar();
- }
- keys.push_back(keyToAdd);
- }
- if (isPrefixGrouped)
- Sort(keys.begin(), keys.end());
- else
- Shuffle(keys.begin(), keys.end());
- TCompactTrieBuilder<char, TString> builder(isPrefixGrouped ? CTBF_PREFIX_GROUPED : CTBF_NONE);
- const TString EMPTY_VALUE = "empty";
- if (hasEmptyKey)
- builder.Add(nullptr, 0, EMPTY_VALUE);
- for (size_t i = 0; i < keysCount; ++i) {
- const TString& key = keys[i];
- for (size_t j = 0; j < keysCount; ++j) {
- const TString& otherKey = keys[j];
- const bool exists = j < i;
- size_t expectedSize = 0;
- if (exists) {
- expectedSize = otherKey.size();
- } else {
- size_t max = 0;
- for (size_t k = 0; k < i; ++k)
- if (keys[k].Size() < otherKey.Size() && keys[k].Size() > max && otherKey.StartsWith(keys[k]))
- max = keys[k].Size();
- expectedSize = max;
- }
- size_t prefixSize = 0xfcfcfc;
- TString value = "abcd";
- const bool expectedResult = hasEmptyKey || expectedSize != 0;
- UNIT_ASSERT_VALUES_EQUAL_C(expectedResult, builder.FindLongestPrefix(otherKey.data(), otherKey.size(), &prefixSize, &value), "otherKey = " << HexEncode(otherKey));
- if (expectedResult) {
- UNIT_ASSERT_VALUES_EQUAL(expectedSize, prefixSize);
- if (expectedSize) {
- UNIT_ASSERT_VALUES_EQUAL(TStringBuf(otherKey).SubStr(0, prefixSize), value);
- } else {
- UNIT_ASSERT_VALUES_EQUAL(EMPTY_VALUE, value);
- }
- } else {
- UNIT_ASSERT_VALUES_EQUAL("abcd", value);
- UNIT_ASSERT_VALUES_EQUAL(0xfcfcfc, prefixSize);
- }
- for (int c = 0; c < 10; ++c) {
- TString extendedKey = otherKey;
- extendedKey += RandChar();
- size_t extendedPrefixSize = 0xdddddd;
- TString extendedValue = "dcba";
- UNIT_ASSERT_VALUES_EQUAL(expectedResult, builder.FindLongestPrefix(extendedKey.data(), extendedKey.size(), &extendedPrefixSize, &extendedValue));
- if (expectedResult) {
- UNIT_ASSERT_VALUES_EQUAL(value, extendedValue);
- UNIT_ASSERT_VALUES_EQUAL(prefixSize, extendedPrefixSize);
- } else {
- UNIT_ASSERT_VALUES_EQUAL("dcba", extendedValue);
- UNIT_ASSERT_VALUES_EQUAL(0xdddddd, extendedPrefixSize);
- }
- }
- }
- builder.Add(key.data(), key.size(), key);
- }
- TBufferOutput buffer;
- builder.Save(buffer);
- }
- void TCompactTrieTest::TestBuilderFindLongestPrefixWithEmptyValue() {
- TCompactTrieBuilder<wchar16, ui32> builder;
- builder.Add(u"", 42);
- builder.Add(u"yandex", 271828);
- builder.Add(u"ya", 31415);
- size_t prefixLen = 123;
- ui32 value = 0;
- UNIT_ASSERT(builder.FindLongestPrefix(u"google", &prefixLen, &value));
- UNIT_ASSERT_VALUES_EQUAL(prefixLen, 0);
- UNIT_ASSERT_VALUES_EQUAL(value, 42);
- UNIT_ASSERT(builder.FindLongestPrefix(u"yahoo", &prefixLen, &value));
- UNIT_ASSERT_VALUES_EQUAL(prefixLen, 2);
- UNIT_ASSERT_VALUES_EQUAL(value, 31415);
- TBufferOutput buffer;
- builder.Save(buffer);
- }
- void TCompactTrieTest::TestPatternSearcherEmpty() {
- TCompactPatternSearcherBuilder<char, ui32> builder;
- TBufferOutput searcherData;
- builder.Save(searcherData);
- TCompactPatternSearcher<char, ui32> searcher(
- searcherData.Buffer().Data(),
- searcherData.Buffer().Size()
- );
- UNIT_ASSERT(searcher.SearchMatches("a").empty());
- UNIT_ASSERT(searcher.SearchMatches("").empty());
- UNIT_ASSERT(searcher.SearchMatches("abc").empty());
- }
- void TCompactTrieTest::TestPatternSearcherOnDataset(
- const TVector<TString>& patterns,
- const TVector<TString>& samples
- ) {
- TCompactPatternSearcherBuilder<char, ui32> builder;
- for (size_t patternIdx = 0; patternIdx < patterns.size(); ++patternIdx) {
- builder.Add(patterns[patternIdx], patternIdx);
- }
- TBufferOutput searcherData;
- builder.Save(searcherData);
- TCompactPatternSearcher<char, ui32> searcher(
- searcherData.Buffer().Data(),
- searcherData.Buffer().Size()
- );
- for (const auto& sample : samples) {
- const auto matches = searcher.SearchMatches(sample);
- size_t matchesNum = 0;
- THashSet<TString> processedPatterns;
- for (const auto& pattern : patterns) {
- if (pattern.Empty() || processedPatterns.contains(pattern)) {
- continue;
- }
- for (size_t start = 0; start + pattern.Size() <= sample.Size(); ++start) {
- matchesNum += (pattern == sample.substr(start, pattern.Size()));
- }
- processedPatterns.insert(pattern);
- }
- UNIT_ASSERT_VALUES_EQUAL(matchesNum, matches.size());
- TSet<std::pair<size_t, ui32>> foundMatches;
- for (const auto& match : matches) {
- std::pair<size_t, ui32> matchParams(match.End, match.Data);
- UNIT_ASSERT(!foundMatches.contains(matchParams));
- foundMatches.insert(matchParams);
- const auto& pattern = patterns[match.Data];
- UNIT_ASSERT_VALUES_EQUAL(
- sample.substr(match.End - pattern.size() + 1, pattern.size()),
- pattern
- );
- }
- }
- }
- void TCompactTrieTest::TestPatternSearcherSimple() {
- TestPatternSearcherOnDataset(
- { // patterns
- "abcd",
- "abc",
- "ab",
- "a",
- ""
- },
- { // samples
- "abcde",
- "abcd",
- "abc",
- "ab",
- "a",
- ""
- }
- );
- TestPatternSearcherOnDataset(
- { // patterns
- "a"
- "ab",
- "abcd",
- },
- { // samples
- "abcde",
- "abcd",
- "abc",
- "ab",
- "a",
- ""
- }
- );
- TestPatternSearcherOnDataset(
- { // patterns
- "aaaa",
- "aaa",
- "aa",
- "a",
- },
- { // samples
- "aaaaaaaaaaaa"
- }
- );
- TestPatternSearcherOnDataset(
- { // patterns
- "aa", "ab", "ac", "ad", "ae", "af",
- "ba", "bb", "bc", "bd", "be", "bf",
- "ca", "cb", "cc", "cd", "ce", "cf",
- "da", "db", "dc", "dd", "de", "df",
- "ea", "eb", "ec", "ed", "ee", "ef",
- "fa", "fb", "fc", "fd", "fe", "ff"
- },
- { // samples
- "dcabafeebfdcbacddacadbaabecdbaeffecdbfabcdcabcfaefecdfebacfedacefbdcacfeb",
- "abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefancdefancdef",
- "fedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcbafedcba",
- "",
- "a", "b", "c", "d", "e", "f",
- "aa", "ab", "ac", "ad", "ae", "af",
- "ba", "bb", "bc", "bd", "be", "bf",
- "ca", "cb", "cc", "cd", "ce", "cf",
- "da", "db", "dc", "dd", "de", "df",
- "ea", "eb", "ec", "ed", "ee", "ef",
- "fa", "fb", "fc", "fd", "fe", "ff"
- }
- );
- }
- static char RandChar(
- TFastRng<ui64>& rng,
- int maxChar
- ) {
- return static_cast<char>(rng.GenRand() % (maxChar + 1));
- }
- static TString RandStr(
- TFastRng<ui64>& rng,
- size_t maxLength,
- int maxChar,
- bool nonEmpty = false
- ) {
- Y_ASSERT(maxLength > 0);
- size_t length;
- if (nonEmpty) {
- length = rng.GenRand() % maxLength + 1;
- } else {
- length = rng.GenRand() % (maxLength + 1);
- }
- TString result;
- while (result.size() < length) {
- result += RandChar(rng, maxChar);
- }
- return result;
- }
- void TCompactTrieTest::TestPatternSearcherRandom(
- size_t patternsNum,
- size_t patternMaxLength,
- size_t strMaxLength,
- int maxChar,
- TFastRng<ui64>& rng
- ) {
- auto patternToSearch = RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true);
- TVector<TString> patterns = {patternToSearch};
- while (patterns.size() < patternsNum) {
- patterns.push_back(RandStr(rng, patternMaxLength, maxChar, /*nonEmpty*/true));
- }
- auto filler = RandStr(rng, strMaxLength - patternToSearch.Size() + 1, maxChar);
- size_t leftFillerSize = rng.GenRand() % (filler.size() + 1);
- auto leftFiller = filler.substr(0, leftFillerSize);
- auto rightFiller = filler.substr(leftFillerSize, filler.size() - leftFillerSize);
- auto sample = leftFiller + patternToSearch + rightFiller;
- TestPatternSearcherOnDataset(patterns, {sample});
- }
- void TCompactTrieTest::TestPatternSearcherRandom() {
- TFastRng<ui64> rng(0);
- for (size_t patternMaxLen : {1, 2, 10}) {
- for (size_t strMaxLen : TVector<size_t>{patternMaxLen, 2 * patternMaxLen, 10}) {
- for (int maxChar : {0, 1, 5, 255}) {
- for (size_t patternsNum : {1, 10}) {
- for (size_t testIdx = 0; testIdx < 3; ++testIdx) {
- TestPatternSearcherRandom(
- patternsNum,
- patternMaxLen,
- strMaxLen,
- maxChar,
- rng
- );
- }
- }
- }
- }
- }
- }
|