123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465 |
- #include "../mkql_rh_hash.h"
- #include <library/cpp/testing/unittest/registar.h>
- #include <unordered_map>
- #include <unordered_set>
- namespace NKikimr {
- namespace NMiniKQL {
- Y_UNIT_TEST_SUITE(TMiniKQLRobinHoodHashTest) {
- Y_UNIT_TEST(Map) {
- TRobinHoodHashMap<i32> rh(sizeof(i64));
- std::unordered_map<i32, i64> h;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- UNIT_ASSERT_VALUES_EQUAL(isNew, inserted);
- it->second += i;
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = i;
- rh.CheckGrow();
- } else {
- *(i64*)rh.GetMutablePayload(iter) += i;
- }
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- }
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(it), hit->second);
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(FixedMap) {
- TRobinHoodHashFixedMap<i32, i64> rh;
- std::unordered_map<i32, i64> h;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- UNIT_ASSERT_VALUES_EQUAL(isNew, inserted);
- it->second += i;
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = i;
- rh.CheckGrow();
- } else {
- *(i64*)rh.GetMutablePayload(iter) += i;
- }
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- }
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(it), hit->second);
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(Set) {
- TRobinHoodHashSet<i32> rh;
- std::unordered_set<i32> h;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i % 1000;
- auto[it, inserted] = h.emplace(k);
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- UNIT_ASSERT_VALUES_EQUAL(isNew, inserted);
- if (isNew) {
- rh.CheckGrow();
- }
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- }
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(MapBatch) {
- using THashTable = TRobinHoodHashMap<i32>;
- THashTable rh(sizeof(i64));
- std::unordered_map<i32, i64> h;
- std::array<TRobinHoodBatchRequestItem<i32>, PrefetchBatchSize> batch;
- std::array<bool, PrefetchBatchSize> batchInserted;
- std::array<ui64, PrefetchBatchSize> batchI;
- ui32 batchLen = 0;
- auto processBatch = [&]() {
- rh.BatchInsert({batch.data(), batchLen}, [&](size_t i, THashTable::iterator iter, bool isNew) {
- UNIT_ASSERT_VALUES_EQUAL(isNew, batchInserted[i]);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), batch[i].GetKey());
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = batchI[i];
- } else {
- *(i64*)rh.GetMutablePayload(iter) += batchI[i];
- }
- });
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- };
- for (ui64 i = 0; i < 10000; ++i) {
- if (batchLen == batch.size()) {
- processBatch();
- batchLen = 0;
- }
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- batchI[batchLen] = i;
- batchInserted[batchLen] = inserted;
- batch[batchLen].ConstructKey(k);
- ++batchLen;
- it->second += i;
- }
- processBatch();
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(it), hit->second);
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(FixedMapBatch) {
- using THashTable = TRobinHoodHashFixedMap<i32, i64>;
- THashTable rh(sizeof(i64));
- std::unordered_map<i32, i64> h;
- std::array<TRobinHoodBatchRequestItem<i32>, PrefetchBatchSize> batch;
- std::array<bool, PrefetchBatchSize> batchInserted;
- std::array<ui64, PrefetchBatchSize> batchI;
- ui32 batchLen = 0;
- auto processBatch = [&]() {
- rh.BatchInsert({batch.data(), batchLen}, [&](size_t i, THashTable::iterator iter, bool isNew) {
- UNIT_ASSERT_VALUES_EQUAL(isNew, batchInserted[i]);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), batch[i].GetKey());
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = batchI[i];
- } else {
- *(i64*)rh.GetMutablePayload(iter) += batchI[i];
- }
- });
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- };
- for (ui64 i = 0; i < 10000; ++i) {
- if (batchLen == batch.size()) {
- processBatch();
- batchLen = 0;
- }
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- batchI[batchLen] = i;
- batchInserted[batchLen] = inserted;
- batch[batchLen].ConstructKey(k);
- ++batchLen;
- it->second += i;
- }
- processBatch();
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(it), hit->second);
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(SetBatch) {
- using THashTable = TRobinHoodHashSet<i32>;
- THashTable rh(sizeof(i64));
- std::unordered_set<i32> h;
- std::array<TRobinHoodBatchRequestItem<i32>, PrefetchBatchSize> batch;
- std::array<bool, PrefetchBatchSize> batchInserted;
- ui32 batchLen = 0;
- auto processBatch = [&]() {
- rh.BatchInsert({batch.data(), batchLen}, [&](size_t i, THashTable::iterator iter, bool isNew) {
- UNIT_ASSERT_VALUES_EQUAL(isNew, batchInserted[i]);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), batch[i].GetKey());
- });
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- };
- for (ui64 i = 0; i < 10000; ++i) {
- if (batchLen == batch.size()) {
- processBatch();
- batchLen = 0;
- }
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k);
- batchInserted[batchLen] = inserted;
- batch[batchLen].ConstructKey(k);
- ++batchLen;
- }
- processBatch();
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto key = rh.GetKey(it);
- auto hit = h.find(key);
- UNIT_ASSERT(hit != h.end());
- h.erase(key);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(Power2Collisions) {
- TRobinHoodHashSet<ui64> rh;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i << 32;
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- if (isNew) {
- rh.CheckGrow();
- }
- UNIT_ASSERT_VALUES_EQUAL(i + 1, rh.GetSize());
- }
- i32 maxDistance = 0;
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto distance = rh.GetPSL(it).Distance;
- maxDistance = Max(maxDistance, distance);
- }
- Cerr << "maxDistance: " << maxDistance << "\n";
- UNIT_ASSERT(maxDistance < 10);
- }
- Y_UNIT_TEST(RehashCollisions) {
- TRobinHoodHashSet<ui64> rh;
- TVector<ui64> values;
- const ui64 N = 1500000;
- values.reserve(N);
- for (ui64 i = 1; i <= N; ++i) {
- auto k = 64 * (i >> 4) + ((i & 8) ? 32 : 0) + (i & 7);
- values.push_back(k);
- }
- /*
- for (ui64 i = 0; i < 32; ++i) {
- Cerr << values[i] << "\n";
- }
- for (ui64 i = 0; i < 32; ++i) {
- Cerr << values[values.size() - 32 + i] << "\n";
- }*/
- for (ui64 i = 0; i < values.size(); ++i) {
- auto k = values[i];
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- if (rh.GetKey(iter) != k) {
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- }
- if (isNew) {
- rh.CheckGrow();
- }
- if (i + 1 != rh.GetSize()) {
- UNIT_ASSERT_VALUES_EQUAL(i + 1, rh.GetSize());
- }
- }
- TRobinHoodHashSet<ui64> rh2;
- i32 maxDistance1 = 0;
- ui64 j = 0;
- for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
- if (!rh.IsValid(it)) {
- continue;
- }
- auto distance = rh.GetPSL(it).Distance;
- maxDistance1 = Max(maxDistance1, distance);
- auto k = rh.GetKey(it);
- bool isNew;
- auto iter = rh2.Insert(k, isNew);
- if (rh2.GetKey(iter) != k) {
- UNIT_ASSERT_VALUES_EQUAL(rh2.GetKey(iter), k);
- }
- if (isNew) {
- rh2.CheckGrow();
- }
- if (j + 1 != rh2.GetSize()) {
- UNIT_ASSERT_VALUES_EQUAL(j + 1, rh2.GetSize());
- }
- ++j;
- }
- Cerr << "maxDistance1: " << maxDistance1 << "\n";
- UNIT_ASSERT(maxDistance1 < 20);
- i32 maxDistance2 = 0;
- for (auto it = rh2.Begin(); it != rh2.End(); rh2.Advance(it)) {
- if (!rh2.IsValid(it)) {
- continue;
- }
- auto distance = rh2.GetPSL(it).Distance;
- maxDistance2 = Max(maxDistance2, distance);
- }
- Cerr << "maxDistance2: " << maxDistance2 << "\n";
- UNIT_ASSERT(maxDistance2 < 20);
- }
- Y_UNIT_TEST(Lookup) {
- TRobinHoodHashMap<i32> rh(sizeof(i64));
- std::unordered_map<i32, i64> h;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- UNIT_ASSERT_VALUES_EQUAL(isNew, inserted);
- it->second += i;
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = i;
- rh.CheckGrow();
- } else {
- *(i64*)rh.GetMutablePayload(iter) += i;
- }
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- }
- for (ui64 i = 0; i < 1000; ++i) {
- auto iter = rh.Lookup(i);
- auto hit = h.find(i);
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(iter), hit->second);
- h.erase(hit);
- }
- UNIT_ASSERT(h.empty());
- }
- Y_UNIT_TEST(BatchLookup) {
- using THashTable = TRobinHoodHashMap<i32>;
- THashTable rh(sizeof(i64));
- std::unordered_map<i32, i64> h;
- for (ui64 i = 0; i < 10000; ++i) {
- auto k = i % 1000;
- auto [it, inserted] = h.emplace(k, 0);
- bool isNew;
- auto iter = rh.Insert(k, isNew);
- UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
- UNIT_ASSERT_VALUES_EQUAL(isNew, inserted);
- it->second += i;
- if (isNew) {
- *(i64*)rh.GetMutablePayload(iter) = i;
- rh.CheckGrow();
- } else {
- *(i64*)rh.GetMutablePayload(iter) += i;
- }
- UNIT_ASSERT_VALUES_EQUAL(h.size(), rh.GetSize());
- }
- std::array<TRobinHoodBatchRequestItem<i32>, PrefetchBatchSize> batch;
- std::array<ui64, PrefetchBatchSize> batchI;
- ui32 batchLen = 0;
- auto processBatch = [&]() {
- rh.BatchLookup({batch.data(), batchLen}, [&](size_t i, THashTable::iterator iter) {
- auto key = batchI[i];
- auto hit = h.find(key);
- UNIT_ASSERT_VALUES_EQUAL(*(i64*)rh.GetPayload(iter), hit->second);
- h.erase(hit);
- });
- };
- for (ui64 i = 0; i < 1000; ++i) {
- if (batchLen == batch.size()) {
- processBatch();
- batchLen = 0;
- }
- batch[batchLen].ConstructKey(i);
- batchI[batchLen] = i;
- ++batchLen;
- }
- if (batchLen > 0) {
- processBatch();
- }
- UNIT_ASSERT(h.empty());
- }
- }
- }
- }
|