compact_hash_ut.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. #include "compact_hash.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/generic/bitmap.h>
  4. #include <util/generic/ylimits.h>
  5. #include <util/generic/hash_set.h>
  6. #include <util/generic/maybe.h>
  7. #include <util/generic/xrange.h>
  8. #include <util/random/shuffle.h>
  9. using namespace NKikimr;
  10. using namespace NKikimr::NCHash;
  11. Y_UNIT_TEST_SUITE(TCompactHashTest) {
  12. template <typename TItem>
  13. void TestListPoolPagesImpl(size_t listSize, ui16 countOfLists, size_t expectedListCapacity, ui32 expectedMark) {
  14. using TPool = TListPool<TItem>;
  15. TAlignedPagePool pagePool(__LOCATION__);
  16. TPool pool(pagePool);
  17. UNIT_ASSERT(countOfLists > 1);
  18. THashSet<TItem*> lists;
  19. void* pageAddr = nullptr;
  20. for (size_t i = 0; i < countOfLists / 2; ++i) {
  21. TItem* l = pool.template GetList<TItem>(listSize);
  22. UNIT_ASSERT_VALUES_EQUAL(expectedMark, TPool::GetMark(l));
  23. UNIT_ASSERT_VALUES_EQUAL(listSize, TListPoolBase::GetPartListSize(l));
  24. UNIT_ASSERT_VALUES_EQUAL(expectedListCapacity, TListPoolBase::GetListCapacity(l));
  25. if (0 == i) {
  26. pageAddr = TAlignedPagePool::GetPageStart(l);
  27. } else {
  28. // All lists are from the same page
  29. UNIT_ASSERT_VALUES_EQUAL(pageAddr, TAlignedPagePool::GetPageStart(l));
  30. }
  31. UNIT_ASSERT(lists.insert(l).second);
  32. }
  33. // Return all lists except one
  34. while (lists.size() > 1) {
  35. auto it = lists.begin();
  36. pool.ReturnList(*it);
  37. lists.erase(it);
  38. }
  39. for (size_t i = 1; i < countOfLists; ++i) {
  40. TItem* l = pool.template GetList<TItem>(listSize);
  41. UNIT_ASSERT_VALUES_EQUAL(expectedMark, TPool::GetMark(l));
  42. UNIT_ASSERT_VALUES_EQUAL(listSize, TListPoolBase::GetPartListSize(l));
  43. UNIT_ASSERT_VALUES_EQUAL(expectedListCapacity, TListPoolBase::GetListCapacity(l));
  44. UNIT_ASSERT_VALUES_EQUAL(pageAddr, TAlignedPagePool::GetPageStart(l));
  45. UNIT_ASSERT(lists.insert(l).second);
  46. }
  47. for (auto l: lists) {
  48. pool.ReturnList(l);
  49. }
  50. THashSet<TItem*> lists2;
  51. for (size_t i = 0; i < countOfLists; ++i) {
  52. TItem* l = pool.template GetList<TItem>(listSize);
  53. // All lists are from the same page
  54. UNIT_ASSERT_VALUES_EQUAL(pageAddr, TAlignedPagePool::GetPageStart(l));
  55. UNIT_ASSERT(1 == lists.erase(l));
  56. UNIT_ASSERT(lists2.insert(l).second);
  57. }
  58. TItem* l = pool.template GetList<TItem>(listSize); // New page
  59. UNIT_ASSERT_VALUES_UNEQUAL(pageAddr, TAlignedPagePool::GetPageStart(l));
  60. UNIT_ASSERT_VALUES_EQUAL(listSize, TListPoolBase::GetPartListSize(l));
  61. UNIT_ASSERT_VALUES_EQUAL(expectedListCapacity, TListPoolBase::GetListCapacity(l));
  62. pool.ReturnList(l);
  63. for (auto l: lists2) {
  64. pool.ReturnList(l);
  65. }
  66. }
  67. template <typename TItem>
  68. void TestListPoolLargeImpl() {
  69. using TPool = TListPool<TItem>;
  70. TAlignedPagePool pagePool(__LOCATION__);
  71. TPool pool(pagePool);
  72. const size_t listSize = TListPoolBase::GetMaxListSize<TItem>();
  73. TItem* l = pool.template GetList<TItem>(listSize);
  74. pool.template IncrementList<TItem>(l);
  75. UNIT_ASSERT_VALUES_EQUAL((ui32)TListPoolBase::LARGE_MARK, TListPoolBase::GetMark(l));
  76. UNIT_ASSERT_VALUES_EQUAL(listSize + 1, TListPoolBase::GetPartListSize(l));
  77. TListPoolBase::SetPartListSize(l, TListPoolBase::GetListCapacity(l));
  78. pool.template IncrementList<TItem>(l);
  79. UNIT_ASSERT_VALUES_EQUAL(1, TListPoolBase::GetPartListSize(l));
  80. UNIT_ASSERT_VALUES_EQUAL(1 + TListPoolBase::GetListCapacity(l), TListPoolBase::GetFullListSize(l));
  81. pool.ReturnList(l);
  82. }
  83. Y_UNIT_TEST(TestListPoolSmallPagesByte) {
  84. ui16 count = TListPoolBase::GetSmallPageCapacity<ui8>(2);
  85. TestListPoolPagesImpl<ui8>(2, count, 2, TListPoolBase::SMALL_MARK);
  86. }
  87. Y_UNIT_TEST(TestListPoolMediumPagesByte) {
  88. size_t listSize = TListPoolBase::MAX_SMALL_LIST_SIZE + 1;
  89. ui16 count = TListPoolBase::GetMediumPageCapacity<ui8>(listSize);
  90. TestListPoolPagesImpl<ui8>(listSize, count, FastClp2(listSize), TListPoolBase::MEDIUM_MARK);
  91. }
  92. Y_UNIT_TEST(TestListPoolLargPagesByte) {
  93. TestListPoolLargeImpl<ui8>();
  94. }
  95. Y_UNIT_TEST(TestListPoolSmallPagesUi64) {
  96. ui16 count = TListPoolBase::GetSmallPageCapacity<ui64>(2);
  97. TestListPoolPagesImpl<ui64>(2, count, 2, TListPoolBase::SMALL_MARK);
  98. }
  99. Y_UNIT_TEST(TestListPoolMediumPagesUi64) {
  100. size_t listSize = TListPoolBase::MAX_SMALL_LIST_SIZE + 1;
  101. ui16 count = TListPoolBase::GetMediumPageCapacity<ui64>(listSize);
  102. TestListPoolPagesImpl<ui64>(listSize, count, FastClp2(listSize), TListPoolBase::MEDIUM_MARK);
  103. }
  104. Y_UNIT_TEST(TestListPoolLargPagesUi64) {
  105. TestListPoolLargeImpl<ui64>();
  106. }
  107. struct TItem {
  108. ui8 A[256];
  109. };
  110. Y_UNIT_TEST(TestListPoolSmallPagesObj) {
  111. ui16 count = TListPoolBase::GetSmallPageCapacity<TItem>(2);
  112. TestListPoolPagesImpl<TItem>(2, count, 2, TListPoolBase::SMALL_MARK);
  113. }
  114. Y_UNIT_TEST(TestListPoolMediumPagesObj) {
  115. size_t listSize = TListPoolBase::MAX_SMALL_LIST_SIZE + 1;
  116. ui16 count = TListPoolBase::GetMediumPageCapacity<TItem>(listSize);
  117. TestListPoolPagesImpl<TItem>(listSize, count, FastClp2(listSize), TListPoolBase::MEDIUM_MARK);
  118. }
  119. Y_UNIT_TEST(TestListPoolLargPagesObj) {
  120. TestListPoolLargeImpl<TItem>();
  121. }
  122. struct TItemHash {
  123. template <typename T>
  124. size_t operator() (T num) const {
  125. return num % 13;
  126. }
  127. };
  128. template <typename TItem>
  129. void TestHashImpl() {
  130. const ui32 elementsCount = 32;
  131. const ui64 sumKeysTarget = elementsCount * (elementsCount - 1) / 2;
  132. const ui32 addition = 20;
  133. const ui64 sumValuesTarget = sumKeysTarget + addition * elementsCount;
  134. TAlignedPagePool pagePool(__LOCATION__);
  135. TCompactHash<TItem, TItem, TItemHash> hash(pagePool);
  136. TVector<TItem> elements(elementsCount);
  137. std::iota(elements.begin(), elements.end(), 0);
  138. Shuffle(elements.begin(), elements.end());
  139. for (TItem i: elements) {
  140. hash.Insert(i, i + addition);
  141. }
  142. {
  143. decltype(hash) hash2(std::move(hash));
  144. decltype(hash) hash3(pagePool);
  145. hash3.Swap(hash2);
  146. hash = hash3;
  147. }
  148. for (TItem i: elements) {
  149. UNIT_ASSERT(hash.Has(i));
  150. UNIT_ASSERT(hash.Find(i).Ok());
  151. UNIT_ASSERT_VALUES_EQUAL(i + addition, hash.Find(i).Get().second);
  152. }
  153. UNIT_ASSERT(!hash.Has(elementsCount + 1));
  154. UNIT_ASSERT(!hash.Has(elementsCount + 10));
  155. UNIT_ASSERT_VALUES_EQUAL(elementsCount, hash.Size());
  156. UNIT_ASSERT_VALUES_EQUAL(elementsCount, hash.UniqSize());
  157. ui64 sumKeys = 0;
  158. ui64 sumValues = 0;
  159. for (auto it = hash.Iterate(); it.Ok(); ++it) {
  160. UNIT_ASSERT_VALUES_EQUAL(it.Get().first + addition, it.Get().second);
  161. sumKeys += it.Get().first;
  162. sumValues += it.Get().second;
  163. }
  164. UNIT_ASSERT_VALUES_EQUAL(sumKeys, sumKeysTarget);
  165. UNIT_ASSERT_VALUES_EQUAL(sumValues, sumValuesTarget);
  166. }
  167. template <typename TItem>
  168. void TestMultiHashImpl() {
  169. const ui32 keysCount = 10;
  170. const ui64 elementsCount = keysCount * (keysCount + 1) / 2;
  171. TAlignedPagePool pagePool(__LOCATION__);
  172. TCompactMultiHash<TItem, TItem, TItemHash> hash(pagePool);
  173. TVector<TItem> keys(keysCount);
  174. std::iota(keys.begin(), keys.end(), 0);
  175. Shuffle(keys.begin(), keys.end());
  176. ui64 sumKeysTarget = 0;
  177. ui64 sumValuesTarget = 0;
  178. for (TItem k: keys) {
  179. sumKeysTarget += k;
  180. for (TItem i = 0; i < k + 1; ++i) {
  181. hash.Insert(k, i);
  182. sumValuesTarget += i;
  183. }
  184. }
  185. {
  186. decltype(hash) hash2(std::move(hash));
  187. decltype(hash) hash3(pagePool);
  188. hash3.Swap(hash2);
  189. hash = hash3;
  190. }
  191. for (TItem k: keys) {
  192. UNIT_ASSERT(hash.Has(k));
  193. UNIT_ASSERT_VALUES_EQUAL(k + 1, hash.Count(k));
  194. auto it = hash.Find(k);
  195. UNIT_ASSERT(it.Ok());
  196. TDynBitMap set;
  197. for (; it.Ok(); ++it) {
  198. UNIT_ASSERT_VALUES_EQUAL(k, it.GetKey());
  199. UNIT_ASSERT(!set.Test(it.GetValue()));
  200. set.Set(it.GetValue());
  201. }
  202. UNIT_ASSERT_VALUES_EQUAL(set.Count(), k + 1);
  203. }
  204. UNIT_ASSERT(!hash.Has(keysCount + 1));
  205. UNIT_ASSERT(!hash.Has(keysCount + 10));
  206. UNIT_ASSERT(!hash.Find(keysCount + 1).Ok());
  207. UNIT_ASSERT_VALUES_EQUAL(elementsCount, hash.Size());
  208. UNIT_ASSERT_VALUES_EQUAL(keysCount, hash.UniqSize());
  209. ui64 sumKeys = 0;
  210. ui64 sumValues = 0;
  211. TMaybe<TItem> prevKey;
  212. for (auto it = hash.Iterate(); it.Ok(); ++it) {
  213. const auto key = it.GetKey();
  214. if (prevKey != key) {
  215. sumKeys += key;
  216. for (auto valIt = it.MakeCurrentKeyIter(); valIt.Ok(); ++valIt) {
  217. UNIT_ASSERT_VALUES_EQUAL(key, valIt.GetKey());
  218. sumValues += valIt.GetValue();
  219. }
  220. prevKey = key;
  221. }
  222. }
  223. UNIT_ASSERT_VALUES_EQUAL(sumKeys, sumKeysTarget);
  224. UNIT_ASSERT_VALUES_EQUAL(sumValues, sumValuesTarget);
  225. // Test large lists
  226. TItem val = 0;
  227. for (size_t i = 0; i < TListPoolBase::GetLargePageCapacity<TItem>(); ++i) {
  228. hash.Insert(keysCount, val++);
  229. }
  230. TItem check = 0;
  231. for (auto i = hash.Find(keysCount); i.Ok(); ++i) {
  232. UNIT_ASSERT_VALUES_EQUAL(keysCount, i.GetKey());
  233. UNIT_ASSERT_VALUES_EQUAL(check++, i.GetValue());
  234. }
  235. UNIT_ASSERT_VALUES_EQUAL(check, val);
  236. UNIT_ASSERT_VALUES_EQUAL(TListPoolBase::GetLargePageCapacity<TItem>(), hash.Count(keysCount));
  237. for (size_t i = 0; i < TListPoolBase::GetLargePageCapacity<TItem>() + 1; ++i) {
  238. hash.Insert(keysCount, val++);
  239. }
  240. {
  241. decltype(hash) hash2(std::move(hash));
  242. decltype(hash) hash3(pagePool);
  243. hash3.Swap(hash2);
  244. hash = hash3;
  245. }
  246. check = 0;
  247. for (auto i = hash.Find(keysCount); i.Ok(); ++i) {
  248. UNIT_ASSERT_VALUES_EQUAL(keysCount, i.GetKey());
  249. UNIT_ASSERT_VALUES_EQUAL(check++, i.GetValue());
  250. }
  251. UNIT_ASSERT_VALUES_EQUAL(check, val);
  252. UNIT_ASSERT_VALUES_EQUAL(2 * TListPoolBase::GetLargePageCapacity<TItem>() + 1, hash.Count(keysCount));
  253. }
  254. template <typename TItem>
  255. void TestSetImpl() {
  256. const ui32 elementsCount = 32;
  257. const ui64 sumKeysTarget = elementsCount * (elementsCount - 1) / 2;
  258. TAlignedPagePool pagePool(__LOCATION__);
  259. TCompactHashSet<TItem, TItemHash> hash(pagePool);
  260. TVector<TItem> elements(elementsCount);
  261. std::iota(elements.begin(), elements.end(), 0);
  262. Shuffle(elements.begin(), elements.end());
  263. for (TItem i: elements) {
  264. hash.Insert(i);
  265. }
  266. {
  267. decltype(hash) hash2(std::move(hash));
  268. decltype(hash) hash3(pagePool);
  269. hash3.Swap(hash2);
  270. hash = hash3;
  271. }
  272. for (TItem i: elements) {
  273. UNIT_ASSERT(hash.Has(i));
  274. }
  275. UNIT_ASSERT(!hash.Has(elementsCount + 1));
  276. UNIT_ASSERT(!hash.Has(elementsCount + 10));
  277. UNIT_ASSERT_VALUES_EQUAL(elementsCount, hash.Size());
  278. UNIT_ASSERT_VALUES_EQUAL(elementsCount, hash.UniqSize());
  279. ui64 sumKeys = 0;
  280. for (auto i = hash.Iterate(); i.Ok(); ++i) {
  281. sumKeys += *i;
  282. }
  283. UNIT_ASSERT_VALUES_EQUAL(sumKeys, sumKeysTarget);
  284. }
  285. Y_UNIT_TEST(TestHashByte) {
  286. TestHashImpl<ui8>();
  287. }
  288. Y_UNIT_TEST(TestMultiHashByte) {
  289. TestMultiHashImpl<ui8>();
  290. }
  291. Y_UNIT_TEST(TestSetByte) {
  292. TestSetImpl<ui8>();
  293. }
  294. Y_UNIT_TEST(TestHashUi16) {
  295. TestHashImpl<ui16>();
  296. }
  297. Y_UNIT_TEST(TestMultiHashUi16) {
  298. TestMultiHashImpl<ui16>();
  299. }
  300. Y_UNIT_TEST(TestSetUi16) {
  301. TestSetImpl<ui16>();
  302. }
  303. Y_UNIT_TEST(TestHashUi64) {
  304. TestHashImpl<ui64>();
  305. }
  306. Y_UNIT_TEST(TestMultiHashUi64) {
  307. TestMultiHashImpl<ui64>();
  308. }
  309. Y_UNIT_TEST(TestSetUi64) {
  310. TestSetImpl<ui64>();
  311. }
  312. struct TStressHash {
  313. TStressHash(size_t param)
  314. : Param(param)
  315. {
  316. }
  317. template <typename T>
  318. size_t operator() (T num) const {
  319. return num % Param;
  320. }
  321. const size_t Param;
  322. };
  323. Y_UNIT_TEST(TestStressSmallLists) {
  324. TAlignedPagePool pagePool(__LOCATION__);
  325. for (size_t listSize: xrange<size_t>(2, 17, 1)) {
  326. const size_t backets = TListPoolBase::GetSmallPageCapacity<ui64>(listSize);
  327. const size_t elementsCount = backets * listSize;
  328. for (size_t count: xrange<size_t>(1, elementsCount + 1, elementsCount / 16)) {
  329. TCompactHashSet<ui64, TStressHash> hash(pagePool, elementsCount, TStressHash(backets));
  330. for (auto i: xrange(count)) {
  331. hash.Insert(i);
  332. }
  333. UNIT_ASSERT_VALUES_EQUAL(count, hash.Size());
  334. UNIT_ASSERT_VALUES_EQUAL(count, hash.UniqSize());
  335. //hash.PrintStat(Cerr);
  336. }
  337. }
  338. }
  339. }