mkql_todict_ut.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. #include "mkql_computation_node_ut.h"
  2. #include <yql/essentials/minikql/mkql_node_cast.h>
  3. #include <yql/essentials/minikql/mkql_string_util.h>
  4. #include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
  5. #include <util/random/shuffle.h>
  6. #include <map>
  7. #include <optional>
  8. namespace NKikimr::NMiniKQL {
  9. static const TStringBuf data[] = {
  10. "13d49d4db08e57d645fe4d44bbed4738f386af6e9e742cf186961063feb9919b",
  11. "14d285e88582d87c41d3e6d2e9352686d0363ea74a297fe02f901f18c19978a3",
  12. "1795ad46329c4fc6b3355dc22d252c5fe390a971ddf009b54fdeceb93d3b8930",
  13. "18042e88fb4cf6b09cb8e6c5588ae525fc7a37bd2248a857d83ac1d1dcdf0a64",
  14. "1b30b154ac814f7e4ed7e7488e037d781b78fbc336cac027f4c301ad9368514e",
  15. "1a0b94cebdc038bb293575af4f6954e9dbf19801e581ad03be90b4aef36347d7",
  16. "1c9ac5b87de7d68efae1bdf1ad47e58d28a6a70e966f19899798c61b2a65b6e2",
  17. "1618c1e3d9dbc3edaccb7934eca55d2d96cb59d2633655f57401dba99deec3ef",
  18. "1bd7a6ff86a1940283e202b142ecba685fea86f93f2aafad8cd37d80582aca95",
  19. "0fba3f2f741b0579a3eec906f5341e0556fbd74088fcdfbe776bd6122fa81681",
  20. "19768b3228cef7a82e0f1c367d78c42596fa511c735bd85d7cafca0965045562",
  21. "1a9c0a14272795d7ad39a4725754c3f1d013a761c41fba16e06ae247833fd42b",
  22. "1562ce72ff7229866723e81e35db30d08c8b8dc7b7d076cff787f157d70763e6",
  23. "0faf214bafe219413618fdf186bb9290e6a610755d90947cd686b72899e78445",
  24. "14f3fe97da837197b98f6303ac5aa1b6f34bffe9841fe65f084a27f4bd4ced8a",
  25. "198c0706af7107ababebf1500875ba64508519b21aa534d0f55e8a32e394799d",
  26. "1bb66a4593b77b1650a4a530bae30e454c2815477769d67fe6c6b337ae4acafe",
  27. "0f67ef1ca6ef6b2d699dfac0360e8f24dc8960428cff058fe285d63ab55ef6d3",
  28. "1097009fe853793802120394fbb6404df82725240d410c69375e5a53ec5395b8",
  29. "1b1262275eae8a055732253e8d717c19ebde386b25e51dd546413e7ee997c5e1",
  30. "1c4a73588541a0c869b4ee27c32cc4218f3c8db13713c61cedc387336a2720c9",
  31. "1c73442f0ac53d8b38f779231680fab806a6cb9c86d25d9db5fa67c0ebf8e803",
  32. "19152f0c06baf7962ca287a303b85437f321d725985f1586ac8358bdb6a0df63",
  33. "13436f337815f5929559e6f621b850ed60b36f23ce9d8d06db981b70d40ad3db",
  34. "298268d866eea5d6fcae470fdbb6d7787d73ab1e50b8126d6452d81264fbdafd",
  35. "1a67b4e4c213baa140c5a00352cdbc9256b4e2fe81482c380b08ebe2e6b76e1b",
  36. "19824d2008be54e35a0e2a9d2df9746e96f73367518b111695e1c3857966c241",
  37. "2997c49ed21482d30b8ef89bd26bfdb6384dda6825032145fe0a3ad9d2f2a7e3",
  38. "137ccc1d4ab00210bd9af5ee875cb799bd818f4803470abca68a9655ea73be01",
  39. "12d4cf2eb41c90ede84bece72f76e97d7d0144c45341a0176f656b295cb838c3",
  40. "11d02da4f449e6aeee4f213409baed6eaab35496688d743991870ba093676c44",
  41. "163fb1ef04a1453a44fb897014287f7ceefe0b553d99718f986ada32cec6ca29",
  42. "16f579a7eda4d7f5cde29724bf35e1b36e95fbeb72914ba2ba8f19192b92dab7",
  43. "0f60c1387bf29d8d81174bd43c136e75f0f33b8b4d2712de0cc3a23f04fac76e",
  44. "0f83662d3b4cc9aaa0f76c8801d2d32909c050286d514acc108b6c3d9964679b",
  45. "1a30b7c4bf1c4eaaa92802cb90a27b5073d4a5ec095271490df8443b8f5df88f",
  46. "105af591b517f126c985f54e833d61907ff49945ab123a606caa6d9bda0e3d66",
  47. "1a5196fdfc1b81974905a66e6f1ff31403fc84b4d84effde521e848153f42e10",
  48. "17d6cb5ba9489d8397cb1e1d054e45cff6c7225aeeba5c9e76cacd9da6c9a0c1",
  49. "127ab4e2169329082bdd546e94c4fb6643999b14a26e08eaa719689789767014",
  50. "143883410f000b5f3ff4c6434b0654516e9502d0a50a2b3ecdc20c8d3e198915",
  51. "16ccd345646dd3d39e6bd157b51513c1b319bd1f441199003052a74b2eddb53d",
  52. "11e9f02dc56d575fac5a306a6e83f597ffda1bd81a01f13fdda059ab24d90892",
  53. "13f75a9e662faea5fc93f0f83d363c42083345cdcc42f1b0d320d11409ef3052",
  54. "18cca97e8c6ede52e0b7d8c53c85c0fac68f6d1b7c6622a4cebc21433e6d8eea",
  55. "160d6b818fab5ad00a1e81e46888c5ff3e5f2c175c013ce17d88c31df4475aba",
  56. "1c4d09dff19175af7fc0d8e8fd23e9288fc2839dedfc067dcf9f5a3e3a9d92aa",
  57. "16e25b2a6eef4cde6879c20c94c4360604b9099c29e1abaf9fc079fe67cfcaac",
  58. "2a577ab7e2541e2cc2cc20e6a76c4ea9b77501808db9c4045be82c680cf227d5",
  59. "11b4753fd9cc33656dbd59769b3202b7f68bd067bf7f64bd54676f6f60366ef1",
  60. "1932a0aecc4a569d7d3fbcdd329b92c0b4dbd870d6be48ec4f18285ab3183676",
  61. "2a2e6b62a4383cb48ffbb69b2f356ceb0410593f5b5500142498692dec7c125f",
  62. };
  63. Y_UNIT_TEST_SUITE(TMiniKQLToDictTest) {
  64. Y_UNIT_TEST_LLVM(TestCompactUtf8Set) {
  65. TSetup<LLVM> setup;
  66. TProgramBuilder& pb = *setup.PgmBuilder;
  67. TVector<TRuntimeNode> items;
  68. for (auto s: data) {
  69. items.push_back(pb.NewDataLiteral<NUdf::EDataSlot::Utf8>(s));
  70. }
  71. Shuffle(items.begin(), items.end());
  72. auto dataType = pb.NewDataType(NUdf::TDataType<NUdf::TUtf8>::Id);
  73. auto list = pb.NewList(dataType, items);
  74. auto dict = pb.ToHashedDict(list, false, [](TRuntimeNode n) { return n; }, [&pb](TRuntimeNode /*n*/) { return pb.NewVoid(); }, true);
  75. auto pgmReturn = pb.Contains(dict, items.front());
  76. auto graph = setup.BuildGraph(pgmReturn);
  77. auto res = graph->GetValue().template Get<bool>();
  78. UNIT_ASSERT_VALUES_EQUAL(res, true);
  79. }
  80. Y_UNIT_TEST_LLVM(TestUtf8Set) {
  81. TSetup<LLVM> setup;
  82. TProgramBuilder& pb = *setup.PgmBuilder;
  83. TVector<TRuntimeNode> items;
  84. for (auto s: data) {
  85. items.push_back(pb.NewDataLiteral<NUdf::EDataSlot::Utf8>(s));
  86. }
  87. Shuffle(items.begin(), items.end());
  88. auto dataType = pb.NewDataType(NUdf::TDataType<NUdf::TUtf8>::Id);
  89. auto list = pb.NewList(dataType, items);
  90. auto dict = pb.ToHashedDict(list, false, [](TRuntimeNode n) { return n; }, [&pb](TRuntimeNode /*n*/) { return pb.NewVoid(); }, false);
  91. auto pgmReturn = pb.Contains(dict, items.front());
  92. auto graph = setup.BuildGraph(pgmReturn);
  93. auto res = graph->GetValue().template Get<bool>();
  94. UNIT_ASSERT_VALUES_EQUAL(res, true);
  95. }
  96. Y_UNIT_TEST_LLVM(TestSqueezeToDict) {
  97. auto test = [](bool stream, bool hashed, bool multi, bool compact, bool withPayload) {
  98. Cerr << "TestSqueezeToDict [on: " << (stream ? "stream" : "flow")
  99. << "type: " << (hashed ? "hashed" : "sorted") << ", multi: " << multi
  100. << ", compact: " << compact << ", payload: " << withPayload << "]" << Endl;
  101. TSetup<LLVM> setup;
  102. TProgramBuilder& pb = *setup.PgmBuilder;
  103. TVector<TRuntimeNode> items;
  104. for (auto s : data) {
  105. items.push_back(pb.NewDataLiteral<NUdf::EDataSlot::Utf8>(s));
  106. }
  107. Shuffle(items.begin(), items.end());
  108. auto dataType = pb.NewDataType(NUdf::TDataType<NUdf::TUtf8>::Id);
  109. auto list = pb.NewList(dataType, items);
  110. auto input = stream ? pb.Iterator(list, items) : pb.ToFlow(list);
  111. auto pgmReturn = hashed
  112. ? pb.SqueezeToHashedDict(input, multi, [](TRuntimeNode n) { return n; },
  113. [&pb, withPayload](TRuntimeNode n) { return withPayload ? n : pb.NewVoid(); }, compact)
  114. : pb.SqueezeToSortedDict(input, multi, [](TRuntimeNode n) { return n; },
  115. [&pb, withPayload](TRuntimeNode n) { return withPayload ? n : pb.NewVoid(); }, compact);
  116. if (!stream) {
  117. pgmReturn = pb.FromFlow(pgmReturn);
  118. }
  119. auto graph = setup.BuildGraph(pgmReturn);
  120. NUdf::TUnboxedValue res = graph->GetValue();
  121. UNIT_ASSERT(!res.IsSpecial());
  122. NUdf::TUnboxedValue v;
  123. auto status = res.Fetch(v);
  124. UNIT_ASSERT_VALUES_EQUAL(NUdf::EFetchStatus::Ok, status);
  125. for (auto s : data) {
  126. UNIT_ASSERT_C(v.Contains(NUdf::TUnboxedValue(MakeString(s))), s);
  127. }
  128. UNIT_ASSERT(!v.Contains(NUdf::TUnboxedValue(MakeString("green cucumber"))));
  129. status = res.Fetch(v);
  130. UNIT_ASSERT_VALUES_EQUAL(NUdf::EFetchStatus::Finish, status);
  131. };
  132. for (auto stream : {true, false}) {
  133. for (auto hashed : {true, false}) {
  134. for (auto multi : {true, false}) {
  135. for (auto compact : {true, false}) {
  136. for (auto withPayload : {true, false}) {
  137. test(stream, hashed, multi, compact, withPayload);
  138. }
  139. }
  140. }
  141. }
  142. }
  143. }
  144. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 23u
  145. Y_UNIT_TEST_LLVM(TestNarrowSqueezeToDict) {
  146. auto test = [](bool hashed, bool multi, bool compact, bool withPayload) {
  147. Cerr << "TestNarrowSqueezeToDict [type: " << (hashed ? "hashed" : "sorted") << ", multi: " << multi
  148. << ", compact: " << compact << ", payload: " << withPayload << "]" << Endl;
  149. TSetup<LLVM> setup;
  150. TProgramBuilder& pb = *setup.PgmBuilder;
  151. TVector<TRuntimeNode> items;
  152. for (auto s : data) {
  153. items.push_back(pb.NewDataLiteral<NUdf::EDataSlot::Utf8>(s));
  154. }
  155. Shuffle(items.begin(), items.end());
  156. auto dataType = pb.NewDataType(NUdf::TDataType<NUdf::TUtf8>::Id);
  157. auto list = pb.NewList(dataType, items);
  158. auto input = pb.ExpandMap(pb.ToFlow(list), [](TRuntimeNode n) ->TRuntimeNode::TList { return {n}; });
  159. auto pgmReturn = hashed
  160. ? pb.NarrowSqueezeToHashedDict(input, multi, [](TRuntimeNode::TList n) { return n.front(); },
  161. [&pb, withPayload](TRuntimeNode::TList n) { return withPayload ? n.back() : pb.NewVoid(); }, compact)
  162. : pb.NarrowSqueezeToSortedDict(input, multi, [](TRuntimeNode::TList n) { return n.front(); },
  163. [&pb, withPayload](TRuntimeNode::TList n) { return withPayload ? n.back() : pb.NewVoid(); }, compact);
  164. pgmReturn = pb.FromFlow(pgmReturn);
  165. auto graph = setup.BuildGraph(pgmReturn);
  166. NUdf::TUnboxedValue res = graph->GetValue();
  167. UNIT_ASSERT(!res.IsSpecial());
  168. NUdf::TUnboxedValue v;
  169. auto status = res.Fetch(v);
  170. UNIT_ASSERT_VALUES_EQUAL(NUdf::EFetchStatus::Ok, status);
  171. for (auto s : data) {
  172. UNIT_ASSERT_C(v.Contains(NUdf::TUnboxedValue(MakeString(s))), s);
  173. }
  174. UNIT_ASSERT(!v.Contains(NUdf::TUnboxedValue(MakeString("green cucumber"))));
  175. status = res.Fetch(v);
  176. UNIT_ASSERT_VALUES_EQUAL(NUdf::EFetchStatus::Finish, status);
  177. };
  178. for (auto hashed : {true, false}) {
  179. for (auto multi : {true, false}) {
  180. for (auto compact : {true, false}) {
  181. for (auto withPayload : {true, false}) {
  182. test(hashed, multi, compact, withPayload);
  183. }
  184. }
  185. }
  186. }
  187. }
  188. #endif
  189. template <bool LLVM>
  190. static void TestDictWithDataKeyImpl(bool optionalKey, bool multi, bool compact, bool withNull, bool withData) {
  191. TSetup<LLVM> setup;
  192. TProgramBuilder& pb = *setup.PgmBuilder;
  193. TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey);
  194. TType* valueType = pb.NewDataType(NUdf::EDataSlot::Int32, false);
  195. TType* tupleType = pb.NewTupleType({keyType, valueType});
  196. TVector<TRuntimeNode> items;
  197. TVector<TRuntimeNode> keys;
  198. if (withNull) {
  199. UNIT_ASSERT(optionalKey);
  200. keys.push_back(pb.NewEmptyOptional(keyType));
  201. for (size_t k = 0; k < 1 + multi; ++k) {
  202. items.push_back(pb.NewTuple(tupleType, {keys.back(), pb.NewDataLiteral((i32)items.size())}));
  203. }
  204. }
  205. if (withData) {
  206. for (i32 i = 0; i < 2; ++i) {
  207. auto key = pb.NewDataLiteral(i);
  208. if (optionalKey) {
  209. key = pb.NewOptional(key);
  210. }
  211. keys.push_back(key);
  212. for (size_t k = 0; k < 1 + multi; ++k) {
  213. items.push_back(pb.NewTuple(tupleType, {key, pb.NewDataLiteral((i32)items.size())}));
  214. }
  215. }
  216. }
  217. auto list = pb.NewList(tupleType, items);
  218. auto keyList = pb.NewList(keyType, keys);
  219. auto dict = pb.ToHashedDict(list, multi, [&](TRuntimeNode tuple) { return pb.Nth(tuple, 0); }, [&pb](TRuntimeNode tuple) { return pb.Nth(tuple, 1); }, compact);
  220. auto compareLists = [&](bool itemIsTuple, TRuntimeNode list1, TRuntimeNode list2) {
  221. return pb.And({
  222. pb.Equals(
  223. pb.Length(list1),
  224. pb.Length(list2)
  225. ),
  226. pb.Not(
  227. pb.Exists(
  228. pb.Head(
  229. pb.SkipWhile(
  230. pb.Zip({list1, list2}),
  231. [&](TRuntimeNode pair) {
  232. if (itemIsTuple) {
  233. return pb.And({
  234. pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 0), pb.Nth(pb.Nth(pair, 1), 0)),
  235. pb.AggrEquals(pb.Nth(pb.Nth(pair, 0), 1), pb.Nth(pb.Nth(pair, 1), 1)),
  236. });
  237. } else {
  238. return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1));
  239. }
  240. }
  241. )
  242. )
  243. )
  244. )
  245. });
  246. };
  247. TVector<TRuntimeNode> results;
  248. // Check Dict has items
  249. results.push_back(pb.AggrEquals(
  250. pb.HasItems(dict),
  251. pb.NewDataLiteral(withNull || withData)
  252. ));
  253. // Check Dict length
  254. results.push_back(pb.AggrEquals(
  255. pb.Length(dict),
  256. pb.NewDataLiteral((ui64)keys.size())
  257. ));
  258. // Check Dict Contains
  259. results.push_back(pb.AllOf(
  260. pb.Map(list, [&](TRuntimeNode tuple) {
  261. return pb.Contains(dict, pb.Nth(tuple, 0));
  262. }),
  263. [&](TRuntimeNode item) { return item; }
  264. ));
  265. // Check Dict Lookup
  266. results.push_back(compareLists(false,
  267. pb.Sort(
  268. pb.FlatMap(
  269. pb.Map(
  270. keyList,
  271. [&](TRuntimeNode key) {
  272. return pb.Unwrap(pb.Lookup(dict, key), pb.NewDataLiteral<NUdf::EDataSlot::String>("Lookup failed"), "", 0, 0);
  273. }
  274. ),
  275. [&](TRuntimeNode item) {
  276. return multi ? item : pb.NewOptional(item);
  277. }
  278. ),
  279. pb.NewDataLiteral(true),
  280. [&](TRuntimeNode item) { return item; }
  281. ),
  282. pb.Sort(
  283. pb.Map(list, [&](TRuntimeNode tuple) {
  284. return pb.Nth(tuple, 1);
  285. }),
  286. pb.NewDataLiteral(true),
  287. [&](TRuntimeNode item) { return item; }
  288. )
  289. ));
  290. // Check Dict items iterator
  291. results.push_back(compareLists(true,
  292. pb.Sort(
  293. pb.FlatMap(
  294. pb.DictItems(dict),
  295. [&](TRuntimeNode pair) {
  296. if (multi) {
  297. return pb.Map(
  298. pb.Nth(pair, 1),
  299. [&](TRuntimeNode p) {
  300. return pb.NewTuple({pb.Nth(pair, 0), p});
  301. }
  302. );
  303. } else {
  304. return pb.NewOptional(pair);
  305. }
  306. }
  307. ),
  308. pb.NewTuple({pb.NewDataLiteral(true), pb.NewDataLiteral(true)}),
  309. [&](TRuntimeNode item) { return item; }
  310. ),
  311. list
  312. ));
  313. // Check Dict payloads iterator
  314. results.push_back(compareLists(false,
  315. pb.Sort(
  316. pb.FlatMap(
  317. pb.DictPayloads(dict),
  318. [&](TRuntimeNode item) {
  319. return multi ? item : pb.NewOptional(item);
  320. }
  321. ),
  322. pb.NewDataLiteral(true),
  323. [&](TRuntimeNode item) { return item; }
  324. ),
  325. pb.Map(
  326. list,
  327. [&](TRuntimeNode item) {
  328. return pb.Nth(item, 1);
  329. }
  330. )
  331. ));
  332. auto graph = setup.BuildGraph(pb.NewTuple(results));
  333. NUdf::TUnboxedValue res = graph->GetValue();
  334. UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Dict HasItems fail");
  335. UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Dict Length fail");
  336. UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Dict Contains fail");
  337. UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Dict Lookup fail");
  338. UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "DictItems fail");
  339. UNIT_ASSERT_C(res.GetElement(5).Get<bool>(), "DictPayloads fail");
  340. }
  341. Y_UNIT_TEST_LLVM(TestDictWithDataKey) {
  342. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
  343. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
  344. }
  345. Y_UNIT_TEST_LLVM(TestDictCompactWithDataKey) {
  346. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
  347. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
  348. }
  349. Y_UNIT_TEST_LLVM(TestDictMultiWithDataKey) {
  350. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
  351. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
  352. }
  353. Y_UNIT_TEST_LLVM(TestDictCompactMultiWithDataKey) {
  354. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
  355. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
  356. }
  357. Y_UNIT_TEST_LLVM(TestDictWithOptionalDataKey) {
  358. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
  359. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/false);
  360. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/true, /*withData*/true);
  361. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
  362. }
  363. Y_UNIT_TEST_LLVM(TestDictCompactWithOptionalDataKey) {
  364. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
  365. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/false);
  366. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/true, /*withData*/true);
  367. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
  368. }
  369. Y_UNIT_TEST_LLVM(TestDictMultiWithOptionalDataKey) {
  370. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
  371. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/false);
  372. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/true, /*withData*/true);
  373. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty dict
  374. }
  375. Y_UNIT_TEST_LLVM(TestDictCompactMultiWithOptionalDataKey) {
  376. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
  377. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/false);
  378. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/true, /*withData*/true);
  379. TestDictWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*multi*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty dict
  380. }
  381. template <bool LLVM>
  382. static void TestSetWithDataKeyImpl(bool optionalKey, bool compact, bool withNull, bool withData) {
  383. TSetup<LLVM> setup;
  384. TProgramBuilder& pb = *setup.PgmBuilder;
  385. TType* keyType = pb.NewDataType(NUdf::EDataSlot::Int32, optionalKey);
  386. TVector<TRuntimeNode> keys;
  387. if (withNull) {
  388. UNIT_ASSERT(optionalKey);
  389. keys.push_back(pb.NewEmptyOptional(keyType));
  390. }
  391. if (withData) {
  392. for (i32 i = 0; i < 2; ++i) {
  393. auto key = pb.NewDataLiteral(i);
  394. if (optionalKey) {
  395. key = pb.NewOptional(key);
  396. }
  397. keys.push_back(key);
  398. }
  399. }
  400. auto keyList = pb.NewList(keyType, keys);
  401. auto set = pb.ToHashedDict(keyList, false, [&](TRuntimeNode key) { return key; }, [&pb](TRuntimeNode) { return pb.NewVoid(); }, compact);
  402. auto compareLists = [&](TRuntimeNode list1, TRuntimeNode list2) {
  403. return pb.And({
  404. pb.Equals(
  405. pb.Length(list1),
  406. pb.Length(list2)
  407. ),
  408. pb.Not(
  409. pb.Exists(
  410. pb.Head(
  411. pb.SkipWhile(
  412. pb.Zip({list1, list2}),
  413. [&](TRuntimeNode pair) {
  414. return pb.AggrEquals(pb.Nth(pair, 0), pb.Nth(pair, 1));
  415. }
  416. )
  417. )
  418. )
  419. )
  420. });
  421. };
  422. TVector<TRuntimeNode> results;
  423. // Check Set has items
  424. results.push_back(pb.AggrEquals(
  425. pb.HasItems(set),
  426. pb.NewDataLiteral(withNull || withData)
  427. ));
  428. // Check Set length
  429. results.push_back(pb.AggrEquals(
  430. pb.Length(set),
  431. pb.NewDataLiteral((ui64)keys.size())
  432. ));
  433. // Check Set Contains
  434. results.push_back(pb.AllOf(
  435. pb.Map(keyList, [&](TRuntimeNode key) {
  436. return pb.Contains(set, key);
  437. }),
  438. [&](TRuntimeNode item) { return item; }
  439. ));
  440. // Check Set Lookup
  441. results.push_back(pb.AllOf(
  442. pb.Map(keyList, [&](TRuntimeNode key) {
  443. return pb.Exists(pb.Lookup(set, key));
  444. }),
  445. [&](TRuntimeNode item) { return item; }
  446. ));
  447. // Check Set items iterator
  448. results.push_back(compareLists(
  449. pb.Sort(
  450. pb.DictKeys(set),
  451. pb.NewDataLiteral(true),
  452. [&](TRuntimeNode item) { return item; }
  453. ),
  454. keyList
  455. ));
  456. auto graph = setup.BuildGraph(pb.NewTuple(results));
  457. NUdf::TUnboxedValue res = graph->GetValue();
  458. UNIT_ASSERT_C(res.GetElement(0).Get<bool>(), "Set HasItems fail");
  459. UNIT_ASSERT_C(res.GetElement(1).Get<bool>(), "Set Length fail");
  460. UNIT_ASSERT_C(res.GetElement(2).Get<bool>(), "Set Contains fail");
  461. UNIT_ASSERT_C(res.GetElement(3).Get<bool>(), "Set Lookup fail");
  462. UNIT_ASSERT_C(res.GetElement(4).Get<bool>(), "Set DictKeys fail");
  463. }
  464. Y_UNIT_TEST_LLVM(TestSetWithDataKey) {
  465. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/true);
  466. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set
  467. }
  468. Y_UNIT_TEST_LLVM(TestSetCompactWithDataKey) {
  469. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/true);
  470. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/false, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set
  471. }
  472. Y_UNIT_TEST_LLVM(TestSetWithOptionalDataKey) {
  473. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/true);
  474. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/false);
  475. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/true, /*withData*/true);
  476. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/false, /*withNull*/false, /*withData*/false); // empty set
  477. }
  478. Y_UNIT_TEST_LLVM(TestSetCompactWithOptionalDataKey) {
  479. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/true);
  480. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/false);
  481. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/true, /*withData*/true);
  482. TestSetWithDataKeyImpl<LLVM>(/*optionalKey*/true, /*compact*/true, /*withNull*/false, /*withData*/false); // empty set
  483. }
  484. }
  485. } // namespace