mkql_map_join_ut.cpp 55 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150
  1. #include "mkql_computation_node_ut.h"
  2. #include <yql/essentials/minikql/mkql_runtime_version.h>
  3. namespace NKikimr {
  4. namespace NMiniKQL {
  5. Y_UNIT_TEST_SUITE(TMiniKQLMapJoinCoreTest) {
  6. Y_UNIT_TEST_LLVM(TestInnerOnTuple) {
  7. TSetup<LLVM> setup;
  8. TProgramBuilder& pb = *setup.PgmBuilder;
  9. const auto optionalUi64Type = pb.NewDataType(NUdf::TDataType<ui64>::Id, true);
  10. const auto tupleType = pb.NewTupleType({optionalUi64Type, optionalUi64Type});
  11. const auto emptyOptionalUi64 = pb.NewEmptyOptional(optionalUi64Type);
  12. const auto key1 = pb.NewTuple(tupleType, {
  13. pb.NewOptional(pb.NewDataLiteral<ui64>(1)),
  14. pb.NewOptional(pb.NewDataLiteral<ui64>(1)),
  15. });
  16. const auto key2 = pb.NewTuple(tupleType, {
  17. pb.NewOptional(pb.NewDataLiteral<ui64>(2)),
  18. pb.NewOptional(pb.NewDataLiteral<ui64>(2)),
  19. });
  20. const auto key3 = pb.NewTuple(tupleType, {
  21. pb.NewOptional(pb.NewDataLiteral<ui64>(3)),
  22. emptyOptionalUi64,
  23. });
  24. const auto key4 = pb.NewTuple(tupleType, {
  25. pb.NewOptional(pb.NewDataLiteral<ui64>(4)),
  26. pb.NewOptional(pb.NewDataLiteral<ui64>(4)),
  27. });
  28. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  29. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  30. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  31. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  32. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  33. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  34. const auto structType = pb.NewStructType({
  35. {"Key", tupleType},
  36. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  37. });
  38. const auto list1 = pb.NewList(structType, {
  39. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  40. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  41. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  42. });
  43. const auto list2 = pb.NewList(structType, {
  44. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  45. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  46. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  47. });
  48. const auto dict2 = pb.ToSortedDict(list2, false,
  49. [&](TRuntimeNode item) {
  50. return pb.Member(item, "Key");
  51. },
  52. [&](TRuntimeNode item) {
  53. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  54. });
  55. const auto resultType = pb.NewFlowType(pb.NewStructType({
  56. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  57. {"Right", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  58. }));
  59. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::Inner, {0U}, {1U, 0U}, {0U, 1U}, resultType));
  60. const auto graph = setup.BuildGraph(pgmReturn);
  61. const auto iterator = graph->GetValue().GetListIterator();
  62. NUdf::TUnboxedValue tuple;
  63. UNIT_ASSERT(iterator.Next(tuple));
  64. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  65. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  66. UNIT_ASSERT(iterator.Next(tuple));
  67. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  68. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  69. UNIT_ASSERT(!iterator.Next(tuple));
  70. UNIT_ASSERT(!iterator.Next(tuple));
  71. }
  72. Y_UNIT_TEST_LLVM(TestInner) {
  73. for (ui32 pass = 0; pass < 1; ++pass) {
  74. TSetup<LLVM> setup;
  75. TProgramBuilder& pb = *setup.PgmBuilder;
  76. const auto key1 = pb.NewDataLiteral<ui32>(1);
  77. const auto key2 = pb.NewDataLiteral<ui32>(2);
  78. const auto key3 = pb.NewDataLiteral<ui32>(3);
  79. const auto key4 = pb.NewDataLiteral<ui32>(4);
  80. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  81. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  82. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  83. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  84. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  85. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  86. const auto structType = pb.NewStructType({
  87. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  88. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  89. });
  90. const auto list1 = pb.NewList(structType, {
  91. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  92. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  93. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  94. });
  95. const auto list2 = pb.NewList(structType, {
  96. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  97. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  98. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  99. });
  100. const auto dict2 = pb.ToHashedDict(list2, false,
  101. [&](TRuntimeNode item) {
  102. return pb.Member(item, "Key");
  103. },
  104. [&](TRuntimeNode item) {
  105. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  106. });
  107. const auto resultType = pb.NewFlowType(pb.NewStructType({
  108. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  109. {"Right", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  110. }));
  111. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::Inner, {0U}, {1U, 0U}, {0U, 1U}, resultType));
  112. const auto graph = setup.BuildGraph(pgmReturn);
  113. const auto iterator = graph->GetValue().GetListIterator();
  114. NUdf::TUnboxedValue tuple;
  115. UNIT_ASSERT(iterator.Next(tuple));
  116. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  117. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  118. UNIT_ASSERT(iterator.Next(tuple));
  119. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  120. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  121. UNIT_ASSERT(!iterator.Next(tuple));
  122. UNIT_ASSERT(!iterator.Next(tuple));
  123. }
  124. }
  125. Y_UNIT_TEST_LLVM(TestInnerMulti) {
  126. for (ui32 pass = 0; pass < 1; ++pass) {
  127. TSetup<LLVM> setup;
  128. TProgramBuilder& pb = *setup.PgmBuilder;
  129. const auto key1 = pb.NewDataLiteral<ui32>(1);
  130. const auto key2 = pb.NewDataLiteral<ui32>(2);
  131. const auto key3 = pb.NewDataLiteral<ui32>(2);
  132. const auto key4 = pb.NewDataLiteral<ui32>(3);
  133. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  134. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  135. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  136. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  137. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  138. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  139. const auto structType = pb.NewStructType({
  140. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  141. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  142. });
  143. const auto list1 = pb.NewList(structType, {
  144. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  145. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  146. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  147. });
  148. const auto list2 = pb.NewList(structType, {
  149. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  150. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  151. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  152. });
  153. const auto dict2 = pb.ToHashedDict(list2, true,
  154. [&](TRuntimeNode item) {
  155. return pb.Member(item, "Key");
  156. },
  157. [&](TRuntimeNode item) {
  158. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  159. });
  160. const auto resultType = pb.NewFlowType(pb.NewStructType({
  161. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  162. {"Right", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  163. }));
  164. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::Inner, {0U}, {1U, 0U}, {0U, 1U}, resultType));
  165. const auto graph = setup.BuildGraph(pgmReturn);
  166. const auto iterator = graph->GetValue().GetListIterator();
  167. NUdf::TUnboxedValue tuple;
  168. UNIT_ASSERT(iterator.Next(tuple));
  169. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  170. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  171. UNIT_ASSERT(iterator.Next(tuple));
  172. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  173. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  174. UNIT_ASSERT(iterator.Next(tuple));
  175. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  176. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  177. UNIT_ASSERT(iterator.Next(tuple));
  178. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  179. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  180. UNIT_ASSERT(!iterator.Next(tuple));
  181. UNIT_ASSERT(!iterator.Next(tuple));
  182. }
  183. }
  184. Y_UNIT_TEST_LLVM(TestLeft) {
  185. for (ui32 pass = 0; pass < 1; ++pass) {
  186. TSetup<LLVM> setup;
  187. TProgramBuilder& pb = *setup.PgmBuilder;
  188. const auto key1 = pb.NewDataLiteral<ui32>(1);
  189. const auto key2 = pb.NewDataLiteral<ui32>(2);
  190. const auto key3 = pb.NewDataLiteral<ui32>(3);
  191. const auto key4 = pb.NewDataLiteral<ui32>(4);
  192. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  193. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  194. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  195. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  196. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  197. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  198. const auto structType = pb.NewStructType({
  199. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  200. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  201. });
  202. const auto list1 = pb.NewList(structType, {
  203. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  204. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  205. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  206. });
  207. const auto list2 = pb.NewList(structType, {
  208. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  209. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  210. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  211. });
  212. const auto dict2 = pb.ToHashedDict(list2, false,
  213. [&](TRuntimeNode item) {
  214. return pb.Member(item, "Key");
  215. },
  216. [&](TRuntimeNode item) {
  217. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  218. });
  219. const auto resultType = pb.NewFlowType(pb.NewStructType({
  220. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  221. {"Right", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  222. }));
  223. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::Left, {0U}, {1U, 0U}, {0U, 1U}, resultType));
  224. const auto graph = setup.BuildGraph(pgmReturn);
  225. const auto iterator = graph->GetValue().GetListIterator();
  226. NUdf::TUnboxedValue tuple;
  227. UNIT_ASSERT(iterator.Next(tuple));
  228. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  229. UNIT_ASSERT(!tuple.GetElement(1));
  230. UNIT_ASSERT(iterator.Next(tuple));
  231. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  232. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  233. UNIT_ASSERT(iterator.Next(tuple));
  234. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  235. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  236. UNIT_ASSERT(!iterator.Next(tuple));
  237. UNIT_ASSERT(!iterator.Next(tuple));
  238. }
  239. }
  240. Y_UNIT_TEST_LLVM(TestLeftMulti) {
  241. for (ui32 pass = 0; pass < 1; ++pass) {
  242. TSetup<LLVM> setup;
  243. TProgramBuilder& pb = *setup.PgmBuilder;
  244. const auto key1 = pb.NewDataLiteral<ui32>(1);
  245. const auto key2 = pb.NewDataLiteral<ui32>(2);
  246. const auto key3 = pb.NewDataLiteral<ui32>(2);
  247. const auto key4 = pb.NewDataLiteral<ui32>(3);
  248. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  249. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  250. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  251. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  252. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  253. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  254. const auto structType = pb.NewStructType({
  255. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  256. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  257. });
  258. const auto list1 = pb.NewList(structType, {
  259. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  260. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  261. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  262. });
  263. const auto list2 = pb.NewList(structType, {
  264. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  265. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  266. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  267. });
  268. const auto dict2 = pb.ToHashedDict(list2, true,
  269. [&](TRuntimeNode item) {
  270. return pb.Member(item, "Key");
  271. },
  272. [&](TRuntimeNode item) {
  273. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  274. });
  275. const auto resultType = pb.NewFlowType(pb.NewStructType({
  276. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  277. {"Right", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  278. }));
  279. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::Left, {0U}, {1U, 0U}, {0U, 1U}, resultType));
  280. const auto graph = setup.BuildGraph(pgmReturn);
  281. const auto iterator = graph->GetValue().GetListIterator();
  282. NUdf::TUnboxedValue tuple;
  283. UNIT_ASSERT(iterator.Next(tuple));
  284. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  285. UNIT_ASSERT(!tuple.GetElement(1));
  286. UNIT_ASSERT(iterator.Next(tuple));
  287. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  288. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  289. UNIT_ASSERT(iterator.Next(tuple));
  290. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  291. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  292. UNIT_ASSERT(iterator.Next(tuple));
  293. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  294. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  295. UNIT_ASSERT(iterator.Next(tuple));
  296. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  297. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  298. UNIT_ASSERT(!iterator.Next(tuple));
  299. UNIT_ASSERT(!iterator.Next(tuple));
  300. }
  301. }
  302. Y_UNIT_TEST_LLVM(TestLeftSemi) {
  303. for (ui32 pass = 0; pass < 1; ++pass) {
  304. TSetup<LLVM> setup;
  305. TProgramBuilder& pb = *setup.PgmBuilder;
  306. const auto key1 = pb.NewDataLiteral<ui32>(1);
  307. const auto key2 = pb.NewDataLiteral<ui32>(2);
  308. const auto key3 = pb.NewDataLiteral<ui32>(2);
  309. const auto key4 = pb.NewDataLiteral<ui32>(3);
  310. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  311. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  312. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  313. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  314. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  315. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  316. const auto structType = pb.NewStructType({
  317. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  318. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  319. });
  320. const auto list1 = pb.NewList(structType, {
  321. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  322. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  323. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  324. });
  325. const auto list2 = pb.NewList(structType, {
  326. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  327. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  328. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  329. });
  330. const auto dict2 = pb.ToHashedDict(list2, true,
  331. [&](TRuntimeNode item) {
  332. return pb.Member(item, "Key");
  333. },
  334. [&](TRuntimeNode item) {
  335. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  336. });
  337. const auto resultType = pb.NewFlowType(pb.NewStructType({
  338. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  339. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  340. }));
  341. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::LeftSemi, {0U}, {1U, 1U, 0U, 0U}, {}, resultType));
  342. const auto graph = setup.BuildGraph(pgmReturn);
  343. const auto iterator = graph->GetValue().GetListIterator();
  344. NUdf::TUnboxedValue tuple;
  345. UNIT_ASSERT(iterator.Next(tuple));
  346. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 2);
  347. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "B");
  348. UNIT_ASSERT(iterator.Next(tuple));
  349. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 2);
  350. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "C");
  351. UNIT_ASSERT(!iterator.Next(tuple));
  352. UNIT_ASSERT(!iterator.Next(tuple));
  353. }
  354. }
  355. Y_UNIT_TEST_LLVM(TestLeftOnly) {
  356. for (ui32 pass = 0; pass < 1; ++pass) {
  357. TSetup<LLVM> setup;
  358. TProgramBuilder& pb = *setup.PgmBuilder;
  359. const auto key1 = pb.NewDataLiteral<ui32>(1);
  360. const auto key2 = pb.NewDataLiteral<ui32>(2);
  361. const auto key3 = pb.NewDataLiteral<ui32>(2);
  362. const auto key4 = pb.NewDataLiteral<ui32>(3);
  363. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  364. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  365. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  366. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  367. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  368. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  369. const auto structType = pb.NewStructType({
  370. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  371. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  372. });
  373. const auto list1 = pb.NewList(structType, {
  374. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  375. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  376. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  377. });
  378. const auto list2 = pb.NewList(structType, {
  379. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  380. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  381. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  382. });
  383. const auto dict2 = pb.ToHashedDict(list2, true,
  384. [&](TRuntimeNode item) {
  385. return pb.Member(item, "Key");
  386. },
  387. [&](TRuntimeNode item) {
  388. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  389. });
  390. const auto resultType = pb.NewFlowType(pb.NewStructType({
  391. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  392. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  393. }));
  394. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::LeftOnly, {0U}, {1U, 1U, 0U, 0U}, {}, resultType));
  395. const auto graph = setup.BuildGraph(pgmReturn);
  396. const auto iterator = graph->GetValue().GetListIterator();
  397. NUdf::TUnboxedValue tuple;
  398. UNIT_ASSERT(iterator.Next(tuple));
  399. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 1);
  400. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "A");
  401. UNIT_ASSERT(!iterator.Next(tuple));
  402. UNIT_ASSERT(!iterator.Next(tuple));
  403. }
  404. }
  405. Y_UNIT_TEST_LLVM(TestLeftSemiWithNullKey) {
  406. for (ui32 pass = 0; pass < 1; ++pass) {
  407. TSetup<LLVM> setup;
  408. TProgramBuilder& pb = *setup.PgmBuilder;
  409. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  410. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  411. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  412. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  413. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  414. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  415. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  416. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  417. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  418. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  419. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  420. const auto structType = pb.NewStructType({
  421. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id, true)},
  422. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  423. });
  424. const auto list1 = pb.NewList(structType, {
  425. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key0), "Payload", payload4),
  426. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  427. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  428. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  429. });
  430. const auto list2 = pb.NewList(structType, {
  431. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key0), "Payload", payload3),
  432. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  433. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  434. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  435. });
  436. const auto dict2 = pb.ToHashedDict(list2, true,
  437. [&](TRuntimeNode item) {
  438. return pb.Member(item, "Key");
  439. },
  440. [&](TRuntimeNode item) {
  441. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  442. });
  443. const auto resultType = pb.NewFlowType(pb.NewStructType({
  444. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  445. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  446. }));
  447. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::LeftSemi, {0U}, {1U, 1U, 0U, 0U}, {}, resultType));
  448. const auto graph = setup.BuildGraph(pgmReturn);
  449. const auto iterator = graph->GetValue().GetListIterator();
  450. NUdf::TUnboxedValue tuple;
  451. UNIT_ASSERT(iterator.Next(tuple));
  452. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 2);
  453. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "B");
  454. UNIT_ASSERT(iterator.Next(tuple));
  455. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 2);
  456. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "C");
  457. UNIT_ASSERT(!iterator.Next(tuple));
  458. UNIT_ASSERT(!iterator.Next(tuple));
  459. }
  460. }
  461. Y_UNIT_TEST_LLVM(TestLeftOnlyWithNullKey) {
  462. for (ui32 pass = 0; pass < 1; ++pass) {
  463. TSetup<LLVM> setup;
  464. TProgramBuilder& pb = *setup.PgmBuilder;
  465. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  466. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  467. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  468. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  469. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  470. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  471. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  472. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  473. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  474. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  475. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  476. const auto structType = pb.NewStructType({
  477. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id, true)},
  478. {"Payload", pb.NewDataType(NUdf::TDataType<char*>::Id)}
  479. });
  480. const auto list1 = pb.NewList(structType, {
  481. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key0), "Payload", payload4),
  482. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key1), "Payload", payload1),
  483. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload2),
  484. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload3)
  485. });
  486. const auto list2 = pb.NewList(structType, {
  487. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key0), "Payload", payload3),
  488. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key2), "Payload", payload4),
  489. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key3), "Payload", payload5),
  490. pb.AddMember(pb.AddMember(pb.NewEmptyStruct(), "Key", key4), "Payload", payload6)
  491. });
  492. const auto dict2 = pb.ToHashedDict(list2, true,
  493. [&](TRuntimeNode item) {
  494. return pb.Member(item, "Key");
  495. },
  496. [&](TRuntimeNode item) {
  497. return pb.AddMember(pb.NewEmptyStruct(), "Payload", pb.Member(item, "Payload"));
  498. });
  499. const auto resultType = pb.NewFlowType(pb.NewStructType({
  500. {"Left", pb.NewDataType(NUdf::TDataType<char*>::Id)},
  501. {"Key", pb.NewDataType(NUdf::TDataType<ui32>::Id)},
  502. }));
  503. const auto pgmReturn = pb.Collect(pb.MapJoinCore(pb.ToFlow(list1), dict2, EJoinKind::LeftOnly, {0U}, {1U, 1U, 0U, 0U}, {}, resultType));
  504. const auto graph = setup.BuildGraph(pgmReturn);
  505. const auto iterator = graph->GetValue().GetListIterator();
  506. NUdf::TUnboxedValue tuple;
  507. UNIT_ASSERT(iterator.Next(tuple));
  508. UNIT_ASSERT(!tuple.GetElement(0));
  509. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  510. UNIT_ASSERT(iterator.Next(tuple));
  511. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(0).Get<ui32>(), 1);
  512. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "A");
  513. UNIT_ASSERT(!iterator.Next(tuple));
  514. UNIT_ASSERT(!iterator.Next(tuple));
  515. }
  516. }
  517. }
  518. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 18u
  519. Y_UNIT_TEST_SUITE(TMiniKQLWideMapJoinCoreTest) {
  520. Y_UNIT_TEST_LLVM(TestInner) {
  521. for (ui32 pass = 0; pass < 1; ++pass) {
  522. TSetup<LLVM> setup;
  523. TProgramBuilder& pb = *setup.PgmBuilder;
  524. const auto key1 = pb.NewDataLiteral<ui32>(1);
  525. const auto key2 = pb.NewDataLiteral<ui32>(2);
  526. const auto key3 = pb.NewDataLiteral<ui32>(4);
  527. const auto key4 = pb.NewDataLiteral<ui32>(4);
  528. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  529. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  530. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  531. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  532. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  533. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  534. const auto tupleType = pb.NewTupleType({
  535. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  536. pb.NewDataType(NUdf::TDataType<char*>::Id)
  537. });
  538. const auto list1 = pb.NewList(tupleType, {
  539. pb.NewTuple({key1, payload1}),
  540. pb.NewTuple({key2, payload2}),
  541. pb.NewTuple({key3, payload3})
  542. });
  543. const auto list2 = pb.NewList(tupleType, {
  544. pb.NewTuple({key2, payload4}),
  545. pb.NewTuple({key3, payload5}),
  546. pb.NewTuple({key4, payload6})
  547. });
  548. const auto dict2 = pb.ToHashedDict(list2, false,
  549. [&](TRuntimeNode item) {
  550. return pb.Nth(item, 0U);
  551. },
  552. [&](TRuntimeNode item) {
  553. return pb.NewTuple({pb.Nth(item, 1U)});
  554. });
  555. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  556. pb.NewDataType(NUdf::TDataType<char*>::Id),
  557. pb.NewDataType(NUdf::TDataType<char*>::Id)
  558. }));
  559. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  560. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  561. dict2, EJoinKind::Inner, {0U}, {1U, 0U}, {0U, 1U}, resultType),
  562. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  563. );
  564. const auto graph = setup.BuildGraph(pgmReturn);
  565. const auto iterator = graph->GetValue().GetListIterator();
  566. NUdf::TUnboxedValue tuple;
  567. UNIT_ASSERT(iterator.Next(tuple));
  568. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  569. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  570. UNIT_ASSERT(iterator.Next(tuple));
  571. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  572. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  573. UNIT_ASSERT(!iterator.Next(tuple));
  574. UNIT_ASSERT(!iterator.Next(tuple));
  575. }
  576. }
  577. Y_UNIT_TEST_LLVM(TestInnerMulti) {
  578. for (ui32 pass = 0; pass < 1; ++pass) {
  579. TSetup<LLVM> setup;
  580. TProgramBuilder& pb = *setup.PgmBuilder;
  581. const auto key1 = pb.NewDataLiteral<ui32>(1);
  582. const auto key2 = pb.NewDataLiteral<ui32>(2);
  583. const auto key3 = pb.NewDataLiteral<ui32>(2);
  584. const auto key4 = pb.NewDataLiteral<ui32>(3);
  585. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  586. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  587. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  588. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  589. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  590. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  591. const auto tupleType = pb.NewTupleType({
  592. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  593. pb.NewDataType(NUdf::TDataType<char*>::Id)
  594. });
  595. const auto list1 = pb.NewList(tupleType, {
  596. pb.NewTuple({key1, payload1}),
  597. pb.NewTuple({key2, payload2}),
  598. pb.NewTuple({key3, payload3})
  599. });
  600. const auto list2 = pb.NewList(tupleType, {
  601. pb.NewTuple({key2, payload4}),
  602. pb.NewTuple({key3, payload5}),
  603. pb.NewTuple({key4, payload6})
  604. });
  605. const auto dict2 = pb.ToHashedDict(list2, true,
  606. [&](TRuntimeNode item) {
  607. return pb.Nth(item, 0U);
  608. },
  609. [&](TRuntimeNode item) {
  610. return pb.NewTuple({pb.Nth(item, 1U)});
  611. });
  612. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  613. pb.NewDataType(NUdf::TDataType<char*>::Id),
  614. pb.NewDataType(NUdf::TDataType<char*>::Id)
  615. }));
  616. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  617. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  618. dict2, EJoinKind::Inner, {0U}, {1U, 0U}, {0U, 1U}, resultType),
  619. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  620. );
  621. const auto graph = setup.BuildGraph(pgmReturn);
  622. const auto iterator = graph->GetValue().GetListIterator();
  623. NUdf::TUnboxedValue tuple;
  624. UNIT_ASSERT(iterator.Next(tuple));
  625. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  626. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  627. UNIT_ASSERT(iterator.Next(tuple));
  628. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  629. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  630. UNIT_ASSERT(iterator.Next(tuple));
  631. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  632. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  633. UNIT_ASSERT(iterator.Next(tuple));
  634. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  635. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  636. UNIT_ASSERT(!iterator.Next(tuple));
  637. UNIT_ASSERT(!iterator.Next(tuple));
  638. }
  639. }
  640. Y_UNIT_TEST_LLVM(TestLeft) {
  641. for (ui32 pass = 0; pass < 1; ++pass) {
  642. TSetup<LLVM> setup;
  643. TProgramBuilder& pb = *setup.PgmBuilder;
  644. const auto key1 = pb.NewDataLiteral<ui32>(1);
  645. const auto key2 = pb.NewDataLiteral<ui32>(2);
  646. const auto key3 = pb.NewDataLiteral<ui32>(3);
  647. const auto key4 = pb.NewDataLiteral<ui32>(4);
  648. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  649. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  650. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  651. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  652. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  653. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  654. const auto tupleType = pb.NewTupleType({
  655. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  656. pb.NewDataType(NUdf::TDataType<char*>::Id)
  657. });
  658. const auto list1 = pb.NewList(tupleType, {
  659. pb.NewTuple({key1, payload1}),
  660. pb.NewTuple({key2, payload2}),
  661. pb.NewTuple({key3, payload3})
  662. });
  663. const auto list2 = pb.NewList(tupleType, {
  664. pb.NewTuple({key2, payload4}),
  665. pb.NewTuple({key3, payload5}),
  666. pb.NewTuple({key4, payload6})
  667. });
  668. const auto dict2 = pb.ToHashedDict(list2, false,
  669. [&](TRuntimeNode item) {
  670. return pb.Nth(item, 0U);
  671. },
  672. [&](TRuntimeNode item) {
  673. return pb.NewTuple({pb.Nth(item, 1U)});
  674. });
  675. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  676. pb.NewDataType(NUdf::TDataType<char*>::Id),
  677. pb.NewDataType(NUdf::TDataType<char*>::Id)
  678. }));
  679. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  680. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  681. dict2, EJoinKind::Left, {0U}, {1U, 0U}, {0U, 1U}, resultType),
  682. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  683. );
  684. const auto graph = setup.BuildGraph(pgmReturn);
  685. const auto iterator = graph->GetValue().GetListIterator();
  686. NUdf::TUnboxedValue tuple;
  687. UNIT_ASSERT(iterator.Next(tuple));
  688. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  689. UNIT_ASSERT(!tuple.GetElement(1));
  690. UNIT_ASSERT(iterator.Next(tuple));
  691. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  692. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  693. UNIT_ASSERT(iterator.Next(tuple));
  694. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  695. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  696. UNIT_ASSERT(!iterator.Next(tuple));
  697. UNIT_ASSERT(!iterator.Next(tuple));
  698. }
  699. }
  700. Y_UNIT_TEST_LLVM(TestLeftMulti) {
  701. for (ui32 pass = 0; pass < 1; ++pass) {
  702. TSetup<LLVM> setup;
  703. TProgramBuilder& pb = *setup.PgmBuilder;
  704. const auto key1 = pb.NewDataLiteral<ui32>(1);
  705. const auto key2 = pb.NewDataLiteral<ui32>(2);
  706. const auto key3 = pb.NewDataLiteral<ui32>(2);
  707. const auto key4 = pb.NewDataLiteral<ui32>(3);
  708. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  709. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  710. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  711. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  712. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  713. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  714. const auto tupleType = pb.NewTupleType({
  715. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  716. pb.NewDataType(NUdf::TDataType<char*>::Id)
  717. });
  718. const auto list1 = pb.NewList(tupleType, {
  719. pb.NewTuple({key1, payload1}),
  720. pb.NewTuple({key2, payload2}),
  721. pb.NewTuple({key3, payload3})
  722. });
  723. const auto list2 = pb.NewList(tupleType, {
  724. pb.NewTuple({key2, payload4}),
  725. pb.NewTuple({key3, payload5}),
  726. pb.NewTuple({key4, payload6})
  727. });
  728. const auto dict2 = pb.ToHashedDict(list2, true,
  729. [&](TRuntimeNode item) {
  730. return pb.Nth(item, 0U);
  731. },
  732. [&](TRuntimeNode item) {
  733. return pb.NewTuple({pb.Nth(item, 1U)});
  734. });
  735. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  736. pb.NewDataType(NUdf::TDataType<char*>::Id),
  737. pb.NewDataType(NUdf::TDataType<char*>::Id)
  738. }));
  739. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  740. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  741. dict2, EJoinKind::Left, {0U}, {1U, 0U}, {0U, 1U}, resultType),
  742. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  743. );
  744. const auto graph = setup.BuildGraph(pgmReturn);
  745. const auto iterator = graph->GetValue().GetListIterator();
  746. NUdf::TUnboxedValue tuple;
  747. UNIT_ASSERT(iterator.Next(tuple));
  748. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  749. UNIT_ASSERT(!tuple.GetElement(1));
  750. UNIT_ASSERT(iterator.Next(tuple));
  751. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  752. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  753. UNIT_ASSERT(iterator.Next(tuple));
  754. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  755. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  756. UNIT_ASSERT(iterator.Next(tuple));
  757. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  758. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "X");
  759. UNIT_ASSERT(iterator.Next(tuple));
  760. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  761. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(1), "Y");
  762. UNIT_ASSERT(!iterator.Next(tuple));
  763. UNIT_ASSERT(!iterator.Next(tuple));
  764. }
  765. }
  766. Y_UNIT_TEST_LLVM(TestLeftSemi) {
  767. for (ui32 pass = 0; pass < 1; ++pass) {
  768. TSetup<LLVM> setup;
  769. TProgramBuilder& pb = *setup.PgmBuilder;
  770. const auto key1 = pb.NewDataLiteral<ui32>(1);
  771. const auto key2 = pb.NewDataLiteral<ui32>(2);
  772. const auto key3 = pb.NewDataLiteral<ui32>(2);
  773. const auto key4 = pb.NewDataLiteral<ui32>(3);
  774. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  775. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  776. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  777. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  778. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  779. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  780. const auto tupleType = pb.NewTupleType({
  781. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  782. pb.NewDataType(NUdf::TDataType<char*>::Id)
  783. });
  784. const auto list1 = pb.NewList(tupleType, {
  785. pb.NewTuple({key1, payload1}),
  786. pb.NewTuple({key2, payload2}),
  787. pb.NewTuple({key3, payload3})
  788. });
  789. const auto list2 = pb.NewList(tupleType, {
  790. pb.NewTuple({key2, payload4}),
  791. pb.NewTuple({key3, payload5}),
  792. pb.NewTuple({key4, payload6})
  793. });
  794. const auto dict2 = pb.ToHashedDict(list2, true,
  795. [&](TRuntimeNode item) {
  796. return pb.Nth(item, 0U);
  797. },
  798. [&](TRuntimeNode item) {
  799. return pb.NewTuple({pb.Nth(item, 1U)});
  800. });
  801. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  802. pb.NewDataType(NUdf::TDataType<char*>::Id),
  803. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  804. }));
  805. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  806. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  807. dict2, EJoinKind::LeftSemi, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  808. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  809. );
  810. const auto graph = setup.BuildGraph(pgmReturn);
  811. const auto iterator = graph->GetValue().GetListIterator();
  812. NUdf::TUnboxedValue tuple;
  813. UNIT_ASSERT(iterator.Next(tuple));
  814. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  815. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 2);
  816. UNIT_ASSERT(iterator.Next(tuple));
  817. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  818. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 2);
  819. UNIT_ASSERT(!iterator.Next(tuple));
  820. UNIT_ASSERT(!iterator.Next(tuple));
  821. }
  822. }
  823. Y_UNIT_TEST_LLVM(TestLeftOnly) {
  824. for (ui32 pass = 0; pass < 1; ++pass) {
  825. TSetup<LLVM> setup;
  826. TProgramBuilder& pb = *setup.PgmBuilder;
  827. const auto key1 = pb.NewDataLiteral<ui32>(1);
  828. const auto key2 = pb.NewDataLiteral<ui32>(2);
  829. const auto key3 = pb.NewDataLiteral<ui32>(2);
  830. const auto key4 = pb.NewDataLiteral<ui32>(3);
  831. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  832. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  833. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  834. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  835. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  836. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  837. const auto tupleType = pb.NewTupleType({
  838. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  839. pb.NewDataType(NUdf::TDataType<char*>::Id)
  840. });
  841. const auto list1 = pb.NewList(tupleType, {
  842. pb.NewTuple({key1, payload1}),
  843. pb.NewTuple({key2, payload2}),
  844. pb.NewTuple({key3, payload3})
  845. });
  846. const auto list2 = pb.NewList(tupleType, {
  847. pb.NewTuple({key2, payload4}),
  848. pb.NewTuple({key3, payload5}),
  849. pb.NewTuple({key4, payload6})
  850. });
  851. const auto dict2 = pb.ToHashedDict(list2, true,
  852. [&](TRuntimeNode item) {
  853. return pb.Nth(item, 0U);
  854. },
  855. [&](TRuntimeNode item) {
  856. return pb.NewTuple({pb.Nth(item, 1U)});
  857. });
  858. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  859. pb.NewDataType(NUdf::TDataType<char*>::Id),
  860. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  861. }));
  862. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  863. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  864. dict2, EJoinKind::LeftOnly, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  865. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  866. );
  867. const auto graph = setup.BuildGraph(pgmReturn);
  868. const auto iterator = graph->GetValue().GetListIterator();
  869. NUdf::TUnboxedValue tuple;
  870. UNIT_ASSERT(iterator.Next(tuple));
  871. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  872. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 1);
  873. UNIT_ASSERT(!iterator.Next(tuple));
  874. UNIT_ASSERT(!iterator.Next(tuple));
  875. }
  876. }
  877. Y_UNIT_TEST_LLVM(TestLeftSemiWithNullKey) {
  878. for (ui32 pass = 0; pass < 1; ++pass) {
  879. TSetup<LLVM> setup;
  880. TProgramBuilder& pb = *setup.PgmBuilder;
  881. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  882. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  883. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  884. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  885. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  886. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  887. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  888. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  889. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  890. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  891. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  892. const auto tupleType = pb.NewTupleType({
  893. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  894. pb.NewDataType(NUdf::TDataType<char*>::Id)
  895. });
  896. const auto list1 = pb.NewList(tupleType, {
  897. pb.NewTuple({key0, payload4}),
  898. pb.NewTuple({key1, payload1}),
  899. pb.NewTuple({key2, payload2}),
  900. pb.NewTuple({key3, payload3})
  901. });
  902. const auto list2 = pb.NewList(tupleType, {
  903. pb.NewTuple({key0, payload3}),
  904. pb.NewTuple({key2, payload4}),
  905. pb.NewTuple({key3, payload5}),
  906. pb.NewTuple({key4, payload6})
  907. });
  908. const auto dict2 = pb.ToHashedDict(list2, true,
  909. [&](TRuntimeNode item) {
  910. return pb.Nth(item, 0U);
  911. },
  912. [&](TRuntimeNode item) {
  913. return pb.NewTuple({pb.Nth(item, 1U)});
  914. });
  915. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  916. pb.NewDataType(NUdf::TDataType<char*>::Id),
  917. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  918. }));
  919. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  920. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  921. dict2, EJoinKind::LeftSemi, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  922. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  923. );
  924. const auto graph = setup.BuildGraph(pgmReturn);
  925. const auto iterator = graph->GetValue().GetListIterator();
  926. NUdf::TUnboxedValue tuple;
  927. UNIT_ASSERT(iterator.Next(tuple));
  928. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "B");
  929. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 2);
  930. UNIT_ASSERT(iterator.Next(tuple));
  931. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "C");
  932. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 2);
  933. UNIT_ASSERT(!iterator.Next(tuple));
  934. UNIT_ASSERT(!iterator.Next(tuple));
  935. }
  936. }
  937. Y_UNIT_TEST_LLVM(TestLeftOnlyWithNullKey) {
  938. for (ui32 pass = 0; pass < 1; ++pass) {
  939. TSetup<LLVM> setup;
  940. TProgramBuilder& pb = *setup.PgmBuilder;
  941. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  942. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  943. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  944. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  945. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  946. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  947. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  948. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  949. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  950. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  951. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  952. const auto tupleType = pb.NewTupleType({
  953. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  954. pb.NewDataType(NUdf::TDataType<char*>::Id)
  955. });
  956. const auto list1 = pb.NewList(tupleType, {
  957. pb.NewTuple({key0, payload4}),
  958. pb.NewTuple({key1, payload1}),
  959. pb.NewTuple({key2, payload2}),
  960. pb.NewTuple({key3, payload3})
  961. });
  962. const auto list2 = pb.NewList(tupleType, {
  963. pb.NewTuple({key0, payload3}),
  964. pb.NewTuple({key2, payload4}),
  965. pb.NewTuple({key3, payload5}),
  966. pb.NewTuple({key4, payload6})
  967. });
  968. const auto dict2 = pb.ToHashedDict(list2, true,
  969. [&](TRuntimeNode item) {
  970. return pb.Nth(item, 0U);
  971. },
  972. [&](TRuntimeNode item) {
  973. return pb.NewTuple({pb.Nth(item, 1U)});
  974. });
  975. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  976. pb.NewDataType(NUdf::TDataType<char*>::Id),
  977. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  978. }));
  979. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.MapJoinCore(pb.ExpandMap(pb.ToFlow(list1),
  980. [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  981. dict2, EJoinKind::LeftOnly, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  982. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  983. );
  984. const auto graph = setup.BuildGraph(pgmReturn);
  985. const auto iterator = graph->GetValue().GetListIterator();
  986. NUdf::TUnboxedValue tuple;
  987. UNIT_ASSERT(iterator.Next(tuple));
  988. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "X");
  989. UNIT_ASSERT(!tuple.GetElement(1));
  990. UNIT_ASSERT(iterator.Next(tuple));
  991. UNBOXED_VALUE_STR_EQUAL(tuple.GetElement(0), "A");
  992. UNIT_ASSERT_VALUES_EQUAL(tuple.GetElement(1).Get<ui32>(), 1);
  993. UNIT_ASSERT(!iterator.Next(tuple));
  994. UNIT_ASSERT(!iterator.Next(tuple));
  995. }
  996. }
  997. }
  998. #endif
  999. }
  1000. }