// mkql_grace_join_ut.cpp (112 KB)
  1. #include "mkql_computation_node_ut.h"
  2. #include <yql/essentials/minikql/mkql_runtime_version.h>
  3. #include <yql/essentials/minikql/comp_nodes/mkql_grace_join_imp.h>
  4. #include <yql/essentials/minikql/computation/mock_spiller_factory_ut.h>
  5. #include <chrono>
  6. #include <iostream>
  7. #include <cstring>
  8. #include <vector>
  9. #include <cassert>
  10. #include <cstdlib>
  11. #include <stdlib.h>
  12. #include <random>
  13. #include <util/system/compiler.h>
  14. #include <util/stream/null.h>
  15. #include <util/system/mem_info.h>
  16. #include <cstdint>
  17. namespace NKikimr {
  18. namespace NMiniKQL {
  19. constexpr bool IsVerbose = false;
  20. #define CTEST (IsVerbose ? Cerr : Cnull)
  21. Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinMemTest) {
  22. Y_UNIT_TEST(TestMem1) {
  23. const ui64 TupleSize = 1024;
  24. const ui64 NBuckets = 128;
  25. const ui64 NTuples = 100000;
  26. const ui64 BucketSize = (2* NTuples * (TupleSize + 1) ) / NBuckets;
  27. ui64 *bigTuple = (ui64 * ) malloc(TupleSize * sizeof(ui64));
  28. ui64 *buckets[NBuckets];
  29. ui64 tuplesPos[NBuckets];
  30. std::mt19937_64 rng;
  31. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  32. for (ui64 i = 0; i < TupleSize; i++)
  33. {
  34. bigTuple[i] = dist(rng);
  35. }
  36. ui64 bucket = 0;
  37. ui64 milliseconds = 0;
  38. const ui64 BitsForData = 30;
  39. char* a = (char * )malloc(1 << BitsForData);
  40. char* b = (char *) malloc(1 << BitsForData);
  41. UNIT_ASSERT(a);
  42. UNIT_ASSERT(b);
  43. memset(a, 1, 1 << BitsForData);
  44. memset(b, 2, 1 << BitsForData);
  45. std::chrono::steady_clock::time_point begin01 = std::chrono::steady_clock::now();
  46. memcpy(b, a, 1 << BitsForData);
  47. std::chrono::steady_clock::time_point end01 = std::chrono::steady_clock::now();
  48. UNIT_ASSERT(*a == 1);
  49. UNIT_ASSERT(*b == 1);
  50. Y_DO_NOT_OPTIMIZE_AWAY(a);
  51. Y_DO_NOT_OPTIMIZE_AWAY(b);
  52. ui64 microseconds = std::chrono::duration_cast<std::chrono::microseconds>(end01 - begin01).count();
  53. CTEST << "Time for memcpy = " << microseconds << "[microseconds]" << Endl;
  54. CTEST << "Data size = " << (1<<BitsForData) / (1024 * 1024) << "[MB]" << Endl;
  55. CTEST << "Memcpy speed = " << ( (1<<BitsForData) ) / (microseconds) << "MB/sec" << Endl;
  56. CTEST << Endl;
  57. std::vector<std::vector<ui64>> vec_buckets;
  58. vec_buckets.resize(NBuckets);
  59. for (ui64 i = 0; i < NBuckets; i++)
  60. {
  61. vec_buckets[i].resize(2 * TupleSize * NTuples / (NBuckets - 1), 0);
  62. vec_buckets[i].clear();
  63. // vec_buckets[i].reserve( 2 * TupleSize * NTuples / (NBuckets - 1));
  64. }
  65. for (ui64 i = 0; i < NBuckets; i++) {
  66. buckets[i] = (ui64 * ) malloc( (BucketSize * sizeof(ui64) * 32) / 32);
  67. memset( buckets[i], 1, (BucketSize * sizeof(ui64) * 32) / 32);
  68. tuplesPos[i] = 0;
  69. }
  70. std::chrono::steady_clock::time_point begin02 = std::chrono::steady_clock::now();
  71. std::uniform_int_distribution<ui64> bucketDist(0, NBuckets - 1);
  72. for (ui64 i = 0; i < NTuples; i++)
  73. {
  74. bucket = i % NBuckets;
  75. // bucket = bucketDist(rng);
  76. std::vector<ui64> &curr_vec = vec_buckets[bucket];
  77. curr_vec.insert(curr_vec.end(), bigTuple, bigTuple + TupleSize);
  78. }
  79. std::chrono::steady_clock::time_point end02 = std::chrono::steady_clock::now();
  80. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end02 - begin02).count();
  81. CTEST << "Time for std::insert = " << milliseconds << "[ms]" << Endl;
  82. CTEST << "Total MB = " << (TupleSize * NTuples * sizeof(ui64) / (1024 * 1024)) << Endl;
  83. CTEST << "std::insert speed = " << (TupleSize * NTuples * sizeof(ui64) * 1000) / (milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  84. CTEST << Endl;
  85. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  86. for (ui64 i = 0; i < NTuples; i++)
  87. {
  88. bucket = i % NBuckets;
  89. // bucket = bucketDist(rng);
  90. ui64 * dst = buckets[bucket] + tuplesPos[bucket];
  91. std::memcpy(dst, bigTuple, TupleSize*sizeof(ui64));
  92. tuplesPos[bucket] += TupleSize;
  93. }
  94. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  95. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  96. CTEST << "Time for std::memcpy = " << milliseconds << "[ms]" << Endl;
  97. CTEST << "Total MB = " << (TupleSize * NTuples * sizeof(ui64) / (1024 * 1024)) << Endl;
  98. CTEST << "std:memcpy speed = " << (TupleSize * NTuples * sizeof(ui64) * 1000) / (milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  99. CTEST << Endl;
  100. for (ui64 i = 0; i < NBuckets; i++) {
  101. tuplesPos[i] = 0;
  102. }
  103. std::chrono::steady_clock::time_point begin04 = std::chrono::steady_clock::now();
  104. for (ui64 i = 0; i < NTuples; i++)
  105. {
  106. bucket = bucketDist(rng);
  107. ui64 * dst = buckets[bucket] + tuplesPos[bucket];
  108. ui64 *dst1 = dst + 1;
  109. ui64 *dst2 = dst + 2;
  110. ui64 *dst3 = dst + 3;
  111. ui64 *src = bigTuple;
  112. ui64 *src1 = bigTuple + 1;
  113. ui64 *src2 = bigTuple + 2;
  114. ui64 *src3 = bigTuple + 3;
  115. for (ui64 i = 0; i < TupleSize; i += 4)
  116. {
  117. *dst++ = *src++;
  118. *dst1++ = *src1++;
  119. *dst2++ = *src2++;
  120. *dst3++ = *src3++;
  121. }
  122. tuplesPos[bucket] += TupleSize;
  123. }
  124. std::chrono::steady_clock::time_point end04 = std::chrono::steady_clock::now();
  125. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end04 - begin04).count();
  126. CTEST << "Time for loop copy = " << milliseconds << "[ms]" << Endl;
  127. CTEST << "Total MB = " << (TupleSize * NTuples * sizeof(ui64) / (1024 * 1024)) << Endl;
  128. CTEST << "Loop copy speed = " << (TupleSize * NTuples * sizeof(ui64) * 1000) / (milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  129. CTEST << Endl;
  130. for (ui64 i = 0; i < NBuckets; i++) {
  131. free(buckets[i]);
  132. }
  133. free(b);
  134. free(a);
  135. free(bigTuple);
  136. UNIT_ASSERT(true);
  137. }
  138. }
  139. Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinImpTest) {
  140. constexpr ui64 BigTableTuples = 600000;
  141. constexpr ui64 SmallTableTuples = 150000;
  142. constexpr ui64 BigTupleSize = 40;
  143. Y_UNIT_TEST_TWIN(TestTryToPreallocateMemoryForJoin, EXCEPTION) {
  144. TSetup<false> setup;
  145. ui64 tuple[11] = {0,1,2,3,4,5,6,7,8,9,10};
  146. ui32 strSizes[2] = {4, 4};
  147. char * strVals[] = {(char *)"aaaaa", (char *)"bbbb"};
  148. char * bigStrVal[] = {(char *)"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
  149. (char *)"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"};
  150. ui32 bigStrSize[2] = {151, 151};
  151. GraceJoin::TTable bigTable(1,1,1,1);
  152. GraceJoin::TTable smallTable(1,1,1,1);
  153. GraceJoin::TTable joinTable(1,1,1,1);
  154. const ui64 TupleSize = 1024;
  155. ui64 bigTuple[TupleSize];
  156. std::mt19937_64 rng; // deterministic PRNG
  157. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  158. for (ui64 i = 0; i < TupleSize; i++) {
  159. bigTuple[i] = dist(rng);
  160. }
  161. std::uniform_int_distribution<ui64> smallDist(0, SmallTableTuples - 1);
  162. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  163. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  164. tuple[1] = smallDist(rng);
  165. tuple[2] = tuple[1];
  166. smallTable.AddTuple(tuple, strVals, strSizes);
  167. }
  168. for ( ui64 i = 0; i < BigTableTuples; i++) {
  169. tuple[1] = smallDist(rng);
  170. tuple[2] = tuple[1];
  171. bigTable.AddTuple(tuple, strVals, strSizes);
  172. }
  173. ui64 allocationsCount = 0;
  174. if (EXCEPTION) {
  175. TlsAllocState->SetLimit(1);
  176. TlsAllocState->SetIncreaseMemoryLimitCallback([&allocationsCount](ui64, ui64 required) {
  177. // Preallocate memory for some buckets before fail
  178. if (allocationsCount++ > 5) {
  179. throw TMemoryLimitExceededException();
  180. }
  181. TlsAllocState->SetLimit(required);
  182. });
  183. }
  184. bool preallocationResult = joinTable.TryToPreallocateMemoryForJoin(smallTable, bigTable, EJoinKind::Inner, true, true);
  185. UNIT_ASSERT_EQUAL(preallocationResult, !EXCEPTION);
  186. }
  187. Y_UNIT_TEST_LLVM(TestImp1) {
  188. TSetup<LLVM> setup;
  189. ui64 tuple[11] = {0,1,2,3,4,5,6,7,8,9,10};
  190. ui32 strSizes[2] = {4, 4};
  191. char * strVals[] = {(char *)"aaaaa", (char *)"bbbb"};
  192. char * bigStrVal[] = {(char *)"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
  193. (char *)"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"};
  194. ui32 bigStrSize[2] = {151, 151};
  195. NMemInfo::TMemInfo mi = NMemInfo::GetMemInfo();
  196. CTEST << "Mem usage before tables tuples added (MB): " << mi.RSS / (1024 * 1024) << Endl;
  197. GraceJoin::TTable bigTable(1,1,1,1);
  198. GraceJoin::TTable smallTable(1,1,1,1);
  199. GraceJoin::TTable joinTable(1,1,1,1);
  200. std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
  201. const ui64 TupleSize = 1024;
  202. ui64 bigTuple[TupleSize];
  203. std::mt19937_64 rng; // deterministic PRNG
  204. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  205. for (ui64 i = 0; i < TupleSize; i++) {
  206. bigTuple[i] = dist(rng);
  207. }
  208. ui64 milliseconds = 0;
  209. std::uniform_int_distribution<ui64> smallDist(0, SmallTableTuples - 1);
  210. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  211. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  212. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  213. tuple[1] = smallDist(rng);
  214. tuple[2] = tuple[1];
  215. smallTable.AddTuple(tuple, strVals, strSizes);
  216. }
  217. for ( ui64 i = 0; i < BigTableTuples; i++) {
  218. tuple[1] = smallDist(rng);
  219. tuple[2] = tuple[1];
  220. bigTable.AddTuple(tuple, strVals, strSizes);
  221. }
  222. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  223. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  224. CTEST << "Time for hash = " << milliseconds << "[ms]" << Endl;
  225. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  226. CTEST << Endl;
  227. mi = NMemInfo::GetMemInfo();
  228. CTEST << "Mem usage after tables tuples added (MB): " << mi.RSS / (1024 * 1024) << Endl;
  229. bigTable.Clear();
  230. smallTable.Clear();
  231. begin03 = std::chrono::steady_clock::now();
  232. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  233. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  234. tuple[1] = smallDist(rng);
  235. tuple[2] = tuple[1];
  236. smallTable.AddTuple(tuple, strVals, strSizes);
  237. }
  238. for ( ui64 i = 0; i < BigTableTuples; i++) {
  239. tuple[1] = smallDist(rng);
  240. tuple[2] = tuple[1];
  241. bigTable.AddTuple(tuple, strVals, strSizes);
  242. }
  243. end03 = std::chrono::steady_clock::now();
  244. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  245. CTEST << "Time for hash = " << milliseconds << "[ms]" << Endl;
  246. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  247. CTEST << Endl;
  248. mi = NMemInfo::GetMemInfo();
  249. CTEST << "Mem usage after tables tuples added (MB): " << mi.RSS / (1024 * 1024) << Endl;
  250. std::vector<ui64> vals1, vals2;
  251. std::vector<char *> strVals1, strVals2;
  252. std::vector<ui32> strSizes1, strSizes2;
  253. GraceJoin::TupleData td1, td2;
  254. vals1.resize(100);
  255. vals2.resize(100);
  256. strVals1.resize(100);
  257. strVals2.resize(100);
  258. strSizes1.resize(100);
  259. strSizes2.resize(100);
  260. td1.IntColumns = vals1.data();
  261. td1.StrColumns = strVals1.data();
  262. td1.StrSizes = strSizes1.data();
  263. td2.IntColumns = vals2.data();
  264. td2.StrColumns = strVals2.data();
  265. td2.StrSizes = strSizes2.data();
  266. ui64 numBigTuples = 0;
  267. bigTable.ResetIterator();
  268. std::chrono::steady_clock::time_point begin04 = std::chrono::steady_clock::now();
  269. while(bigTable.NextTuple(td1)) { numBigTuples++; }
  270. CTEST << "Num of big tuples 1: " << numBigTuples << Endl;
  271. std::chrono::steady_clock::time_point end04 = std::chrono::steady_clock::now();
  272. CTEST << "Time for get 1 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end04 - begin04).count() << "[ms]" << Endl;
  273. CTEST << Endl;
  274. numBigTuples = 0;
  275. bigTable.ResetIterator();
  276. std::chrono::steady_clock::time_point begin041 = std::chrono::steady_clock::now();
  277. while(bigTable.NextTuple(td2)) { numBigTuples++; }
  278. CTEST << "Num of big tuples 2: " << numBigTuples << Endl;
  279. std::chrono::steady_clock::time_point end041 = std::chrono::steady_clock::now();
  280. CTEST << "Time for get 2 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end041 - begin041).count() << "[ms]" << Endl;
  281. CTEST << Endl;
  282. std::chrono::steady_clock::time_point begin05 = std::chrono::steady_clock::now();
  283. joinTable.Join(smallTable,bigTable);
  284. std::chrono::steady_clock::time_point end05 = std::chrono::steady_clock::now();
  285. CTEST << "Time for join = " << std::chrono::duration_cast<std::chrono::milliseconds>(end05 - begin05).count() << "[ms]" << Endl;
  286. CTEST << Endl;
  287. mi = NMemInfo::GetMemInfo();
  288. CTEST << "Mem usage after tables join (MB): " << mi.RSS / (1024 * 1024) << Endl;
  289. joinTable.ResetIterator();
  290. ui64 numJoinedTuples = 0;
  291. std::chrono::steady_clock::time_point begin042 = std::chrono::steady_clock::now();
  292. while(joinTable.NextJoinedData(td1, td2)) { numJoinedTuples++; }
  293. CTEST << "Num of joined tuples : " << numJoinedTuples << Endl;
  294. std::chrono::steady_clock::time_point end042 = std::chrono::steady_clock::now();
  295. CTEST << "Time for get joined tuples: = " << std::chrono::duration_cast<std::chrono::milliseconds>(end042 - begin042).count() << "[ms]" << Endl;
  296. CTEST << Endl;
  297. std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
  298. CTEST << "Time difference = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "[ms]" << Endl;
  299. CTEST << Endl;
  300. }
  301. Y_UNIT_TEST_LLVM(TestImp1Batch) {
  302. TSetup<LLVM> setup;
  303. ui64 tuple[11] = {0,1,2,3,4,5,6,7,8,9,10};
  304. ui32 strSizes[2] = {4, 4};
  305. char * strVals[] = {(char *)"aaaaa", (char *)"bbbb"};
  306. char * bigStrVal[] = {(char *)"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
  307. (char *)"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"};
  308. ui32 bigStrSize[2] = {151, 151};
  309. NMemInfo::TMemInfo mi = NMemInfo::GetMemInfo();
  310. CTEST << "Mem usage before tables tuples added (MB): " << mi.RSS / (1024 * 1024) << Endl;
  311. GraceJoin::TTable bigTable(1,1,1,1);
  312. GraceJoin::TTable smallTable(1,1,1,1);
  313. GraceJoin::TTable joinTable(1,1,1,1);
  314. std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
  315. const ui64 TupleSize = 1024;
  316. ui64 bigTuple[TupleSize];
  317. std::mt19937_64 rng; // deterministic PRNG
  318. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  319. for (ui64 i = 0; i < TupleSize; i++) {
  320. bigTuple[i] = dist(rng);
  321. }
  322. ui64 millisecondsAdd = 0;
  323. ui64 millisecondsJoin = 0;
  324. ui64 millisecondsNextJoinTuple = 0;
  325. ui64 millisecondsNextTuple = 0;
  326. const ui64 BatchTuples = 100000;
  327. std::uniform_int_distribution<ui64> smallDist(0, SmallTableTuples - 1);
  328. {
  329. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  330. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  331. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  332. tuple[1] = smallDist(rng);
  333. tuple[2] = tuple[1];
  334. smallTable.AddTuple(tuple, strVals, strSizes);
  335. }
  336. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  337. millisecondsAdd += std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  338. }
  339. for ( ui64 pos = 0; pos < BigTableTuples; ) {
  340. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  341. ui64 limit = std::min(pos + BatchTuples, BigTableTuples);
  342. for (; pos < limit; ++pos) {
  343. tuple[1] = smallDist(rng);
  344. tuple[2] = tuple[1];
  345. bigTable.AddTuple(tuple, strVals, strSizes);
  346. }
  347. bigTable.Clear();
  348. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  349. millisecondsAdd += std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  350. }
  351. CTEST << "Time for hash = " << millisecondsAdd << "[ms]" << Endl;
  352. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( millisecondsAdd * 1024 * 1024) << "MB/sec" << Endl;
  353. CTEST << Endl;
  354. mi = NMemInfo::GetMemInfo();
  355. CTEST << "Mem usage after tables tuples added (MB): " << mi.RSS / (1024 * 1024) << Endl;
  356. millisecondsAdd = 0;
  357. smallTable.Clear();
  358. {
  359. auto begin03 = std::chrono::steady_clock::now();
  360. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  361. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  362. tuple[1] = smallDist(rng);
  363. tuple[2] = tuple[1];
  364. smallTable.AddTuple(tuple, strVals, strSizes);
  365. }
  366. auto end03 = std::chrono::steady_clock::now();
  367. millisecondsAdd += std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  368. }
  369. std::vector<ui64> vals1, vals2;
  370. std::vector<char *> strVals1, strVals2;
  371. std::vector<ui32> strSizes1, strSizes2;
  372. GraceJoin::TupleData td1, td2;
  373. vals1.resize(100);
  374. vals2.resize(100);
  375. strVals1.resize(100);
  376. strVals2.resize(100);
  377. strSizes1.resize(100);
  378. strSizes2.resize(100);
  379. td1.IntColumns = vals1.data();
  380. td1.StrColumns = strVals1.data();
  381. td1.StrSizes = strSizes1.data();
  382. td2.IntColumns = vals2.data();
  383. td2.StrColumns = strVals2.data();
  384. td2.StrSizes = strSizes2.data();
  385. ui64 numJoinedTuples = 0;
  386. ui64 numBigTuples = 0;
  387. for ( ui64 pos = 0; pos < BigTableTuples; ) {
  388. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  389. bigTable.Clear();
  390. ui64 limit = std::min(pos + BatchTuples, BigTableTuples);
  391. for (; pos < limit; ++pos) {
  392. tuple[1] = smallDist(rng);
  393. tuple[2] = tuple[1];
  394. bigTable.AddTuple(tuple, strVals, strSizes);
  395. }
  396. auto end03 = std::chrono::steady_clock::now();
  397. millisecondsAdd += std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  398. bigTable.ResetIterator();
  399. std::chrono::steady_clock::time_point begin04 = std::chrono::steady_clock::now();
  400. while(bigTable.NextTuple(td1)) { numBigTuples++; }
  401. std::chrono::steady_clock::time_point end04 = std::chrono::steady_clock::now();
  402. millisecondsNextTuple += std::chrono::duration_cast<std::chrono::milliseconds>(end04 - begin04).count();
  403. std::chrono::steady_clock::time_point begin05 = std::chrono::steady_clock::now();
  404. joinTable.Join(smallTable, bigTable, EJoinKind::Inner, false, pos < BigTableTuples);
  405. std::chrono::steady_clock::time_point end05 = std::chrono::steady_clock::now();
  406. millisecondsJoin += std::chrono::duration_cast<std::chrono::milliseconds>(end05 - begin05).count();
  407. joinTable.ResetIterator();
  408. std::chrono::steady_clock::time_point begin042 = std::chrono::steady_clock::now();
  409. while(joinTable.NextJoinedData(td1, td2)) { numJoinedTuples++; }
  410. std::chrono::steady_clock::time_point end042 = std::chrono::steady_clock::now();
  411. millisecondsNextJoinTuple += std::chrono::duration_cast<std::chrono::milliseconds>(end042 - begin042).count();
  412. }
  413. std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
  414. CTEST << "Num of big tuples 1: " << numBigTuples << Endl;
  415. CTEST << "Time for get 1 = " << millisecondsNextTuple << "[ms]" << Endl;
  416. CTEST << Endl;
  417. CTEST << "Time for join = " << millisecondsJoin << "[ms]" << Endl;
  418. CTEST << Endl;
  419. CTEST << "Time for get joined tuples: = " << millisecondsNextJoinTuple << "[ms]" << Endl;
  420. CTEST << Endl;
  421. mi = NMemInfo::GetMemInfo();
  422. CTEST << "Mem usage after tables add and join (MB): " << mi.RSS / (1024 * 1024) << Endl;
  423. CTEST << "Time for hash = " << millisecondsAdd << "[ms]" << Endl;
  424. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( millisecondsAdd * 1024 * 1024) << "MB/sec" << Endl;
  425. CTEST << Endl;
  426. CTEST << "Num of joined tuples : " << numJoinedTuples << Endl;
  427. CTEST << "Time difference = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "[ms]" << Endl;
  428. CTEST << Endl;
  429. }
  430. }
  431. Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinAnyTest) {
  432. Y_UNIT_TEST_LLVM(TestImp2) {
  433. TSetup<LLVM> setup;
  434. ui64 tuple[11] = {0,1,2,3,4,5,6,7,8,9,10};
  435. ui32 strSizes[2] = {4, 4};
  436. char * strVals[] = {(char *)"aaaaa", (char *)"bbbb"};
  437. char * bigStrVal[] = {(char *)"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
  438. (char *)"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"};
  439. ui32 bigStrSize[2] = {151, 151};
  440. GraceJoin::TTable bigTable (1,1,1,1,0,0,1, nullptr, true);
  441. GraceJoin::TTable smallTable(1,1,1,1,0,0,1, nullptr, true);
  442. GraceJoin::TTable joinTable (1,1,1,1,0,0,1, nullptr, true);
  443. std::mt19937_64 rng;
  444. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  445. std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
  446. const ui64 TupleSize = 1024;
  447. ui64 bigTuple[TupleSize];
  448. for (ui64 i = 0; i < TupleSize; i++) {
  449. bigTuple[i] = dist(rng);
  450. }
  451. ui64 milliseconds = 0;
  452. const ui64 BigTableTuples = 600000;
  453. const ui64 SmallTableTuples = 150000;
  454. const ui64 BigTupleSize = 40;
  455. std::uniform_int_distribution<ui64> smallDist(0, SmallTableTuples - 1);
  456. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  457. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  458. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  459. tuple[1] = i;
  460. tuple[2] = tuple[1];
  461. smallTable.AddTuple(tuple, strVals, strSizes);
  462. }
  463. for ( ui64 i = 0; i < BigTableTuples; i++) {
  464. tuple[1] = i % SmallTableTuples;
  465. tuple[2] = tuple[1];
  466. bigTable.AddTuple(tuple, strVals, strSizes);
  467. }
  468. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  469. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  470. CTEST << "Time for hash = " << milliseconds << "[ms]" << Endl;
  471. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  472. CTEST << Endl;
  473. std::vector<ui64> vals1, vals2;
  474. std::vector<char *> strVals1, strVals2;
  475. std::vector<ui32> strSizes1, strSizes2;
  476. GraceJoin::TupleData td1, td2;
  477. vals1.resize(100);
  478. vals2.resize(100);
  479. strVals1.resize(100);
  480. strVals2.resize(100);
  481. strSizes1.resize(100);
  482. strSizes2.resize(100);
  483. td1.IntColumns = vals1.data();
  484. td1.StrColumns = strVals1.data();
  485. td1.StrSizes = strSizes1.data();
  486. td2.IntColumns = vals2.data();
  487. td2.StrColumns = strVals2.data();
  488. td2.StrSizes = strSizes2.data();
  489. ui64 numBigTuples = 0;
  490. bigTable.ResetIterator();
  491. std::chrono::steady_clock::time_point begin04 = std::chrono::steady_clock::now();
  492. while(bigTable.NextTuple(td1)) { numBigTuples++; }
  493. CTEST << "Num of big tuples 1: " << numBigTuples << Endl;
  494. std::chrono::steady_clock::time_point end04 = std::chrono::steady_clock::now();
  495. CTEST << "Time for get 1 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end04 - begin04).count() << "[ms]" << Endl;
  496. CTEST << Endl;
  497. numBigTuples = 0;
  498. bigTable.ResetIterator();
  499. std::chrono::steady_clock::time_point begin041 = std::chrono::steady_clock::now();
  500. while(bigTable.NextTuple(td2)) { numBigTuples++; }
  501. CTEST << "Num of big tuples 2: " << numBigTuples << Endl;
  502. std::chrono::steady_clock::time_point end041 = std::chrono::steady_clock::now();
  503. CTEST << "Time for get 2 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end041 - begin041).count() << "[ms]" << Endl;
  504. CTEST << Endl;
  505. std::chrono::steady_clock::time_point begin05 = std::chrono::steady_clock::now();
  506. joinTable.Join(smallTable,bigTable);
  507. std::chrono::steady_clock::time_point end05 = std::chrono::steady_clock::now();
  508. CTEST << "Time for join = " << std::chrono::duration_cast<std::chrono::milliseconds>(end05 - begin05).count() << "[ms]" << Endl;
  509. CTEST << Endl;
  510. joinTable.ResetIterator();
  511. ui64 numJoinedTuples = 0;
  512. std::chrono::steady_clock::time_point begin042 = std::chrono::steady_clock::now();
  513. while(joinTable.NextJoinedData(td1, td2)) { numJoinedTuples++; }
  514. CTEST << "Num of joined tuples : " << numJoinedTuples << Endl;
  515. std::chrono::steady_clock::time_point end042 = std::chrono::steady_clock::now();
  516. CTEST << "Time for get joined tuples: = " << std::chrono::duration_cast<std::chrono::milliseconds>(end042 - begin042).count() << "[ms]" << Endl;
  517. CTEST << Endl;
  518. std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
  519. CTEST << "Time difference = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "[ms]" << Endl;
  520. CTEST << Endl;
  521. }
  522. }
  523. Y_UNIT_TEST_SUITE(TMiniKQLGraceSelfJoinTest) {
  524. Y_UNIT_TEST_LLVM(TestImp3) {
  525. TSetup<LLVM> setup;
  526. ui64 tuple[11] = {0,1,2,3,4,5,6,7,8,9,10};
  527. ui32 strSizes[2] = {4, 4};
  528. char * strVals[] = {(char *)"aaaaa", (char *)"bbbb"};
  529. char * bigStrVal[] = {(char *)"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
  530. (char *)"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"};
  531. ui32 bigStrSize[2] = {151, 151};
  532. GraceJoin::TTable bigTable (1,1,1,1,0,0,1, nullptr, false);
  533. GraceJoin::TTable smallTable(1,1,1,1,0,0,1, nullptr, false);
  534. GraceJoin::TTable joinTable (1,1,1,1,0,0,1, nullptr, false);
  535. std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
  536. const ui64 TupleSize = 1024;
  537. ui64 bigTuple[TupleSize];
  538. std::mt19937_64 rng;
  539. std::uniform_int_distribution<ui64> dist(0, 10000 - 1);
  540. for (ui64 i = 0; i < TupleSize; i++) {
  541. bigTuple[i] = dist(rng);
  542. }
  543. ui64 milliseconds = 0;
  544. const ui64 BigTableTuples = 600000;
  545. const ui64 SmallTableTuples = 150000;
  546. const ui64 BigTupleSize = 40;
  547. std::chrono::steady_clock::time_point begin03 = std::chrono::steady_clock::now();
  548. smallTable.AddTuple(tuple, bigStrVal, bigStrSize);
  549. for ( ui64 i = 0; i < SmallTableTuples + 1; i++) {
  550. tuple[1] = i;
  551. tuple[2] = tuple[1];
  552. smallTable.AddTuple(tuple, strVals, strSizes);
  553. }
  554. for ( ui64 i = 0; i < BigTableTuples; i++) {
  555. tuple[1] = i % SmallTableTuples;
  556. tuple[2] = tuple[1];
  557. bigTable.AddTuple(tuple, strVals, strSizes);
  558. }
  559. std::chrono::steady_clock::time_point end03 = std::chrono::steady_clock::now();
  560. milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end03 - begin03).count();
  561. CTEST << "Time for hash = " << milliseconds << "[ms]" << Endl;
  562. CTEST << "Adding tuples speed: " << (BigTupleSize * (BigTableTuples + SmallTableTuples) * 1000) / ( milliseconds * 1024 * 1024) << "MB/sec" << Endl;
  563. CTEST << Endl;
  564. std::vector<ui64> vals1, vals2;
  565. std::vector<char *> strVals1, strVals2;
  566. std::vector<ui32> strSizes1, strSizes2;
  567. GraceJoin::TupleData td1, td2;
  568. vals1.resize(100);
  569. vals2.resize(100);
  570. strVals1.resize(100);
  571. strVals2.resize(100);
  572. strSizes1.resize(100);
  573. strSizes2.resize(100);
  574. td1.IntColumns = vals1.data();
  575. td1.StrColumns = strVals1.data();
  576. td1.StrSizes = strSizes1.data();
  577. td2.IntColumns = vals2.data();
  578. td2.StrColumns = strVals2.data();
  579. td2.StrSizes = strSizes2.data();
  580. ui64 numBigTuples = 0;
  581. bigTable.ResetIterator();
  582. std::chrono::steady_clock::time_point begin04 = std::chrono::steady_clock::now();
  583. while(bigTable.NextTuple(td1)) { numBigTuples++; }
  584. CTEST << "Num of big tuples 1: " << numBigTuples << Endl;
  585. std::chrono::steady_clock::time_point end04 = std::chrono::steady_clock::now();
  586. CTEST << "Time for get 1 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end04 - begin04).count() << "[ms]" << Endl;
  587. CTEST << Endl;
  588. numBigTuples = 0;
  589. bigTable.ResetIterator();
  590. std::chrono::steady_clock::time_point begin041 = std::chrono::steady_clock::now();
  591. while(bigTable.NextTuple(td2)) { numBigTuples++; }
  592. CTEST << "Num of big tuples 2: " << numBigTuples << Endl;
  593. std::chrono::steady_clock::time_point end041 = std::chrono::steady_clock::now();
  594. CTEST << "Time for get 2 = " << std::chrono::duration_cast<std::chrono::milliseconds>(end041 - begin041).count() << "[ms]" << Endl;
  595. CTEST << Endl;
  596. std::chrono::steady_clock::time_point begin05 = std::chrono::steady_clock::now();
  597. joinTable.Join(bigTable,bigTable);
  598. std::chrono::steady_clock::time_point end05 = std::chrono::steady_clock::now();
  599. CTEST << "Time for join = " << std::chrono::duration_cast<std::chrono::milliseconds>(end05 - begin05).count() << "[ms]" << Endl;
  600. CTEST << Endl;
  601. joinTable.ResetIterator();
  602. ui64 numJoinedTuples = 0;
  603. std::chrono::steady_clock::time_point begin042 = std::chrono::steady_clock::now();
  604. while(joinTable.NextJoinedData(td1, td2)) { numJoinedTuples++; }
  605. CTEST << "Num of joined tuples : " << numJoinedTuples << Endl;
  606. std::chrono::steady_clock::time_point end042 = std::chrono::steady_clock::now();
  607. CTEST << "Time for get joined tuples: = " << std::chrono::duration_cast<std::chrono::milliseconds>(end042 - begin042).count() << "[ms]" << Endl;
  608. CTEST << Endl;
  609. std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
  610. CTEST << "Time difference = " << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() << "[ms]" << Endl;
  611. CTEST << Endl;
  612. }
  613. }
  614. #if !defined(MKQL_RUNTIME_VERSION) || MKQL_RUNTIME_VERSION >= 40u
  615. Y_UNIT_TEST_SUITE(TMiniKQLSelfJoinTest) {
  616. Y_UNIT_TEST_LLVM_SPILLING(TestInner1) {
  617. if (SPILLING && RuntimeVersion < 50) return;
  618. for (ui32 pass = 0; pass < 1; ++pass) {
  619. TSetup<LLVM, SPILLING> setup;
  620. TProgramBuilder& pb = *setup.PgmBuilder;
  621. const auto key1 = pb.NewDataLiteral<ui32>(1);
  622. const auto key2 = pb.NewDataLiteral<ui32>(2);
  623. const auto key3 = pb.NewDataLiteral<ui32>(4);
  624. const auto key4 = pb.NewDataLiteral<ui32>(4);
  625. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  626. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  627. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  628. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  629. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  630. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  631. const auto tupleType = pb.NewTupleType({
  632. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  633. pb.NewDataType(NUdf::TDataType<char*>::Id)
  634. });
  635. const auto list1 = pb.NewList(tupleType, {
  636. pb.NewTuple({key1, payload1}),
  637. pb.NewTuple({key2, payload2}),
  638. pb.NewTuple({key3, payload3}),
  639. pb.NewTuple({key4, payload4})
  640. });
  641. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  642. pb.NewDataType(NUdf::TDataType<char*>::Id),
  643. pb.NewDataType(NUdf::TDataType<char*>::Id)
  644. }));
  645. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceSelfJoin(
  646. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  647. EJoinKind::Inner, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  648. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  649. );
  650. if (SPILLING) {
  651. setup.RenameCallable(pgmReturn, "GraceSelfJoin", "GraceSelfJoinWithSpilling");
  652. }
  653. const auto graph = setup.BuildGraph(pgmReturn);
  654. if (SPILLING) {
  655. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  656. }
  657. const auto iterator = graph->GetValue().GetListIterator();
  658. NUdf::TUnboxedValue tuple;
  659. std::map<std::pair<TString, TString>, ui32> u;
  660. while (iterator.Next(tuple)) {
  661. auto t0 = tuple.GetElement(0);
  662. auto t1 = tuple.GetElement(1);
  663. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  664. }
  665. UNIT_ASSERT(!iterator.Next(tuple));
  666. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("C"))], 1);
  667. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("X"))], 1);
  668. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("X"), TString("C"))], 1);
  669. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("X"), TString("X"))], 1);
  670. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("B"))], 1);
  671. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString("A"))], 1);
  672. UNIT_ASSERT_EQUAL(u.size(), 6);
  673. }
  674. }
  675. Y_UNIT_TEST_LLVM_SPILLING(TestDiffKeys) {
  676. if (SPILLING && RuntimeVersion < 50) return;
  677. for (ui32 pass = 0; pass < 1; ++pass) {
  678. TSetup<LLVM, SPILLING> setup;
  679. TProgramBuilder& pb = *setup.PgmBuilder;
  680. const auto key1 = pb.NewDataLiteral<ui32>(1);
  681. const auto key2 = pb.NewDataLiteral<ui32>(2);
  682. const auto key3 = pb.NewDataLiteral<ui32>(4);
  683. const auto key4 = pb.NewDataLiteral<ui32>(4);
  684. const auto key11 = pb.NewDataLiteral<ui32>(1);
  685. const auto key21 = pb.NewDataLiteral<ui32>(1);
  686. const auto key31 = pb.NewDataLiteral<ui32>(2);
  687. const auto key41 = pb.NewDataLiteral<ui32>(3);
  688. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  689. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  690. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  691. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  692. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  693. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  694. const auto tupleType = pb.NewTupleType({
  695. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  696. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  697. pb.NewDataType(NUdf::TDataType<char*>::Id)
  698. });
  699. const auto list1 = pb.NewList(tupleType, {
  700. pb.NewTuple({key1, key11, payload1}),
  701. pb.NewTuple({key2, key21, payload2}),
  702. pb.NewTuple({key3, key31, payload3}),
  703. pb.NewTuple({key4, key41, payload4})
  704. });
  705. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  706. pb.NewDataType(NUdf::TDataType<char*>::Id),
  707. pb.NewDataType(NUdf::TDataType<char*>::Id)
  708. }));
  709. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceSelfJoin(
  710. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  711. EJoinKind::Inner, {0U}, {1U}, {2U, 0U}, {2U, 1U}, resultType),
  712. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  713. );
  714. if (SPILLING) {
  715. setup.RenameCallable(pgmReturn, "GraceSelfJoin", "GraceSelfJoinWithSpilling");
  716. }
  717. const auto graph = setup.BuildGraph(pgmReturn);
  718. if (SPILLING) {
  719. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  720. }
  721. const auto iterator = graph->GetValue().GetListIterator();
  722. NUdf::TUnboxedValue tuple;
  723. std::map<std::pair<TString, TString>, ui32> u;
  724. while (iterator.Next(tuple)) {
  725. auto t0 = tuple.GetElement(0);
  726. auto t1 = tuple.GetElement(1);
  727. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  728. }
  729. UNIT_ASSERT(!iterator.Next(tuple));
  730. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString("A"))], 1);
  731. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString("B"))], 1);
  732. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("C"))], 1);
  733. UNIT_ASSERT_EQUAL(u.size(), 3);
  734. }
  735. }
  736. }
  737. #endif
  738. Y_UNIT_TEST_SUITE(TMiniKQLGraceJoinTest) {
  739. Y_UNIT_TEST_LLVM_SPILLING(TestInner1) {
  740. if (SPILLING && RuntimeVersion < 50) return;
  741. for (ui32 pass = 0; pass < 1; ++pass) {
  742. TSetup<LLVM, SPILLING> setup;
  743. TProgramBuilder& pb = *setup.PgmBuilder;
  744. const auto key1 = pb.NewDataLiteral<ui32>(1);
  745. const auto key2 = pb.NewDataLiteral<ui32>(2);
  746. const auto key3 = pb.NewDataLiteral<ui32>(4);
  747. const auto key4 = pb.NewDataLiteral<ui32>(4);
  748. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  749. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  750. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  751. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  752. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  753. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  754. const auto tupleType = pb.NewTupleType({
  755. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  756. pb.NewDataType(NUdf::TDataType<char*>::Id)
  757. });
  758. const auto list1 = pb.NewList(tupleType, {
  759. pb.NewTuple({key1, payload1}),
  760. pb.NewTuple({key2, payload2}),
  761. pb.NewTuple({key3, payload3})
  762. });
  763. const auto list2 = pb.NewList(tupleType, {
  764. pb.NewTuple({key2, payload4}),
  765. pb.NewTuple({key3, payload5}),
  766. pb.NewTuple({key4, payload6})
  767. });
  768. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  769. pb.NewDataType(NUdf::TDataType<char*>::Id),
  770. pb.NewDataType(NUdf::TDataType<char*>::Id)
  771. }));
  772. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  773. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  774. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  775. EJoinKind::Inner, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  776. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  777. );
  778. if (SPILLING) {
  779. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  780. }
  781. const auto graph = setup.BuildGraph(pgmReturn);
  782. if (SPILLING) {
  783. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  784. }
  785. const auto iterator = graph->GetValue().GetListIterator();
  786. NUdf::TUnboxedValue tuple;
  787. std::map<std::pair<TString, TString>, ui32> u;
  788. while (iterator.Next(tuple)) {
  789. auto t0 = tuple.GetElement(0);
  790. auto t1 = tuple.GetElement(1);
  791. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  792. }
  793. UNIT_ASSERT(!iterator.Next(tuple));
  794. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  795. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Z"))], 1);
  796. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  797. UNIT_ASSERT_EQUAL(u.size(), 3);
  798. }
  799. }
  800. Y_UNIT_TEST_LLVM_SPILLING(TestInnerDoubleCondition1) {
  801. if (SPILLING && RuntimeVersion < 50) return;
  802. for (ui32 pass = 0; pass < 1; ++pass) {
  803. TSetup<LLVM, SPILLING> setup;
  804. TProgramBuilder& pb = *setup.PgmBuilder;
  805. const auto key1 = pb.NewDataLiteral<ui32>(1);
  806. const auto key2 = pb.NewDataLiteral<ui32>(2);
  807. const auto key3 = pb.NewDataLiteral<ui32>(4);
  808. const auto key4 = pb.NewDataLiteral<ui32>(4);
  809. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  810. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  811. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  812. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  813. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  814. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  815. const auto tupleType1 = pb.NewTupleType({
  816. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  817. pb.NewDataType(NUdf::TDataType<char*>::Id)
  818. });
  819. const auto tupleType2 = pb.NewTupleType({
  820. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  821. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  822. pb.NewDataType(NUdf::TDataType<char*>::Id)
  823. });
  824. const auto list1 = pb.NewList(tupleType1, {
  825. pb.NewTuple({key1, payload1}),
  826. pb.NewTuple({key2, payload2}),
  827. pb.NewTuple({key3, payload3})
  828. });
  829. const auto list2 = pb.NewList(tupleType2, {
  830. pb.NewTuple({key2, key2, payload4}),
  831. pb.NewTuple({key3, key2, payload5}),
  832. pb.NewTuple({key4, key1, payload6})
  833. });
  834. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  835. pb.NewDataType(NUdf::TDataType<char*>::Id),
  836. pb.NewDataType(NUdf::TDataType<char*>::Id)
  837. }));
  838. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  839. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  840. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  841. EJoinKind::Inner, {0U, 0U}, {0U, 1U}, {1U, 0U}, {2U, 1U}, resultType),
  842. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  843. );
  844. if (SPILLING) {
  845. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  846. }
  847. const auto graph = setup.BuildGraph(pgmReturn);
  848. if (SPILLING) {
  849. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  850. }
  851. const auto iterator = graph->GetValue().GetListIterator();
  852. NUdf::TUnboxedValue tuple;
  853. std::map<std::pair<TString, TString>, ui32> u;
  854. while (iterator.Next(tuple)) {
  855. auto t0 = tuple.GetElement(0);
  856. auto t1 = tuple.GetElement(1);
  857. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  858. }
  859. UNIT_ASSERT(!iterator.Next(tuple));
  860. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  861. UNIT_ASSERT_EQUAL(u.size(), 1);
  862. }
  863. }
  864. Y_UNIT_TEST_LLVM_SPILLING(TestInnerManyKeyStrings) {
  865. if (SPILLING && RuntimeVersion < 50) return;
  866. for (ui32 pass = 0; pass < 1; ++pass) {
  867. TSetup<LLVM, SPILLING> setup;
  868. TProgramBuilder& pb = *setup.PgmBuilder;
  869. const auto key1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A1");
  870. const auto key2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A2");
  871. const auto key3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A3");
  872. const auto key4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B1");
  873. const auto key5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B2");
  874. const auto key6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B3");
  875. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  876. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  877. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  878. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  879. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  880. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  881. const auto tupleType1 = pb.NewTupleType({
  882. pb.NewDataType(NUdf::TDataType<char*>::Id),
  883. pb.NewDataType(NUdf::TDataType<char*>::Id),
  884. pb.NewDataType(NUdf::TDataType<char*>::Id)
  885. });
  886. const auto tupleType2 = pb.NewTupleType({
  887. pb.NewDataType(NUdf::TDataType<char*>::Id),
  888. pb.NewDataType(NUdf::TDataType<char*>::Id),
  889. pb.NewDataType(NUdf::TDataType<char*>::Id)
  890. });
  891. const auto list1 = pb.NewList(tupleType1, {
  892. pb.NewTuple({key1, key4, payload1}),
  893. pb.NewTuple({key2, key5, payload2}),
  894. pb.NewTuple({key3, key6, payload3})
  895. });
  896. const auto list2 = pb.NewList(tupleType2, {
  897. pb.NewTuple({key4, key1, payload4}),
  898. pb.NewTuple({key5, key2, payload5}),
  899. pb.NewTuple({key6, key6, payload6})
  900. });
  901. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  902. pb.NewDataType(NUdf::TDataType<char*>::Id),
  903. pb.NewDataType(NUdf::TDataType<char*>::Id)
  904. }));
  905. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  906. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  907. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  908. EJoinKind::Inner, {0U, 1U}, {1U, 0U}, {1U, 0U}, {2U, 1U}, resultType),
  909. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  910. );
  911. if (SPILLING) {
  912. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  913. }
  914. const auto graph = setup.BuildGraph(pgmReturn);
  915. if (SPILLING) {
  916. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  917. }
  918. const auto iterator = graph->GetValue().GetListIterator();
  919. NUdf::TUnboxedValue tuple;
  920. std::map<std::pair<TString, TString>, ui32> u;
  921. while (iterator.Next(tuple)) {
  922. auto t0 = tuple.GetElement(0);
  923. auto t1 = tuple.GetElement(1);
  924. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  925. }
  926. UNIT_ASSERT(!iterator.Next(tuple));
  927. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B2"), TString("Y"))], 1);
  928. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B1"), TString("X"))], 1);
  929. UNIT_ASSERT_EQUAL(u.size(), 2);
  930. }
  931. }
  932. Y_UNIT_TEST_LLVM_SPILLING(TestInnerManyKeyUuid) {
  933. if (SPILLING && RuntimeVersion < 50) return;
  934. for (ui32 pass = 0; pass < 1; ++pass) {
  935. TSetup<LLVM, SPILLING> setup;
  936. TProgramBuilder& pb = *setup.PgmBuilder;
  937. const auto key1 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("A1A1A1A1A1A1A1A1");
  938. const auto key2 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("A2A2A2A2A2A2A2A2");
  939. const auto key3 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("A3A3A3A3A3A3A3A3");
  940. const auto key4 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("B1B1B1B1B1B1B1B1");
  941. const auto key5 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("B2B2B2B2B2B2B2B2");
  942. const auto key6 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("B3B3B3B3B3B3B3B3");
  943. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("AAAAAAAAAAAAAAAA");
  944. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("BBBBBBBBBBBBBBBB");
  945. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("CCCCCCCCCCCCCCCC");
  946. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("XXXXXXXXXXXXXXXX");
  947. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("YYYYYYYYYYYYYYYY");
  948. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::Uuid>("ZZZZZZZZZZZZZZZZ");
  949. const auto tupleType1 = pb.NewTupleType({
  950. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id),
  951. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id),
  952. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id)
  953. });
  954. const auto tupleType2 = pb.NewTupleType({
  955. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id),
  956. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id),
  957. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id)
  958. });
  959. const auto list1 = pb.NewList(tupleType1, {
  960. pb.NewTuple({key1, key4, payload1}),
  961. pb.NewTuple({key2, key5, payload2}),
  962. pb.NewTuple({key3, key6, payload3})
  963. });
  964. const auto list2 = pb.NewList(tupleType2, {
  965. pb.NewTuple({key4, key1, payload4}),
  966. pb.NewTuple({key5, key2, payload5}),
  967. pb.NewTuple({key6, key6, payload6})
  968. });
  969. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  970. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id),
  971. pb.NewDataType(NUdf::TDataType<NUdf::TUuid>::Id)
  972. }));
  973. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  974. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  975. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U), pb.Nth(item, 2U)}; }),
  976. EJoinKind::Inner, {0U, 1U}, {1U, 0U}, {1U, 0U}, {2U, 1U}, resultType),
  977. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  978. );
  979. if (SPILLING) {
  980. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  981. }
  982. const auto graph = setup.BuildGraph(pgmReturn);
  983. if (SPILLING) {
  984. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  985. }
  986. const auto iterator = graph->GetValue().GetListIterator();
  987. NUdf::TUnboxedValue tuple;
  988. std::map<std::pair<TString, TString>, ui32> u;
  989. while (iterator.Next(tuple)) {
  990. auto t0 = tuple.GetElement(0);
  991. auto t1 = tuple.GetElement(1);
  992. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  993. }
  994. UNIT_ASSERT(!iterator.Next(tuple));
  995. UNIT_ASSERT_EQUAL(u[std::make_pair(
  996. TString("B2B2B2B2B2B2B2B2"),
  997. TString("YYYYYYYYYYYYYYYY")
  998. )], 1);
  999. UNIT_ASSERT_EQUAL(u[std::make_pair(
  1000. TString("B1B1B1B1B1B1B1B1"),
  1001. TString("XXXXXXXXXXXXXXXX")
  1002. )], 1);
  1003. UNIT_ASSERT_EQUAL(u.size(), 2);
  1004. }
  1005. }
  1006. Y_UNIT_TEST_LLVM_SPILLING(TestInnerStringKey1) {
  1007. if (SPILLING && RuntimeVersion < 50) return;
  1008. for (ui32 pass = 0; pass < 1; ++pass) {
  1009. TSetup<LLVM, SPILLING> setup;
  1010. TProgramBuilder& pb = *setup.PgmBuilder;
  1011. const auto key1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("1");
  1012. const auto key2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("2");
  1013. const auto key3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("4");
  1014. const auto key4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("4");
  1015. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1016. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1017. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1018. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1019. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1020. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1021. const auto tupleType = pb.NewTupleType({
  1022. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1023. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1024. });
  1025. const auto list1 = pb.NewList(tupleType, {
  1026. pb.NewTuple({key1, payload1}),
  1027. pb.NewTuple({key2, payload2}),
  1028. pb.NewTuple({key3, payload3})
  1029. });
  1030. const auto list2 = pb.NewList(tupleType, {
  1031. pb.NewTuple({key2, payload4}),
  1032. pb.NewTuple({key3, payload5}),
  1033. pb.NewTuple({key4, payload6})
  1034. });
  1035. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1036. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1037. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1038. }));
  1039. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1040. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1041. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1042. EJoinKind::Inner, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1043. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1044. );
  1045. if (SPILLING) {
  1046. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1047. }
  1048. const auto graph = setup.BuildGraph(pgmReturn);
  1049. if (SPILLING) {
  1050. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1051. }
  1052. const auto iterator = graph->GetValue().GetListIterator();
  1053. NUdf::TUnboxedValue tuple;
  1054. std::map<std::pair<TString, TString>, ui32> u;
  1055. while (iterator.Next(tuple)) {
  1056. auto t0 = tuple.GetElement(0);
  1057. auto t1 = tuple.GetElement(1);
  1058. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  1059. }
  1060. UNIT_ASSERT(!iterator.Next(tuple));
  1061. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1062. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1063. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Z"))], 1);
  1064. UNIT_ASSERT_EQUAL(u.size(), 3);
  1065. }
  1066. }
  1067. Y_UNIT_TEST_LLVM_SPILLING(TMiniKQLGraceJoinTestInnerMulti1) {
  1068. if (SPILLING && RuntimeVersion < 50) return;
  1069. for (ui32 pass = 0; pass < 1; ++pass) {
  1070. TSetup<LLVM, SPILLING> setup;
  1071. TProgramBuilder& pb = *setup.PgmBuilder;
  1072. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1073. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1074. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1075. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1076. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1077. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1078. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1079. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1080. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1081. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1082. const auto tupleType = pb.NewTupleType({
  1083. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1084. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1085. });
  1086. const auto list1 = pb.NewList(tupleType, {
  1087. pb.NewTuple({key1, payload1}),
  1088. pb.NewTuple({key2, payload2}),
  1089. pb.NewTuple({key3, payload3})
  1090. });
  1091. const auto list2 = pb.NewList(tupleType, {
  1092. pb.NewTuple({key2, payload4}),
  1093. pb.NewTuple({key3, payload5}),
  1094. pb.NewTuple({key4, payload6})
  1095. });
  1096. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1097. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1098. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1099. }));
  1100. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1101. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1102. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1103. EJoinKind::Inner, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1104. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1105. );
  1106. if (SPILLING) {
  1107. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1108. }
  1109. const auto graph = setup.BuildGraph(pgmReturn);
  1110. if (SPILLING) {
  1111. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1112. }
  1113. const auto iterator = graph->GetValue().GetListIterator();
  1114. NUdf::TUnboxedValue tuple;
  1115. std::map<std::pair<TString, TString>, ui32> u;
  1116. while (iterator.Next(tuple)) {
  1117. auto t0 = tuple.GetElement(0);
  1118. auto t1 = tuple.GetElement(1);
  1119. ++u[std::make_pair(TString(t0.AsStringRef()), TString(t1.AsStringRef()) )];
  1120. }
  1121. UNIT_ASSERT(!iterator.Next(tuple));
  1122. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1123. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("Y"))], 1);
  1124. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("X"))], 1);
  1125. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1126. UNIT_ASSERT_EQUAL(u.size(), 4);
  1127. }
  1128. }
  1129. Y_UNIT_TEST_LLVM_SPILLING(TestLeft1) {
  1130. if (SPILLING && RuntimeVersion < 50) return;
  1131. for (ui32 pass = 0; pass < 1; ++pass) {
  1132. TSetup<LLVM, SPILLING> setup;
  1133. TProgramBuilder& pb = *setup.PgmBuilder;
  1134. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1135. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1136. const auto key3 = pb.NewDataLiteral<ui32>(3);
  1137. const auto key4 = pb.NewDataLiteral<ui32>(4);
  1138. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1139. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1140. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1141. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1142. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1143. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1144. const auto tupleType = pb.NewTupleType({
  1145. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1146. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1147. });
  1148. const auto list1 = pb.NewList(tupleType, {
  1149. pb.NewTuple({key1, payload1}),
  1150. pb.NewTuple({key2, payload2}),
  1151. pb.NewTuple({key3, payload3})
  1152. });
  1153. const auto list2 = pb.NewList(tupleType, {
  1154. pb.NewTuple({key2, payload4}),
  1155. pb.NewTuple({key3, payload5}),
  1156. pb.NewTuple({key4, payload6})
  1157. });
  1158. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1159. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1160. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1161. }));
  1162. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1163. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1164. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1165. EJoinKind::Left, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1166. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1167. );
  1168. if (SPILLING) {
  1169. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1170. }
  1171. const auto graph = setup.BuildGraph(pgmReturn);
  1172. if (SPILLING) {
  1173. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1174. }
  1175. const auto iterator = graph->GetValue().GetListIterator();
  1176. NUdf::TUnboxedValue tuple;
  1177. std::map<std::pair<TString, TString>, ui32> u;
  1178. // use empty TString as replacement for NULL
  1179. while (iterator.Next(tuple)) {
  1180. auto t0 = tuple.GetElement(0);
  1181. auto t1 = tuple.GetElement(1);
  1182. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  1183. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  1184. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  1185. }
  1186. UNIT_ASSERT(!iterator.Next(tuple));
  1187. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString())], 1);
  1188. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1189. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1190. UNIT_ASSERT_EQUAL(u.size(), 3);
  1191. }
  1192. }
  1193. Y_UNIT_TEST_LLVM_SPILLING(TestLeftMulti1) {
  1194. if (SPILLING && RuntimeVersion < 50) return;
  1195. for (ui32 pass = 0; pass < 1; ++pass) {
  1196. TSetup<LLVM, SPILLING> setup;
  1197. TProgramBuilder& pb = *setup.PgmBuilder;
  1198. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1199. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1200. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1201. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1202. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1203. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1204. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1205. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1206. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1207. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1208. const auto tupleType = pb.NewTupleType({
  1209. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1210. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1211. });
  1212. const auto list1 = pb.NewList(tupleType, {
  1213. pb.NewTuple({key1, payload1}),
  1214. pb.NewTuple({key2, payload2}),
  1215. pb.NewTuple({key3, payload3})
  1216. });
  1217. const auto list2 = pb.NewList(tupleType, {
  1218. pb.NewTuple({key2, payload4}),
  1219. pb.NewTuple({key3, payload5}),
  1220. pb.NewTuple({key4, payload6})
  1221. });
  1222. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1223. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1224. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1225. }));
  1226. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1227. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1228. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1229. EJoinKind::Left, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1230. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1231. );
  1232. if (SPILLING) {
  1233. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1234. }
  1235. const auto graph = setup.BuildGraph(pgmReturn);
  1236. if (SPILLING) {
  1237. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1238. }
  1239. const auto iterator = graph->GetValue().GetListIterator();
  1240. NUdf::TUnboxedValue tuple;
  1241. std::map<std::pair<TString, TString>, ui32> u;
  1242. while (iterator.Next(tuple)) {
  1243. auto t0 = tuple.GetElement(0);
  1244. auto t1 = tuple.GetElement(1);
  1245. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  1246. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  1247. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  1248. }
  1249. UNIT_ASSERT(!iterator.Next(tuple));
  1250. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString())], 1);
  1251. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1252. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("Y"))], 1);
  1253. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("X"))], 1);
  1254. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1255. UNIT_ASSERT_EQUAL(u.size(), 5);
  1256. }
  1257. }
  1258. Y_UNIT_TEST_LLVM_SPILLING(TestLeftSemi1) {
  1259. if (SPILLING && RuntimeVersion < 50) return;
  1260. for (ui32 pass = 0; pass < 1; ++pass) {
  1261. TSetup<LLVM, SPILLING> setup;
  1262. TProgramBuilder& pb = *setup.PgmBuilder;
  1263. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1264. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1265. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1266. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1267. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1268. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1269. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1270. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1271. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1272. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1273. const auto tupleType = pb.NewTupleType({
  1274. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1275. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1276. });
  1277. const auto list1 = pb.NewList(tupleType, {
  1278. pb.NewTuple({key1, payload1}),
  1279. pb.NewTuple({key2, payload2}),
  1280. pb.NewTuple({key3, payload3})
  1281. });
  1282. const auto list2 = pb.NewList(tupleType, {
  1283. pb.NewTuple({key2, payload4}),
  1284. pb.NewTuple({key3, payload5}),
  1285. pb.NewTuple({key4, payload6})
  1286. });
  1287. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1288. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1289. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1290. }));
  1291. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1292. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1293. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1294. EJoinKind::LeftSemi, {0U}, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  1295. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1296. );
  1297. if (SPILLING) {
  1298. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1299. }
  1300. const auto graph = setup.BuildGraph(pgmReturn);
  1301. if (SPILLING) {
  1302. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1303. }
  1304. const auto iterator = graph->GetValue().GetListIterator();
  1305. NUdf::TUnboxedValue tuple;
  1306. std::map<std::pair<TString, ui32>, ui32> u;
  1307. while (iterator.Next(tuple)) {
  1308. auto t0 = tuple.GetElement(0);
  1309. auto t1 = tuple.GetElement(1);
  1310. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1311. }
  1312. UNIT_ASSERT(!iterator.Next(tuple));
  1313. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), 2)], 1);
  1314. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), 2)], 1);
  1315. UNIT_ASSERT_EQUAL(u.size(), 2);
  1316. }
  1317. }
  1318. Y_UNIT_TEST_LLVM_SPILLING(TestLeftOnly1) {
  1319. if (SPILLING && RuntimeVersion < 50) return;
  1320. for (ui32 pass = 0; pass < 1; ++pass) {
  1321. TSetup<LLVM, SPILLING> setup;
  1322. TProgramBuilder& pb = *setup.PgmBuilder;
  1323. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1324. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1325. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1326. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1327. const auto key5 = pb.NewDataLiteral<ui32>(4);
  1328. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1329. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1330. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1331. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("D");
  1332. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1333. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1334. const auto payload7 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1335. const auto tupleType = pb.NewTupleType({
  1336. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1337. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1338. });
  1339. const auto list1 = pb.NewList(tupleType, {
  1340. pb.NewTuple({key1, payload1}),
  1341. pb.NewTuple({key2, payload2}),
  1342. pb.NewTuple({key3, payload3}),
  1343. pb.NewTuple({key4, payload4}),
  1344. pb.NewTuple({key5, payload4})
  1345. });
  1346. const auto list2 = pb.NewList(tupleType, {
  1347. pb.NewTuple({key2, payload5}),
  1348. pb.NewTuple({key3, payload6}),
  1349. pb.NewTuple({key4, payload7})
  1350. });
  1351. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1352. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1353. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1354. }));
  1355. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1356. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1357. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1358. EJoinKind::LeftOnly, {0U}, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  1359. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1360. );
  1361. if (SPILLING) {
  1362. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1363. }
  1364. const auto graph = setup.BuildGraph(pgmReturn);
  1365. if (SPILLING) {
  1366. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1367. }
  1368. const auto iterator = graph->GetValue().GetListIterator();
  1369. NUdf::TUnboxedValue tuple;
  1370. std::map<std::pair<TString, ui32>, ui32> u;
  1371. while (iterator.Next(tuple)) {
  1372. auto t0 = tuple.GetElement(0);
  1373. auto t1 = tuple.GetElement(1);
  1374. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1375. }
  1376. UNIT_ASSERT(!iterator.Next(tuple));
  1377. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("D"), 4)], 1);
  1378. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), 1)], 1);
  1379. UNIT_ASSERT_EQUAL(u.size(), 2);
  1380. }
  1381. }
  1382. Y_UNIT_TEST_LLVM_SPILLING(TestLeftSemiWithNullKey1) {
  1383. if (SPILLING && RuntimeVersion < 50) return;
  1384. for (ui32 pass = 0; pass < 1; ++pass) {
  1385. TSetup<LLVM, SPILLING> setup;
  1386. TProgramBuilder& pb = *setup.PgmBuilder;
  1387. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  1388. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  1389. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1390. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1391. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  1392. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1393. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1394. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1395. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1396. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1397. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1398. const auto tupleType = pb.NewTupleType({
  1399. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  1400. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1401. });
  1402. const auto list1 = pb.NewList(tupleType, {
  1403. pb.NewTuple({key0, payload4}),
  1404. pb.NewTuple({key1, payload1}),
  1405. pb.NewTuple({key2, payload2}),
  1406. pb.NewTuple({key3, payload3})
  1407. });
  1408. const auto list2 = pb.NewList(tupleType, {
  1409. pb.NewTuple({key0, payload3}),
  1410. pb.NewTuple({key2, payload4}),
  1411. pb.NewTuple({key3, payload5}),
  1412. pb.NewTuple({key4, payload6})
  1413. });
  1414. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1415. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1416. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1417. }));
  1418. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1419. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1420. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1421. EJoinKind::LeftSemi, {0U}, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  1422. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1423. );
  1424. if (SPILLING) {
  1425. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1426. }
  1427. const auto graph = setup.BuildGraph(pgmReturn);
  1428. if (SPILLING) {
  1429. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1430. }
  1431. const auto iterator = graph->GetValue().GetListIterator();
  1432. NUdf::TUnboxedValue tuple;
  1433. std::map<std::pair<TString, ui32>, ui32> u;
  1434. while (iterator.Next(tuple)) {
  1435. auto t0 = tuple.GetElement(0);
  1436. auto t1 = tuple.GetElement(1);
  1437. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1438. }
  1439. UNIT_ASSERT(!iterator.Next(tuple));
  1440. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), 2)], 1);
  1441. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), 2)], 1);
  1442. UNIT_ASSERT_EQUAL(u.size(), 2);
  1443. }
  1444. }
  1445. Y_UNIT_TEST_LLVM_SPILLING(TestLeftOnlyWithNullKey1) {
  1446. if (SPILLING && RuntimeVersion < 50) return;
  1447. for (ui32 pass = 0; pass < 1; ++pass) {
  1448. TSetup<LLVM, SPILLING> setup;
  1449. TProgramBuilder& pb = *setup.PgmBuilder;
  1450. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  1451. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  1452. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1453. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1454. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  1455. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1456. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1457. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1458. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1459. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1460. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1461. const auto tupleType = pb.NewTupleType({
  1462. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  1463. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1464. });
  1465. const auto list1 = pb.NewList(tupleType, {
  1466. pb.NewTuple({key0, payload4}),
  1467. pb.NewTuple({key1, payload1}),
  1468. pb.NewTuple({key2, payload2}),
  1469. pb.NewTuple({key3, payload3})
  1470. });
  1471. const auto list2 = pb.NewList(tupleType, {
  1472. pb.NewTuple({key0, payload3}),
  1473. pb.NewTuple({key2, payload4}),
  1474. pb.NewTuple({key3, payload5}),
  1475. pb.NewTuple({key4, payload6})
  1476. });
  1477. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1478. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1479. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1480. }));
  1481. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1482. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1483. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1484. EJoinKind::LeftOnly, {0U}, {0U}, {1U, 0U, 0U, 1U}, {}, resultType),
  1485. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1486. );
  1487. if (SPILLING) {
  1488. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1489. }
  1490. const auto graph = setup.BuildGraph(pgmReturn);
  1491. if (SPILLING) {
  1492. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1493. }
  1494. const auto iterator = graph->GetValue().GetListIterator();
  1495. NUdf::TUnboxedValue tuple;
  1496. std::map<std::pair<TString, ui64>, ui32> u;
  1497. while (iterator.Next(tuple)) {
  1498. auto t0 = tuple.GetElement(0);
  1499. auto t1 = tuple.GetElement(1);
  1500. ++u[std::make_pair(TString(t0.AsStringRef()), t1 ? t1.Get<ui32>() : std::numeric_limits<ui64>::max())];
  1501. // replace NULL with <ui64>::max()
  1502. }
  1503. UNIT_ASSERT(!iterator.Next(tuple));
  1504. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), 1)], 1);
  1505. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("X"), std::numeric_limits<ui64>::max())], 1);
  1506. UNIT_ASSERT_EQUAL(u.size(), 2);
  1507. }
  1508. }
  1509. Y_UNIT_TEST_LLVM_SPILLING(TestRight1) {
  1510. if (SPILLING && RuntimeVersion < 50) return;
  1511. for (ui32 pass = 0; pass < 1; ++pass) {
  1512. TSetup<LLVM, SPILLING> setup;
  1513. TProgramBuilder& pb = *setup.PgmBuilder;
  1514. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1515. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1516. const auto key3 = pb.NewDataLiteral<ui32>(3);
  1517. const auto key4 = pb.NewDataLiteral<ui32>(4);
  1518. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1519. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1520. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1521. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1522. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1523. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1524. const auto tupleType = pb.NewTupleType({
  1525. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1526. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1527. });
  1528. const auto list1 = pb.NewList(tupleType, {
  1529. pb.NewTuple({key1, payload1}),
  1530. pb.NewTuple({key2, payload2}),
  1531. pb.NewTuple({key3, payload3})
  1532. });
  1533. const auto list2 = pb.NewList(tupleType, {
  1534. pb.NewTuple({key2, payload4}),
  1535. pb.NewTuple({key3, payload5}),
  1536. pb.NewTuple({key4, payload6})
  1537. });
  1538. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1539. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1540. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1541. }));
  1542. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1543. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1544. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1545. EJoinKind::Right, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1546. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1547. );
  1548. if (SPILLING) {
  1549. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1550. }
  1551. const auto graph = setup.BuildGraph(pgmReturn);
  1552. if (SPILLING) {
  1553. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1554. }
  1555. const auto iterator = graph->GetValue().GetListIterator();
  1556. NUdf::TUnboxedValue tuple;
  1557. std::map<std::pair<TString, TString>, ui32> u;
  1558. while (iterator.Next(tuple)) {
  1559. auto t0 = tuple.GetElement(0);
  1560. auto t1 = tuple.GetElement(1);
  1561. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  1562. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  1563. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  1564. }
  1565. UNIT_ASSERT(!iterator.Next(tuple));
  1566. UNIT_ASSERT_EQUAL(u[std::make_pair(TString(), TString("Z"))], 1);
  1567. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1568. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1569. UNIT_ASSERT_EQUAL(u.size(), 3);
  1570. }
  1571. }
  1572. Y_UNIT_TEST_LLVM_SPILLING(TestRightOnly1) {
  1573. if (SPILLING && RuntimeVersion < 50) return;
  1574. for (ui32 pass = 0; pass < 1; ++pass) {
  1575. TSetup<LLVM, SPILLING> setup;
  1576. TProgramBuilder& pb = *setup.PgmBuilder;
  1577. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1578. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1579. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1580. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1581. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1582. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1583. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1584. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1585. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1586. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1587. const auto tupleType = pb.NewTupleType({
  1588. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1589. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1590. });
  1591. const auto list1 = pb.NewList(tupleType, {
  1592. pb.NewTuple({key1, payload1}),
  1593. pb.NewTuple({key2, payload2}),
  1594. pb.NewTuple({key3, payload3})
  1595. });
  1596. const auto list2 = pb.NewList(tupleType, {
  1597. pb.NewTuple({key2, payload4}),
  1598. pb.NewTuple({key3, payload5}),
  1599. pb.NewTuple({key4, payload6})
  1600. });
  1601. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1602. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1603. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1604. }));
  1605. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1606. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1607. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1608. EJoinKind::RightOnly, {0U}, {0U}, {}, {1U, 0U, 0U, 1U}, resultType),
  1609. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1610. );
  1611. if (SPILLING) {
  1612. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1613. }
  1614. const auto graph = setup.BuildGraph(pgmReturn);
  1615. if (SPILLING) {
  1616. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1617. }
  1618. const auto iterator = graph->GetValue().GetListIterator();
  1619. NUdf::TUnboxedValue tuple;
  1620. std::map<std::pair<TString, ui32>, ui32> u;
  1621. while (iterator.Next(tuple)) {
  1622. auto t0 = tuple.GetElement(0);
  1623. auto t1 = tuple.GetElement(1);
  1624. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1625. }
  1626. UNIT_ASSERT(!iterator.Next(tuple));
  1627. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("Z"), 3)], 1);
  1628. UNIT_ASSERT_EQUAL(u.size(), 1);
  1629. }
  1630. }
  1631. Y_UNIT_TEST_LLVM_SPILLING(TestRightSemi1) {
  1632. if (SPILLING && RuntimeVersion < 50) return;
  1633. for (ui32 pass = 0; pass < 1; ++pass) {
  1634. TSetup<LLVM, SPILLING> setup;
  1635. TProgramBuilder& pb = *setup.PgmBuilder;
  1636. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1637. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1638. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1639. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1640. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1641. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1642. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1643. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1644. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1645. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1646. const auto tupleType = pb.NewTupleType({
  1647. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1648. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1649. });
  1650. const auto list1 = pb.NewList(tupleType, {
  1651. pb.NewTuple({key1, payload1}),
  1652. pb.NewTuple({key2, payload2}),
  1653. pb.NewTuple({key3, payload3})
  1654. });
  1655. const auto list2 = pb.NewList(tupleType, {
  1656. pb.NewTuple({key2, payload4}),
  1657. pb.NewTuple({key3, payload5}),
  1658. pb.NewTuple({key4, payload6})
  1659. });
  1660. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1661. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1662. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1663. }));
  1664. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1665. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1666. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1667. EJoinKind::RightSemi, {0U}, {0U}, {}, {1U, 0U, 0U, 1U}, resultType),
  1668. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1669. );
  1670. if (SPILLING) {
  1671. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1672. }
  1673. const auto graph = setup.BuildGraph(pgmReturn);
  1674. if (SPILLING) {
  1675. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1676. }
  1677. const auto iterator = graph->GetValue().GetListIterator();
  1678. NUdf::TUnboxedValue tuple;
  1679. std::map<std::pair<TString, ui32>, ui32> u;
  1680. while (iterator.Next(tuple)) {
  1681. auto t0 = tuple.GetElement(0);
  1682. auto t1 = tuple.GetElement(1);
  1683. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1684. }
  1685. UNIT_ASSERT(!iterator.Next(tuple));
  1686. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("X"), 2)], 1);
  1687. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("Y"), 2)], 1);
  1688. UNIT_ASSERT_EQUAL(u.size(), 2);
  1689. }
  1690. }
  1691. Y_UNIT_TEST_LLVM_SPILLING(TestRightMulti1) {
  1692. if (SPILLING && RuntimeVersion < 50) return;
  1693. for (ui32 pass = 0; pass < 1; ++pass) {
  1694. TSetup<LLVM, SPILLING> setup;
  1695. TProgramBuilder& pb = *setup.PgmBuilder;
  1696. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1697. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1698. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1699. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1700. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1701. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1702. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1703. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1704. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1705. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1706. const auto tupleType = pb.NewTupleType({
  1707. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1708. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1709. });
  1710. const auto list1 = pb.NewList(tupleType, {
  1711. pb.NewTuple({key1, payload1}),
  1712. pb.NewTuple({key2, payload2}),
  1713. pb.NewTuple({key3, payload3})
  1714. });
  1715. const auto list2 = pb.NewList(tupleType, {
  1716. pb.NewTuple({key2, payload4}),
  1717. pb.NewTuple({key3, payload5}),
  1718. pb.NewTuple({key4, payload6})
  1719. });
  1720. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1721. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1722. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1723. }));
  1724. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1725. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1726. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1727. EJoinKind::Right, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1728. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1729. );
  1730. if (SPILLING) {
  1731. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1732. }
  1733. const auto graph = setup.BuildGraph(pgmReturn);
  1734. if (SPILLING) {
  1735. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1736. }
  1737. const auto iterator = graph->GetValue().GetListIterator();
  1738. NUdf::TUnboxedValue tuple;
  1739. std::map<std::pair<TString, TString>, ui32> u;
  1740. while (iterator.Next(tuple)) {
  1741. auto t0 = tuple.GetElement(0);
  1742. auto t1 = tuple.GetElement(1);
  1743. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  1744. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  1745. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  1746. }
  1747. UNIT_ASSERT(!iterator.Next(tuple));
  1748. UNIT_ASSERT_EQUAL(u[std::make_pair(TString(), TString("Z"))], 1);
  1749. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1750. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("Y"))], 1);
  1751. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("X"))], 1);
  1752. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1753. UNIT_ASSERT_EQUAL(u.size(), 5);
  1754. }
  1755. }
  1756. Y_UNIT_TEST_LLVM_SPILLING(TestRightSemiWithNullKey1) {
  1757. if (SPILLING && RuntimeVersion < 50) return;
  1758. for (ui32 pass = 0; pass < 1; ++pass) {
  1759. TSetup<LLVM, SPILLING> setup;
  1760. TProgramBuilder& pb = *setup.PgmBuilder;
  1761. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  1762. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  1763. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1764. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1765. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  1766. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1767. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1768. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1769. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1770. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1771. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1772. const auto tupleType = pb.NewTupleType({
  1773. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  1774. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1775. });
  1776. const auto list1 = pb.NewList(tupleType, {
  1777. pb.NewTuple({key0, payload4}),
  1778. pb.NewTuple({key1, payload1}),
  1779. pb.NewTuple({key2, payload2}),
  1780. pb.NewTuple({key3, payload3})
  1781. });
  1782. const auto list2 = pb.NewList(tupleType, {
  1783. pb.NewTuple({key0, payload3}),
  1784. pb.NewTuple({key2, payload4}),
  1785. pb.NewTuple({key3, payload5}),
  1786. pb.NewTuple({key4, payload6})
  1787. });
  1788. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1789. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1790. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1791. }));
  1792. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1793. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1794. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1795. EJoinKind::RightSemi, {0U}, {0U}, {}, {1U, 0U, 0U, 1U}, resultType),
  1796. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1797. );
  1798. if (SPILLING) {
  1799. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1800. }
  1801. const auto graph = setup.BuildGraph(pgmReturn);
  1802. if (SPILLING) {
  1803. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1804. }
  1805. const auto iterator = graph->GetValue().GetListIterator();
  1806. NUdf::TUnboxedValue tuple;
  1807. std::map<std::pair<TString, ui32>, ui32> u;
  1808. while (iterator.Next(tuple)) {
  1809. auto t0 = tuple.GetElement(0);
  1810. auto t1 = tuple.GetElement(1);
  1811. ++u[std::make_pair(TString(t0.AsStringRef()), t1.Get<ui32>())];
  1812. }
  1813. UNIT_ASSERT(!iterator.Next(tuple));
  1814. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("X"), 2)], 1);
  1815. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("Y"), 2)], 1);
  1816. UNIT_ASSERT_EQUAL(u.size(), 2);
  1817. }
  1818. }
  1819. Y_UNIT_TEST_LLVM_SPILLING(TestRightOnlyWithNullKey1) {
  1820. if (SPILLING && RuntimeVersion < 50) return;
  1821. for (ui32 pass = 0; pass < 1; ++pass) {
  1822. TSetup<LLVM, SPILLING> setup;
  1823. TProgramBuilder& pb = *setup.PgmBuilder;
  1824. const auto key0 = pb.NewEmptyOptional(pb.NewDataType(NUdf::TDataType<ui32>::Id, true));
  1825. const auto key1 = pb.NewOptional(pb.NewDataLiteral<ui32>(1));
  1826. const auto key2 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1827. const auto key3 = pb.NewOptional(pb.NewDataLiteral<ui32>(2));
  1828. const auto key4 = pb.NewOptional(pb.NewDataLiteral<ui32>(3));
  1829. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1830. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1831. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1832. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1833. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1834. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1835. const auto tupleType = pb.NewTupleType({
  1836. pb.NewDataType(NUdf::TDataType<ui32>::Id, true),
  1837. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1838. });
  1839. const auto list1 = pb.NewList(tupleType, {
  1840. pb.NewTuple({key0, payload4}),
  1841. pb.NewTuple({key1, payload1}),
  1842. pb.NewTuple({key2, payload2}),
  1843. pb.NewTuple({key3, payload3})
  1844. });
  1845. const auto list2 = pb.NewList(tupleType, {
  1846. pb.NewTuple({key0, payload3}),
  1847. pb.NewTuple({key2, payload4}),
  1848. pb.NewTuple({key3, payload5}),
  1849. pb.NewTuple({key4, payload6})
  1850. });
  1851. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1852. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1853. pb.NewDataType(NUdf::TDataType<ui32>::Id)
  1854. }));
  1855. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1856. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1857. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1858. EJoinKind::RightOnly, {0U}, {0U}, {}, {1U, 0U, 0U, 1U}, resultType),
  1859. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1860. );
  1861. if (SPILLING) {
  1862. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1863. }
  1864. const auto graph = setup.BuildGraph(pgmReturn);
  1865. if (SPILLING) {
  1866. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1867. }
  1868. const auto iterator = graph->GetValue().GetListIterator();
  1869. NUdf::TUnboxedValue tuple;
  1870. std::map<std::pair<TString, ui64>, ui32> u;
  1871. while (iterator.Next(tuple)) {
  1872. auto t0 = tuple.GetElement(0);
  1873. auto t1 = tuple.GetElement(1);
  1874. ++u[std::make_pair(TString(t0.AsStringRef()), t1 ? t1.Get<ui32>() : std::numeric_limits<ui64>::max())];
  1875. // replace NULL with <ui64>::max()
  1876. }
  1877. UNIT_ASSERT(!iterator.Next(tuple));
  1878. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("Z"), 3)], 1);
  1879. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), std::numeric_limits<ui64>::max())], 1);
  1880. UNIT_ASSERT_EQUAL(u.size(), 2);
  1881. }
  1882. }
  1883. Y_UNIT_TEST_LLVM_SPILLING(TestFull1) {
  1884. if (SPILLING && RuntimeVersion < 50) return;
  1885. for (ui32 pass = 0; pass < 1; ++pass) {
  1886. TSetup<LLVM, SPILLING> setup;
  1887. TProgramBuilder& pb = *setup.PgmBuilder;
  1888. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1889. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1890. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1891. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1892. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1893. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1894. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1895. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1896. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1897. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1898. const auto tupleType = pb.NewTupleType({
  1899. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1900. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1901. });
  1902. const auto list1 = pb.NewList(tupleType, {
  1903. pb.NewTuple({key1, payload1}),
  1904. pb.NewTuple({key2, payload2}),
  1905. pb.NewTuple({key3, payload3})
  1906. });
  1907. const auto list2 = pb.NewList(tupleType, {
  1908. pb.NewTuple({key2, payload4}),
  1909. pb.NewTuple({key3, payload5}),
  1910. pb.NewTuple({key4, payload6})
  1911. });
  1912. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1913. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1914. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1915. }));
  1916. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1917. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1918. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1919. EJoinKind::Full, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1920. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1921. );
  1922. if (SPILLING) {
  1923. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1924. }
  1925. const auto graph = setup.BuildGraph(pgmReturn);
  1926. if (SPILLING) {
  1927. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1928. }
  1929. const auto iterator = graph->GetValue().GetListIterator();
  1930. NUdf::TUnboxedValue tuple;
  1931. std::map<std::pair<TString, TString>, ui32> u;
  1932. while (iterator.Next(tuple)) {
  1933. auto t0 = tuple.GetElement(0);
  1934. auto t1 = tuple.GetElement(1);
  1935. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  1936. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  1937. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  1938. }
  1939. UNIT_ASSERT(!iterator.Next(tuple));
  1940. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("X"))], 1);
  1941. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("B"), TString("Y"))], 1);
  1942. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("X"))], 1);
  1943. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("C"), TString("Y"))], 1);
  1944. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString())], 1);
  1945. UNIT_ASSERT_EQUAL(u[std::make_pair(TString(), TString("Z"))], 1);
  1946. UNIT_ASSERT_EQUAL(u.size(), 6);
  1947. }
  1948. }
  1949. Y_UNIT_TEST_LLVM_SPILLING(TestExclusion1) {
  1950. if (SPILLING && RuntimeVersion < 50) return;
  1951. for (ui32 pass = 0; pass < 1; ++pass) {
  1952. TSetup<LLVM, SPILLING> setup;
  1953. TProgramBuilder& pb = *setup.PgmBuilder;
  1954. const auto key1 = pb.NewDataLiteral<ui32>(1);
  1955. const auto key2 = pb.NewDataLiteral<ui32>(2);
  1956. const auto key3 = pb.NewDataLiteral<ui32>(2);
  1957. const auto key4 = pb.NewDataLiteral<ui32>(3);
  1958. const auto payload1 = pb.NewDataLiteral<NUdf::EDataSlot::String>("A");
  1959. const auto payload2 = pb.NewDataLiteral<NUdf::EDataSlot::String>("B");
  1960. const auto payload3 = pb.NewDataLiteral<NUdf::EDataSlot::String>("C");
  1961. const auto payload4 = pb.NewDataLiteral<NUdf::EDataSlot::String>("X");
  1962. const auto payload5 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Y");
  1963. const auto payload6 = pb.NewDataLiteral<NUdf::EDataSlot::String>("Z");
  1964. const auto tupleType = pb.NewTupleType({
  1965. pb.NewDataType(NUdf::TDataType<ui32>::Id),
  1966. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1967. });
  1968. const auto list1 = pb.NewList(tupleType, {
  1969. pb.NewTuple({key1, payload1}),
  1970. pb.NewTuple({key2, payload2}),
  1971. pb.NewTuple({key3, payload3})
  1972. });
  1973. const auto list2 = pb.NewList(tupleType, {
  1974. pb.NewTuple({key2, payload4}),
  1975. pb.NewTuple({key3, payload5}),
  1976. pb.NewTuple({key4, payload6})
  1977. });
  1978. const auto resultType = pb.NewFlowType(pb.NewMultiType({
  1979. pb.NewDataType(NUdf::TDataType<char*>::Id),
  1980. pb.NewDataType(NUdf::TDataType<char*>::Id)
  1981. }));
  1982. const auto pgmReturn = pb.Collect(pb.NarrowMap(pb.GraceJoin(
  1983. pb.ExpandMap(pb.ToFlow(list1), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1984. pb.ExpandMap(pb.ToFlow(list2), [&](TRuntimeNode item) -> TRuntimeNode::TList { return {pb.Nth(item, 0U), pb.Nth(item, 1U)}; }),
  1985. EJoinKind::Exclusion, {0U}, {0U}, {1U, 0U}, {1U, 1U}, resultType),
  1986. [&](TRuntimeNode::TList items) -> TRuntimeNode { return pb.NewTuple(items); })
  1987. );
  1988. if (SPILLING) {
  1989. setup.RenameCallable(pgmReturn, "GraceJoin", "GraceJoinWithSpilling");
  1990. }
  1991. const auto graph = setup.BuildGraph(pgmReturn);
  1992. if (SPILLING) {
  1993. graph->GetContext().SpillerFactory = std::make_shared<TMockSpillerFactory>();
  1994. }
  1995. const auto iterator = graph->GetValue().GetListIterator();
  1996. NUdf::TUnboxedValue tuple;
  1997. std::map<std::pair<TString, TString>, ui32> u;
  1998. while (iterator.Next(tuple)) {
  1999. auto t0 = tuple.GetElement(0);
  2000. auto t1 = tuple.GetElement(1);
  2001. UNIT_ASSERT(!t0 || !t0.AsStringRef().Empty()); // ensure no empty strings
  2002. UNIT_ASSERT(!t1 || !t1.AsStringRef().Empty());
  2003. ++u[std::make_pair(t0 ? TString(t0.AsStringRef()) : TString(), t1 ? TString(t1.AsStringRef()) : TString())];
  2004. }
  2005. UNIT_ASSERT(!iterator.Next(tuple));
  2006. UNIT_ASSERT_EQUAL(u[std::make_pair(TString("A"), TString())], 1);
  2007. UNIT_ASSERT_EQUAL(u[std::make_pair(TString(), TString("Z"))], 1);
  2008. UNIT_ASSERT_EQUAL(u.size(), 2);
  2009. }
  2010. }
  2011. }
  2012. }
  2013. }