mkql_program_builder.cpp 271 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381
  1. #include "mkql_program_builder.h"
  2. #include "mkql_opt_literal.h"
  3. #include "mkql_node_visitor.h"
  4. #include "mkql_node_cast.h"
  5. #include "mkql_runtime_version.h"
  6. #include "yql/essentials/minikql/mkql_node_printer.h"
  7. #include "yql/essentials/minikql/mkql_function_registry.h"
  8. #include "yql/essentials/minikql/mkql_utils.h"
  9. #include "yql/essentials/minikql/mkql_type_builder.h"
  10. #include "yql/essentials/core/sql_types/match_recognize.h"
  11. #include "yql/essentials/core/sql_types/time_order_recover.h"
  12. #include <yql/essentials/parser/pg_catalog/catalog.h>
  13. #include <util/string/cast.h>
  14. #include <util/string/printf.h>
  15. #include <array>
  16. using namespace std::string_view_literals;
  17. namespace NKikimr {
  18. namespace NMiniKQL {
  19. namespace {
  // Flag constants named after traits of a data function. Each non-zero
  // enumerator occupies its own bit; Default is the empty mask.
  // NOTE(review): presumably callers combine these with bitwise OR - confirm at use sites.
  struct TDataFunctionFlags {
      enum {
          Default              = 0,
          HasBooleanResult     = 1 << 0,
          RequiresBooleanArgs  = 1 << 1,
          HasOptionalResult    = 1 << 2,
          AllowOptionalArgs    = 1 << 3,
          HasUi32Result        = 1 << 4,
          RequiresCompare      = 1 << 5,
          HasStringResult      = 1 << 6,
          RequiresStringArgs   = 1 << 7,
          RequiresHash         = 1 << 8,
          RequiresEquals       = 1 << 9,
          AllowNull            = 1 << 10,
          CommonOptionalResult = 1 << 11,
          SupportsTuple        = 1 << 12,
          SameOptionalArgs     = 1 << 13
      };
  };
  39. #define MKQL_BAD_TYPE_VISIT(NodeType, ScriptName) \
  40. void Visit(NodeType& node) override { \
  41. Y_UNUSED(node); \
  42. MKQL_ENSURE(false, "Can't convert " #NodeType " to " ScriptName " object"); \
  43. }
  44. class TPythonTypeChecker : public TExploringNodeVisitor {
  45. using TExploringNodeVisitor::Visit;
  46. MKQL_BAD_TYPE_VISIT(TAnyType, "Python");
  47. };
  48. class TLuaTypeChecker : public TExploringNodeVisitor {
  49. using TExploringNodeVisitor::Visit;
  50. MKQL_BAD_TYPE_VISIT(TVoidType, "Lua");
  51. MKQL_BAD_TYPE_VISIT(TAnyType, "Lua");
  52. MKQL_BAD_TYPE_VISIT(TVariantType, "Lua");
  53. };
  54. class TJavascriptTypeChecker : public TExploringNodeVisitor {
  55. using TExploringNodeVisitor::Visit;
  56. MKQL_BAD_TYPE_VISIT(TAnyType, "Javascript");
  57. };
  58. #undef MKQL_BAD_TYPE_VISIT
  59. void EnsureScriptSpecificTypes(
  60. EScriptType scriptType,
  61. TCallableType* funcType,
  62. const TTypeEnvironment& env)
  63. {
  64. switch (scriptType) {
  65. case EScriptType::Lua:
  66. return TLuaTypeChecker().Walk(funcType, env);
  67. case EScriptType::Python:
  68. case EScriptType::Python2:
  69. case EScriptType::Python3:
  70. case EScriptType::ArcPython:
  71. case EScriptType::ArcPython2:
  72. case EScriptType::ArcPython3:
  73. case EScriptType::CustomPython:
  74. case EScriptType::CustomPython2:
  75. case EScriptType::CustomPython3:
  76. case EScriptType::SystemPython2:
  77. case EScriptType::SystemPython3:
  78. case EScriptType::SystemPython3_8:
  79. case EScriptType::SystemPython3_9:
  80. case EScriptType::SystemPython3_10:
  81. case EScriptType::SystemPython3_11:
  82. case EScriptType::SystemPython3_12:
  83. case EScriptType::SystemPython3_13:
  84. return TPythonTypeChecker().Walk(funcType, env);
  85. case EScriptType::Javascript:
  86. return TJavascriptTypeChecker().Walk(funcType, env);
  87. default:
  88. MKQL_ENSURE(false, "Unknown script type " << static_cast<ui32>(scriptType));
  89. }
  90. }
  91. ui32 GetNumericSchemeTypeLevel(NUdf::TDataTypeId typeId) {
  92. switch (typeId) {
  93. case NUdf::TDataType<ui8>::Id:
  94. return 0;
  95. case NUdf::TDataType<i8>::Id:
  96. return 1;
  97. case NUdf::TDataType<ui16>::Id:
  98. return 2;
  99. case NUdf::TDataType<i16>::Id:
  100. return 3;
  101. case NUdf::TDataType<ui32>::Id:
  102. return 4;
  103. case NUdf::TDataType<i32>::Id:
  104. return 5;
  105. case NUdf::TDataType<ui64>::Id:
  106. return 6;
  107. case NUdf::TDataType<i64>::Id:
  108. return 7;
  109. case NUdf::TDataType<float>::Id:
  110. return 8;
  111. case NUdf::TDataType<double>::Id:
  112. return 9;
  113. default:
  114. ythrow yexception() << "Unknown numeric type: " << typeId;
  115. }
  116. }
  117. NUdf::TDataTypeId GetNumericSchemeTypeByLevel(ui32 level) {
  118. switch (level) {
  119. case 0:
  120. return NUdf::TDataType<ui8>::Id;
  121. case 1:
  122. return NUdf::TDataType<i8>::Id;
  123. case 2:
  124. return NUdf::TDataType<ui16>::Id;
  125. case 3:
  126. return NUdf::TDataType<i16>::Id;
  127. case 4:
  128. return NUdf::TDataType<ui32>::Id;
  129. case 5:
  130. return NUdf::TDataType<i32>::Id;
  131. case 6:
  132. return NUdf::TDataType<ui64>::Id;
  133. case 7:
  134. return NUdf::TDataType<i64>::Id;
  135. case 8:
  136. return NUdf::TDataType<float>::Id;
  137. case 9:
  138. return NUdf::TDataType<double>::Id;
  139. default:
  140. ythrow yexception() << "Unknown numeric level: " << level;
  141. }
  142. }
  143. NUdf::TDataTypeId MakeNumericDataSuperType(NUdf::TDataTypeId typeId1, NUdf::TDataTypeId typeId2) {
  144. return typeId1 == typeId2 ? typeId1 :
  145. GetNumericSchemeTypeByLevel(std::max(GetNumericSchemeTypeLevel(typeId1), GetNumericSchemeTypeLevel(typeId2)));
  146. }
  147. template<bool IsFilter>
  148. bool CollectOptionalElements(const TType* type, std::vector<std::string_view>& test, std::vector<std::pair<std::string_view, TType*>>& output) {
  149. const auto structType = AS_TYPE(TStructType, type);
  150. test.reserve(structType->GetMembersCount());
  151. output.reserve(structType->GetMembersCount());
  152. bool multiOptional = false;
  153. for (ui32 i = 0; i < structType->GetMembersCount(); ++i) {
  154. output.emplace_back(structType->GetMemberName(i), structType->GetMemberType(i));
  155. auto& memberType = output.back().second;
  156. if (memberType->IsOptional()) {
  157. test.emplace_back(output.back().first);
  158. if constexpr (IsFilter) {
  159. memberType = AS_TYPE(TOptionalType, memberType)->GetItemType();
  160. multiOptional = multiOptional || memberType->IsOptional();
  161. }
  162. }
  163. }
  164. return multiOptional;
  165. }
  166. template<bool IsFilter>
  167. bool CollectOptionalElements(const TType* type, std::vector<ui32>& test, std::vector<TType*>& output) {
  168. const auto typleType = AS_TYPE(TTupleType, type);
  169. test.reserve(typleType->GetElementsCount());
  170. output.reserve(typleType->GetElementsCount());
  171. bool multiOptional = false;
  172. for (ui32 i = 0; i < typleType->GetElementsCount(); ++i) {
  173. output.emplace_back(typleType->GetElementType(i));
  174. auto& elementType = output.back();
  175. if (elementType->IsOptional()) {
  176. test.emplace_back(i);
  177. if constexpr (IsFilter) {
  178. elementType = AS_TYPE(TOptionalType, elementType)->GetItemType();
  179. multiOptional = multiOptional || elementType->IsOptional();
  180. }
  181. }
  182. }
  183. return multiOptional;
  184. }
  185. bool ReduceOptionalElements(const TType* type, const TArrayRef<const std::string_view>& test, std::vector<std::pair<std::string_view, TType*>>& output) {
  186. const auto structType = AS_TYPE(TStructType, type);
  187. output.reserve(structType->GetMembersCount());
  188. for (ui32 i = 0U; i < structType->GetMembersCount(); ++i) {
  189. output.emplace_back(structType->GetMemberName(i), structType->GetMemberType(i));
  190. }
  191. bool multiOptional = false;
  192. for (const auto& member : test) {
  193. auto& memberType = output[structType->GetMemberIndex(member)].second;
  194. MKQL_ENSURE(memberType->IsOptional(), "Required optional column type");
  195. memberType = AS_TYPE(TOptionalType, memberType)->GetItemType();
  196. multiOptional = multiOptional || memberType->IsOptional();
  197. }
  198. return multiOptional;
  199. }
  200. bool ReduceOptionalElements(const TType* type, const TArrayRef<const ui32>& test, std::vector<TType*>& output) {
  201. const auto typleType = AS_TYPE(TTupleType, type);
  202. output.reserve(typleType->GetElementsCount());
  203. for (ui32 i = 0U; i < typleType->GetElementsCount(); ++i) {
  204. output.emplace_back(typleType->GetElementType(i));
  205. }
  206. bool multiOptional = false;
  207. for (const auto& member : test) {
  208. auto& memberType = output[member];
  209. MKQL_ENSURE(memberType->IsOptional(), "Required optional column type");
  210. memberType = AS_TYPE(TOptionalType, memberType)->GetItemType();
  211. multiOptional = multiOptional || memberType->IsOptional();
  212. }
  213. return multiOptional;
  214. }
  215. static std::vector<TType*> ValidateBlockItems(const TArrayRef<TType* const>& wideComponents, bool unwrap) {
  216. MKQL_ENSURE(wideComponents.size() > 0, "Expected at least one column");
  217. std::vector<TType*> items;
  218. items.reserve(wideComponents.size());
  219. // XXX: Declare these variables outside the loop body to use for the last
  220. // item (i.e. block length column) in the assertions below.
  221. bool isScalar;
  222. TType* itemType;
  223. for (const auto& wideComponent : wideComponents) {
  224. auto blockType = AS_TYPE(TBlockType, wideComponent);
  225. isScalar = blockType->GetShape() == TBlockType::EShape::Scalar;
  226. itemType = blockType->GetItemType();
  227. items.push_back(unwrap ? itemType : blockType);
  228. }
  229. MKQL_ENSURE(isScalar, "Last column should be scalar");
  230. MKQL_ENSURE(AS_TYPE(TDataType, itemType)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected Uint64");
  231. return items;
  232. }
  233. std::vector<TType*> ValidateBlockStreamType(const TType* streamType, bool unwrap = true) {
  234. const auto wideComponents = GetWideComponents(AS_TYPE(TStreamType, streamType));
  235. return ValidateBlockItems(wideComponents, unwrap);
  236. }
  237. std::vector<TType*> ValidateBlockFlowType(const TType* flowType, bool unwrap = true) {
  238. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flowType));
  239. return ValidateBlockItems(wideComponents, unwrap);
  240. }
  241. } // namespace
  242. std::string_view ScriptTypeAsStr(EScriptType type) {
  243. switch (type) {
  244. #define MKQL_SCRIPT_TYPE_CASE(name, value, ...) \
  245. case EScriptType::name: return std::string_view(#name);
  246. MKQL_SCRIPT_TYPES(MKQL_SCRIPT_TYPE_CASE)
  247. #undef MKQL_SCRIPT_TYPE_CASE
  248. } // switch
  249. return std::string_view("Unknown");
  250. }
  251. EScriptType ScriptTypeFromStr(std::string_view str) {
  252. TString lowerStr = TString(str);
  253. lowerStr.to_lower();
  254. #define MKQL_SCRIPT_TYPE_FROM_STR(name, value, lowerName, allowSuffix) \
  255. if ((allowSuffix && lowerStr.StartsWith(#lowerName)) || lowerStr == #lowerName) return EScriptType::name;
  256. MKQL_SCRIPT_TYPES(MKQL_SCRIPT_TYPE_FROM_STR)
  257. #undef MKQL_SCRIPT_TYPE_FROM_STR
  258. return EScriptType::Unknown;
  259. }
  260. bool IsCustomPython(EScriptType type) {
  261. return type == EScriptType::CustomPython ||
  262. type == EScriptType::CustomPython2 ||
  263. type == EScriptType::CustomPython3;
  264. }
  265. bool IsSystemPython(EScriptType type) {
  266. return type == EScriptType::SystemPython2
  267. || type == EScriptType::SystemPython3
  268. || type == EScriptType::SystemPython3_8
  269. || type == EScriptType::SystemPython3_9
  270. || type == EScriptType::SystemPython3_10
  271. || type == EScriptType::SystemPython3_11
  272. || type == EScriptType::SystemPython3_12
  273. || type == EScriptType::SystemPython3_13
  274. || type == EScriptType::Python
  275. || type == EScriptType::Python2;
  276. }
  277. EScriptType CanonizeScriptType(EScriptType type) {
  278. if (type == EScriptType::Python) {
  279. return EScriptType::Python2;
  280. }
  281. if (type == EScriptType::ArcPython) {
  282. return EScriptType::ArcPython2;
  283. }
  284. return type;
  285. }
  286. void EnsureDataOrOptionalOfData(TRuntimeNode node) {
  287. MKQL_ENSURE(node.GetStaticType()->IsData() ||
  288. node.GetStaticType()->IsOptional() && AS_TYPE(TOptionalType, node.GetStaticType())
  289. ->GetItemType()->IsData(), "Expected data or optional of data");
  290. }
  291. TProgramBuilder::TProgramBuilder(const TTypeEnvironment& env, const IFunctionRegistry& functionRegistry, bool voidWithEffects)
  292. : TTypeBuilder(env)
  293. , FunctionRegistry(functionRegistry)
  294. , VoidWithEffects(voidWithEffects)
  295. {}
  296. const TTypeEnvironment& TProgramBuilder::GetTypeEnvironment() const {
  297. return Env;
  298. }
  299. const IFunctionRegistry& TProgramBuilder::GetFunctionRegistry() const {
  300. return FunctionRegistry;
  301. }
  302. TType* TProgramBuilder::ChooseCommonType(TType* type1, TType* type2) {
  303. bool isOptional1, isOptional2;
  304. const auto data1 = UnpackOptionalData(type1, isOptional1);
  305. const auto data2 = UnpackOptionalData(type2, isOptional2);
  306. if (data1->IsSameType(*data2)) {
  307. return isOptional1 ? type1 : type2;
  308. }
  309. MKQL_ENSURE(!
  310. ((NUdf::GetDataTypeInfo(*data1->GetDataSlot()).Features | NUdf::GetDataTypeInfo(*data2->GetDataSlot()).Features) & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType)),
  311. "Not same date types: " << *type1 << " and " << *type2
  312. );
  313. const auto data = NewDataType(MakeNumericDataSuperType(data1->GetSchemeType(), data2->GetSchemeType()));
  314. return isOptional1 || isOptional2 ? NewOptionalType(data) : data;
  315. }
  316. TType* TProgramBuilder::BuildArithmeticCommonType(TType* type1, TType* type2) {
  317. bool isOptional1, isOptional2;
  318. const auto data1 = UnpackOptionalData(type1, isOptional1);
  319. const auto data2 = UnpackOptionalData(type2, isOptional2);
  320. const auto features1 = NUdf::GetDataTypeInfo(*data1->GetDataSlot()).Features;
  321. const auto features2 = NUdf::GetDataTypeInfo(*data2->GetDataSlot()).Features;
  322. const bool isOptional = isOptional1 || isOptional2;
  323. if (features1 & features2 & NUdf::EDataTypeFeatures::TimeIntervalType) {
  324. return NewOptionalType(features1 & NUdf::EDataTypeFeatures::BigDateType ? data1 : data2);
  325. } else if (features1 & NUdf::EDataTypeFeatures::TimeIntervalType) {
  326. return NewOptionalType(features2 & NUdf::EDataTypeFeatures::IntegralType ? data1 : data2);
  327. } else if (features2 & NUdf::EDataTypeFeatures::TimeIntervalType) {
  328. return NewOptionalType(features1 & NUdf::EDataTypeFeatures::IntegralType ? data2 : data1);
  329. } else if (
  330. features1 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType) &&
  331. features2 & (NUdf::EDataTypeFeatures::DateType | NUdf::EDataTypeFeatures::TzDateType)
  332. ) {
  333. const auto used = ((features1 | features2) & NUdf::EDataTypeFeatures::BigDateType)
  334. ? NewDataType(NUdf::EDataSlot::Interval64)
  335. : NewDataType(NUdf::EDataSlot::Interval);
  336. return isOptional ? NewOptionalType(used) : used;
  337. } else if (data1->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  338. MKQL_ENSURE(data1->IsSameType(*data2), "Must be same type.");
  339. return isOptional ? NewOptionalType(data1) : data2;
  340. }
  341. const auto data = NewDataType(MakeNumericDataSuperType(data1->GetSchemeType(), data2->GetSchemeType()));
  342. return isOptional ? NewOptionalType(data) : data;
  343. }
  344. TRuntimeNode TProgramBuilder::Arg(TType* type) const {
  345. TCallableBuilder builder(Env, __func__, type, true);
  346. return TRuntimeNode(builder.Build(), false);
  347. }
  348. TRuntimeNode TProgramBuilder::WideFlowArg(TType* type) const {
  349. if constexpr (RuntimeVersion < 18U) {
  350. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  351. }
  352. TCallableBuilder builder(Env, __func__, type, true);
  353. return TRuntimeNode(builder.Build(), false);
  354. }
  355. TRuntimeNode TProgramBuilder::Member(TRuntimeNode structObj, const std::string_view& memberName) {
  356. bool isOptional;
  357. const auto type = AS_TYPE(TStructType, UnpackOptional(structObj.GetStaticType(), isOptional));
  358. const auto memberIndex = type->GetMemberIndex(memberName);
  359. auto memberType = type->GetMemberType(memberIndex);
  360. if (isOptional && !memberType->IsOptional() && !memberType->IsNull() && !memberType->IsPg()) {
  361. memberType = NewOptionalType(memberType);
  362. }
  363. TCallableBuilder callableBuilder(Env, __func__, memberType);
  364. callableBuilder.Add(structObj);
  365. callableBuilder.Add(NewDataLiteral<ui32>(memberIndex));
  366. return TRuntimeNode(callableBuilder.Build(), false);
  367. }
  368. TRuntimeNode TProgramBuilder::Element(TRuntimeNode structObj, const std::string_view& memberName) {
  369. return Member(structObj, memberName);
  370. }
  371. TRuntimeNode TProgramBuilder::AddMember(TRuntimeNode structObj, const std::string_view& memberName, TRuntimeNode memberValue) {
  372. auto oldType = structObj.GetStaticType();
  373. MKQL_ENSURE(oldType->IsStruct(), "Expected struct");
  374. const auto& oldTypeDetailed = static_cast<const TStructType&>(*oldType);
  375. TStructTypeBuilder newTypeBuilder(Env);
  376. newTypeBuilder.Reserve(oldTypeDetailed.GetMembersCount() + 1);
  377. for (ui32 i = 0, e = oldTypeDetailed.GetMembersCount(); i < e; ++i) {
  378. newTypeBuilder.Add(oldTypeDetailed.GetMemberName(i), oldTypeDetailed.GetMemberType(i));
  379. }
  380. newTypeBuilder.Add(memberName, memberValue.GetStaticType());
  381. auto newType = newTypeBuilder.Build();
  382. for (ui32 i = 0, e = newType->GetMembersCount(); i < e; ++i) {
  383. if (newType->GetMemberName(i) == memberName) {
  384. // insert at position i in the struct
  385. TCallableBuilder callableBuilder(Env, __func__, newType);
  386. callableBuilder.Add(structObj);
  387. callableBuilder.Add(memberValue);
  388. callableBuilder.Add(NewDataLiteral<ui32>(i));
  389. return TRuntimeNode(callableBuilder.Build(), false);
  390. }
  391. }
  392. Y_ABORT();
  393. }
  394. TRuntimeNode TProgramBuilder::RemoveMember(TRuntimeNode structObj, const std::string_view& memberName, bool forced) {
  395. auto oldType = structObj.GetStaticType();
  396. MKQL_ENSURE(oldType->IsStruct(), "Expected struct");
  397. const auto& oldTypeDetailed = static_cast<const TStructType&>(*oldType);
  398. MKQL_ENSURE(oldTypeDetailed.GetMembersCount() > 0, "Expected non-empty struct");
  399. TStructTypeBuilder newTypeBuilder(Env);
  400. newTypeBuilder.Reserve(oldTypeDetailed.GetMembersCount() - 1);
  401. std::optional<ui32> memberIndex;
  402. for (ui32 i = 0, e = oldTypeDetailed.GetMembersCount(); i < e; ++i) {
  403. if (oldTypeDetailed.GetMemberName(i) != memberName) {
  404. newTypeBuilder.Add(oldTypeDetailed.GetMemberName(i), oldTypeDetailed.GetMemberType(i));
  405. }
  406. else {
  407. memberIndex = i;
  408. }
  409. }
  410. if (!memberIndex && forced) {
  411. return structObj;
  412. }
  413. MKQL_ENSURE(memberIndex, "Unknown member name: " << memberName);
  414. // remove at position i in the struct
  415. auto newType = newTypeBuilder.Build();
  416. TCallableBuilder callableBuilder(Env, __func__, newType);
  417. callableBuilder.Add(structObj);
  418. callableBuilder.Add(NewDataLiteral<ui32>(*memberIndex));
  419. return TRuntimeNode(callableBuilder.Build(), false);
  420. }
  421. TRuntimeNode TProgramBuilder::Zip(const TArrayRef<const TRuntimeNode>& lists) {
  422. if (lists.empty()) {
  423. return NewEmptyList(Env.GetEmptyTupleLazy()->GetGenericType());
  424. }
  425. std::vector<TType*> tupleTypes;
  426. tupleTypes.reserve(lists.size());
  427. for (auto& list : lists) {
  428. if (list.GetStaticType()->IsEmptyList()) {
  429. tupleTypes.push_back(Env.GetTypeOfVoidLazy());
  430. continue;
  431. }
  432. AS_TYPE(TListType, list.GetStaticType());
  433. auto itemType = static_cast<const TListType&>(*list.GetStaticType()).GetItemType();
  434. tupleTypes.push_back(itemType);
  435. }
  436. auto returnType = TListType::Create(TTupleType::Create(tupleTypes.size(), tupleTypes.data(), Env), Env);
  437. TCallableBuilder callableBuilder(Env, __func__, returnType);
  438. for (auto& list : lists) {
  439. callableBuilder.Add(list);
  440. }
  441. return TRuntimeNode(callableBuilder.Build(), false);
  442. }
  443. TRuntimeNode TProgramBuilder::ZipAll(const TArrayRef<const TRuntimeNode>& lists) {
  444. if (lists.empty()) {
  445. return NewEmptyList(Env.GetEmptyTupleLazy()->GetGenericType());
  446. }
  447. std::vector<TType*> tupleTypes;
  448. tupleTypes.reserve(lists.size());
  449. for (auto& list : lists) {
  450. if (list.GetStaticType()->IsEmptyList()) {
  451. tupleTypes.push_back(TOptionalType::Create(Env.GetTypeOfVoidLazy(), Env));
  452. continue;
  453. }
  454. AS_TYPE(TListType, list.GetStaticType());
  455. auto itemType = static_cast<const TListType&>(*list.GetStaticType()).GetItemType();
  456. tupleTypes.push_back(TOptionalType::Create(itemType, Env));
  457. }
  458. auto returnType = TListType::Create(TTupleType::Create(tupleTypes.size(), tupleTypes.data(), Env), Env);
  459. TCallableBuilder callableBuilder(Env, __func__, returnType);
  460. for (auto& list : lists) {
  461. callableBuilder.Add(list);
  462. }
  463. return TRuntimeNode(callableBuilder.Build(), false);
  464. }
  465. TRuntimeNode TProgramBuilder::Enumerate(TRuntimeNode list, TRuntimeNode start, TRuntimeNode step) {
  466. const auto itemType = AS_TYPE(TListType, list.GetStaticType())->GetItemType();
  467. ThrowIfListOfVoid(itemType);
  468. MKQL_ENSURE(AS_TYPE(TDataType, start)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected Uint64 as start");
  469. MKQL_ENSURE(AS_TYPE(TDataType, step)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected Uint64 as step");
  470. const std::array<TType*, 2U> tupleTypes = {{ NewDataType(NUdf::EDataSlot::Uint64), itemType }};
  471. const auto returnType = NewListType(NewTupleType(tupleTypes));
  472. TCallableBuilder callableBuilder(Env, __func__, returnType);
  473. callableBuilder.Add(list);
  474. callableBuilder.Add(start);
  475. callableBuilder.Add(step);
  476. return TRuntimeNode(callableBuilder.Build(), false);
  477. }
  478. TRuntimeNode TProgramBuilder::Enumerate(TRuntimeNode list) {
  479. return TProgramBuilder::Enumerate(list, NewDataLiteral<ui64>(0), NewDataLiteral<ui64>(1));
  480. }
  481. TRuntimeNode TProgramBuilder::Fold(TRuntimeNode list, TRuntimeNode state, const TBinaryLambda& handler) {
  482. const auto itemType = AS_TYPE(TListType, list.GetStaticType())->GetItemType();
  483. ThrowIfListOfVoid(itemType);
  484. const auto stateNodeArg = Arg(state.GetStaticType());
  485. const auto itemArg = Arg(itemType);
  486. const auto newState = handler(itemArg, stateNodeArg);
  487. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*state.GetStaticType()), "State type is changed by the handler");
  488. TCallableBuilder callableBuilder(Env, __func__, state.GetStaticType());
  489. callableBuilder.Add(list);
  490. callableBuilder.Add(state);
  491. callableBuilder.Add(itemArg);
  492. callableBuilder.Add(stateNodeArg);
  493. callableBuilder.Add(newState);
  494. return TRuntimeNode(callableBuilder.Build(), false);
  495. }
  496. TRuntimeNode TProgramBuilder::Fold1(TRuntimeNode list, const TUnaryLambda& init, const TBinaryLambda& handler) {
  497. const auto itemType = AS_TYPE(TListType, list.GetStaticType())->GetItemType();
  498. ThrowIfListOfVoid(itemType);
  499. const auto itemArg = Arg(itemType);
  500. const auto initState = init(itemArg);
  501. const auto stateNodeArg = Arg(initState.GetStaticType());
  502. const auto newState = handler(itemArg, stateNodeArg);
  503. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*initState.GetStaticType()), "State type is changed by the handler");
  504. TCallableBuilder callableBuilder(Env, __func__, NewOptionalType(newState.GetStaticType()));
  505. callableBuilder.Add(list);
  506. callableBuilder.Add(itemArg);
  507. callableBuilder.Add(initState);
  508. callableBuilder.Add(stateNodeArg);
  509. callableBuilder.Add(newState);
  510. return TRuntimeNode(callableBuilder.Build(), false);
  511. }
  512. TRuntimeNode TProgramBuilder::Reduce(TRuntimeNode list, TRuntimeNode state1,
  513. const TBinaryLambda& handler1,
  514. const TUnaryLambda& handler2,
  515. TRuntimeNode state3,
  516. const TBinaryLambda& handler3) {
  517. const auto listType = list.GetStaticType();
  518. MKQL_ENSURE(listType->IsList() || listType->IsStream(), "Expected list or stream");
  519. const auto itemType = listType->IsList()?
  520. static_cast<const TListType&>(*listType).GetItemType():
  521. static_cast<const TStreamType&>(*listType).GetItemType();
  522. ThrowIfListOfVoid(itemType);
  523. const auto state1NodeArg = Arg(state1.GetStaticType());
  524. const auto state3NodeArg = Arg(state3.GetStaticType());
  525. const auto itemArg = Arg(itemType);
  526. const auto newState1 = handler1(itemArg, state1NodeArg);
  527. MKQL_ENSURE(newState1.GetStaticType()->IsSameType(*state1.GetStaticType()), "State 1 type is changed by the handler");
  528. const auto newState2 = handler2(state1NodeArg);
  529. TRuntimeNode itemState2Arg = Arg(newState2.GetStaticType());
  530. const auto newState3 = handler3(itemState2Arg, state3NodeArg);
  531. MKQL_ENSURE(newState3.GetStaticType()->IsSameType(*state3.GetStaticType()), "State 3 type is changed by the handler");
  532. TCallableBuilder callableBuilder(Env, __func__, newState3.GetStaticType());
  533. callableBuilder.Add(list);
  534. callableBuilder.Add(state1);
  535. callableBuilder.Add(state3);
  536. callableBuilder.Add(itemArg);
  537. callableBuilder.Add(state1NodeArg);
  538. callableBuilder.Add(newState1);
  539. callableBuilder.Add(newState2);
  540. callableBuilder.Add(itemState2Arg);
  541. callableBuilder.Add(state3NodeArg);
  542. callableBuilder.Add(newState3);
  543. return TRuntimeNode(callableBuilder.Build(), false);
  544. }
  545. TRuntimeNode TProgramBuilder::Condense(TRuntimeNode flow, TRuntimeNode state,
  546. const TBinaryLambda& switcher,
  547. const TBinaryLambda& handler, bool useCtx) {
  548. const auto flowType = flow.GetStaticType();
  549. if (flowType->IsList()) {
  550. // TODO: Native implementation for list.
  551. return Collect(Condense(ToFlow(flow), state, switcher, handler));
  552. }
  553. MKQL_ENSURE(flowType->IsFlow() || flowType->IsStream(), "Expected flow or stream.");
  554. const auto itemType = flowType->IsFlow() ?
  555. static_cast<const TFlowType&>(*flowType).GetItemType():
  556. static_cast<const TStreamType&>(*flowType).GetItemType();
  557. const auto itemArg = Arg(itemType);
  558. const auto stateArg = Arg(state.GetStaticType());
  559. const auto outSwitch = switcher(itemArg, stateArg);
  560. const auto newState = handler(itemArg, stateArg);
  561. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*state.GetStaticType()), "State type is changed by the handler");
  562. TCallableBuilder callableBuilder(Env, __func__, flowType->IsFlow() ? NewFlowType(state.GetStaticType()) : NewStreamType(state.GetStaticType()));
  563. callableBuilder.Add(flow);
  564. callableBuilder.Add(state);
  565. callableBuilder.Add(itemArg);
  566. callableBuilder.Add(stateArg);
  567. callableBuilder.Add(outSwitch);
  568. callableBuilder.Add(newState);
  569. if (useCtx) {
  570. MKQL_ENSURE(RuntimeVersion >= 30U, "Too old runtime version");
  571. callableBuilder.Add(NewDataLiteral<bool>(useCtx));
  572. }
  573. return TRuntimeNode(callableBuilder.Build(), false);
  574. }
  575. TRuntimeNode TProgramBuilder::Condense1(TRuntimeNode flow, const TUnaryLambda& init,
  576. const TBinaryLambda& switcher,
  577. const TBinaryLambda& handler, bool useCtx) {
  578. const auto flowType = flow.GetStaticType();
  579. if (flowType->IsList()) {
  580. // TODO: Native implementation for list.
  581. return Collect(Condense1(ToFlow(flow), init, switcher, handler));
  582. }
  583. MKQL_ENSURE(flowType->IsFlow() || flowType->IsStream(), "Expected flow or stream.");
  584. const auto itemType = flowType->IsFlow() ?
  585. static_cast<const TFlowType&>(*flowType).GetItemType():
  586. static_cast<const TStreamType&>(*flowType).GetItemType();
  587. const auto itemArg = Arg(itemType);
  588. const auto initState = init(itemArg);
  589. const auto stateArg = Arg(initState.GetStaticType());
  590. const auto outSwitch = switcher(itemArg, stateArg);
  591. const auto newState = handler(itemArg, stateArg);
  592. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*initState.GetStaticType()), "State type is changed by the handler");
  593. TCallableBuilder callableBuilder(Env, __func__, flowType->IsFlow() ? NewFlowType(newState.GetStaticType()) : NewStreamType(newState.GetStaticType()));
  594. callableBuilder.Add(flow);
  595. callableBuilder.Add(itemArg);
  596. callableBuilder.Add(initState);
  597. callableBuilder.Add(stateArg);
  598. callableBuilder.Add(outSwitch);
  599. callableBuilder.Add(newState);
  600. if (useCtx) {
  601. MKQL_ENSURE(RuntimeVersion >= 30U, "Too old runtime version");
  602. callableBuilder.Add(NewDataLiteral<bool>(useCtx));
  603. }
  604. return TRuntimeNode(callableBuilder.Build(), false);
  605. }
  606. TRuntimeNode TProgramBuilder::Squeeze(TRuntimeNode stream, TRuntimeNode state,
  607. const TBinaryLambda& handler,
  608. const TUnaryLambda& save,
  609. const TUnaryLambda& load) {
  610. const auto streamType = stream.GetStaticType();
  611. MKQL_ENSURE(streamType->IsStream(), "Expected stream");
  612. const auto& streamDetailedType = static_cast<const TStreamType&>(*streamType);
  613. const auto itemType = streamDetailedType.GetItemType();
  614. ThrowIfListOfVoid(itemType);
  615. const auto stateNodeArg = Arg(state.GetStaticType());
  616. const auto itemArg = Arg(itemType);
  617. const auto newState = handler(itemArg, stateNodeArg);
  618. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*state.GetStaticType()), "State type is changed by the handler");
  619. TRuntimeNode saveArg, outSave, loadArg, outLoad;
  620. if (save && load) {
  621. outSave = save(saveArg = Arg(state.GetStaticType()));
  622. outLoad = load(loadArg = Arg(outSave.GetStaticType()));
  623. MKQL_ENSURE(outLoad.GetStaticType()->IsSameType(*state.GetStaticType()), "Loaded type is changed by the load handler");
  624. } else {
  625. saveArg = outSave = loadArg = outLoad = NewVoid();
  626. }
  627. TCallableBuilder callableBuilder(Env, __func__, TStreamType::Create(state.GetStaticType(), Env));
  628. callableBuilder.Add(stream);
  629. callableBuilder.Add(state);
  630. callableBuilder.Add(itemArg);
  631. callableBuilder.Add(stateNodeArg);
  632. callableBuilder.Add(newState);
  633. callableBuilder.Add(saveArg);
  634. callableBuilder.Add(outSave);
  635. callableBuilder.Add(loadArg);
  636. callableBuilder.Add(outLoad);
  637. return TRuntimeNode(callableBuilder.Build(), false);
  638. }
  639. TRuntimeNode TProgramBuilder::Squeeze1(TRuntimeNode stream, const TUnaryLambda& init,
  640. const TBinaryLambda& handler,
  641. const TUnaryLambda& save,
  642. const TUnaryLambda& load) {
  643. const auto streamType = stream.GetStaticType();
  644. MKQL_ENSURE(streamType->IsStream(), "Expected stream");
  645. const auto& streamDetailedType = static_cast<const TStreamType&>(*streamType);
  646. const auto itemType = streamDetailedType.GetItemType();
  647. ThrowIfListOfVoid(itemType);
  648. const auto itemArg = Arg(itemType);
  649. const auto initState = init(itemArg);
  650. const auto stateNodeArg = Arg(initState.GetStaticType());
  651. const auto newState = handler(itemArg, stateNodeArg);
  652. MKQL_ENSURE(newState.GetStaticType()->IsSameType(*initState.GetStaticType()), "State type is changed by the handler");
  653. TRuntimeNode saveArg, outSave, loadArg, outLoad;
  654. if (save && load) {
  655. outSave = save(saveArg = Arg(initState.GetStaticType()));
  656. outLoad = load(loadArg = Arg(outSave.GetStaticType()));
  657. MKQL_ENSURE(outLoad.GetStaticType()->IsSameType(*initState.GetStaticType()), "Loaded type is changed by the load handler");
  658. } else {
  659. saveArg = outSave = loadArg = outLoad = NewVoid();
  660. }
  661. TCallableBuilder callableBuilder(Env, __func__, NewStreamType(newState.GetStaticType()));
  662. callableBuilder.Add(stream);
  663. callableBuilder.Add(itemArg);
  664. callableBuilder.Add(initState);
  665. callableBuilder.Add(stateNodeArg);
  666. callableBuilder.Add(newState);
  667. callableBuilder.Add(saveArg);
  668. callableBuilder.Add(outSave);
  669. callableBuilder.Add(loadArg);
  670. callableBuilder.Add(outLoad);
  671. return TRuntimeNode(callableBuilder.Build(), false);
  672. }
  673. TRuntimeNode TProgramBuilder::Discard(TRuntimeNode stream) {
  674. const auto streamType = stream.GetStaticType();
  675. MKQL_ENSURE(streamType->IsStream() || streamType->IsFlow(), "Expected stream or flow.");
  676. TCallableBuilder callableBuilder(Env, __func__, streamType);
  677. callableBuilder.Add(stream);
  678. return TRuntimeNode(callableBuilder.Build(), false);
  679. }
  680. TRuntimeNode TProgramBuilder::Map(TRuntimeNode list, const TUnaryLambda& handler) {
  681. return BuildMap(__func__, list, handler);
  682. }
  683. TRuntimeNode TProgramBuilder::OrderedMap(TRuntimeNode list, const TUnaryLambda& handler) {
  684. return BuildMap(__func__, list, handler);
  685. }
  686. TRuntimeNode TProgramBuilder::MapNext(TRuntimeNode list, const TBinaryLambda& handler) {
  687. const auto listType = list.GetStaticType();
  688. MKQL_ENSURE(listType->IsStream() || listType->IsFlow(), "Expected stream or flow");
  689. const auto itemType = listType->IsFlow() ?
  690. AS_TYPE(TFlowType, listType)->GetItemType():
  691. AS_TYPE(TStreamType, listType)->GetItemType();
  692. ThrowIfListOfVoid(itemType);
  693. TType* nextItemType = TOptionalType::Create(itemType, Env);
  694. const auto itemArg = Arg(itemType);
  695. const auto nextItemArg = Arg(nextItemType);
  696. const auto newItem = handler(itemArg, nextItemArg);
  697. const auto resultListType = listType->IsFlow() ?
  698. (TType*)TFlowType::Create(newItem.GetStaticType(), Env):
  699. (TType*)TStreamType::Create(newItem.GetStaticType(), Env);
  700. TCallableBuilder callableBuilder(Env, __func__, resultListType);
  701. callableBuilder.Add(list);
  702. callableBuilder.Add(itemArg);
  703. callableBuilder.Add(nextItemArg);
  704. callableBuilder.Add(newItem);
  705. return TRuntimeNode(callableBuilder.Build(), false);
  706. }
  707. template <bool Ordered>
  708. TRuntimeNode TProgramBuilder::BuildExtract(TRuntimeNode list, const std::string_view& name) {
  709. const auto listType = list.GetStaticType();
  710. MKQL_ENSURE(listType->IsList() || listType->IsOptional(), "Expected list or optional.");
  711. const auto itemType = listType->IsList() ?
  712. AS_TYPE(TListType, listType)->GetItemType():
  713. AS_TYPE(TOptionalType, listType)->GetItemType();
  714. const auto lambda = [&](TRuntimeNode item) {
  715. return itemType->IsStruct() ? Member(item, name) : Nth(item, ::FromString<ui32>(name));
  716. };
  717. return Ordered ? OrderedMap(list, lambda) : Map(list, lambda);
  718. }
  719. TRuntimeNode TProgramBuilder::Extract(TRuntimeNode list, const std::string_view& name) {
  720. return BuildExtract<false>(list, name);
  721. }
  722. TRuntimeNode TProgramBuilder::OrderedExtract(TRuntimeNode list, const std::string_view& name) {
  723. return BuildExtract<true>(list, name);
  724. }
  725. TRuntimeNode TProgramBuilder::ChainMap(TRuntimeNode list, TRuntimeNode state, const TBinaryLambda& handler) {
  726. return ChainMap(list, state, [&](TRuntimeNode item, TRuntimeNode state) -> TRuntimeNodePair {
  727. const auto result = handler(item, state);
  728. return {result, result};
  729. });
  730. }
  731. TRuntimeNode TProgramBuilder::ChainMap(TRuntimeNode list, TRuntimeNode state, const TBinarySplitLambda& handler) {
  732. const auto listType = list.GetStaticType();
  733. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream(), "Expected flow, list or stream");
  734. const auto itemType = listType->IsFlow() ?
  735. AS_TYPE(TFlowType, listType)->GetItemType():
  736. listType->IsList() ?
  737. AS_TYPE(TListType, listType)->GetItemType():
  738. AS_TYPE(TStreamType, listType)->GetItemType();
  739. ThrowIfListOfVoid(itemType);
  740. const auto stateNodeArg = Arg(state.GetStaticType());
  741. const auto itemArg = Arg(itemType);
  742. const auto newItemAndState = handler(itemArg, stateNodeArg);
  743. MKQL_ENSURE(std::get<1U>(newItemAndState).GetStaticType()->IsSameType(*state.GetStaticType()), "State type is changed by the handler");
  744. const auto resultItemType = std::get<0U>(newItemAndState).GetStaticType();
  745. TType* resultListType = nullptr;
  746. if (listType->IsFlow()) {
  747. resultListType = TFlowType::Create(resultItemType, Env);
  748. } else if (listType->IsList()) {
  749. resultListType = TListType::Create(resultItemType, Env);
  750. } else if (listType->IsStream()) {
  751. resultListType = TStreamType::Create(resultItemType, Env);
  752. }
  753. TCallableBuilder callableBuilder(Env, __func__, resultListType);
  754. callableBuilder.Add(list);
  755. callableBuilder.Add(state);
  756. callableBuilder.Add(itemArg);
  757. callableBuilder.Add(stateNodeArg);
  758. callableBuilder.Add(std::get<0U>(newItemAndState));
  759. callableBuilder.Add(std::get<1U>(newItemAndState));
  760. return TRuntimeNode(callableBuilder.Build(), false);
  761. }
  762. TRuntimeNode TProgramBuilder::Chain1Map(TRuntimeNode list, const TUnaryLambda& init, const TBinaryLambda& handler) {
  763. return Chain1Map(list,
  764. [&](TRuntimeNode item) -> TRuntimeNodePair {
  765. const auto result = init(item);
  766. return {result, result};
  767. },
  768. [&](TRuntimeNode item, TRuntimeNode state) -> TRuntimeNodePair {
  769. const auto result = handler(item, state);
  770. return {result, result};
  771. }
  772. );
  773. }
  774. TRuntimeNode TProgramBuilder::Chain1Map(TRuntimeNode list, const TUnarySplitLambda& init, const TBinarySplitLambda& handler) {
  775. const auto listType = list.GetStaticType();
  776. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream(), "Expected flow, list or stream");
  777. const auto itemType = listType->IsFlow() ?
  778. AS_TYPE(TFlowType, listType)->GetItemType():
  779. listType->IsList() ?
  780. AS_TYPE(TListType, listType)->GetItemType():
  781. AS_TYPE(TStreamType, listType)->GetItemType();
  782. ThrowIfListOfVoid(itemType);
  783. const auto itemArg = Arg(itemType);
  784. const auto initItemAndState = init(itemArg);
  785. const auto resultItemType = std::get<0U>(initItemAndState).GetStaticType();
  786. const auto stateType = std::get<1U>(initItemAndState).GetStaticType();;
  787. TType* resultListType = nullptr;
  788. if (listType->IsFlow()) {
  789. resultListType = TFlowType::Create(resultItemType, Env);
  790. } else if (listType->IsList()) {
  791. resultListType = TListType::Create(resultItemType, Env);
  792. } else if (listType->IsStream()) {
  793. resultListType = TStreamType::Create(resultItemType, Env);
  794. }
  795. const auto stateArg = Arg(stateType);
  796. const auto updateItemAndState = handler(itemArg, stateArg);
  797. MKQL_ENSURE(std::get<0U>(updateItemAndState).GetStaticType()->IsSameType(*resultItemType), "Item type is changed by the handler");
  798. MKQL_ENSURE(std::get<1U>(updateItemAndState).GetStaticType()->IsSameType(*stateType), "State type is changed by the handler");
  799. TCallableBuilder callableBuilder(Env, __func__, resultListType);
  800. callableBuilder.Add(list);
  801. callableBuilder.Add(itemArg);
  802. callableBuilder.Add(std::get<0U>(initItemAndState));
  803. callableBuilder.Add(std::get<1U>(initItemAndState));
  804. callableBuilder.Add(stateArg);
  805. callableBuilder.Add(std::get<0U>(updateItemAndState));
  806. callableBuilder.Add(std::get<1U>(updateItemAndState));
  807. return TRuntimeNode(callableBuilder.Build(), false);
  808. }
  809. TRuntimeNode TProgramBuilder::ToList(TRuntimeNode optional) {
  810. const auto optionalType = optional.GetStaticType();
  811. MKQL_ENSURE(optionalType->IsOptional(), "Expected optional");
  812. const auto& optionalDetailedType = static_cast<const TOptionalType&>(*optionalType);
  813. const auto itemType = optionalDetailedType.GetItemType();
  814. return IfPresent(optional, [&](TRuntimeNode item) { return AsList(item); }, NewEmptyList(itemType));
  815. }
  816. TRuntimeNode TProgramBuilder::Iterable(TZeroLambda lambda) {
  817. if constexpr (RuntimeVersion < 19U) {
  818. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  819. }
  820. const auto itemArg = Arg(NewNull().GetStaticType());
  821. auto lambdaRes = lambda();
  822. const auto resultType = NewListType(AS_TYPE(TStreamType, lambdaRes.GetStaticType())->GetItemType());
  823. TCallableBuilder callableBuilder(Env, __func__, resultType);
  824. callableBuilder.Add(lambdaRes);
  825. callableBuilder.Add(itemArg);
  826. return TRuntimeNode(callableBuilder.Build(), false);
  827. }
  828. TRuntimeNode TProgramBuilder::ToOptional(TRuntimeNode list) {
  829. return Head(list);
  830. }
  831. TRuntimeNode TProgramBuilder::Head(TRuntimeNode list) {
  832. const auto resultType = NewOptionalType(AS_TYPE(TListType, list.GetStaticType())->GetItemType());
  833. TCallableBuilder callableBuilder(Env, __func__, resultType);
  834. callableBuilder.Add(list);
  835. return TRuntimeNode(callableBuilder.Build(), false);
  836. }
  837. TRuntimeNode TProgramBuilder::Last(TRuntimeNode list) {
  838. const auto resultType = NewOptionalType(AS_TYPE(TListType, list.GetStaticType())->GetItemType());
  839. TCallableBuilder callableBuilder(Env, __func__, resultType);
  840. callableBuilder.Add(list);
  841. return TRuntimeNode(callableBuilder.Build(), false);
  842. }
  843. TRuntimeNode TProgramBuilder::Nanvl(TRuntimeNode data, TRuntimeNode dataIfNaN) {
  844. const std::array<TRuntimeNode, 2> args = {{ data, dataIfNaN }};
  845. return Invoke(__func__, BuildArithmeticCommonType(data.GetStaticType(), dataIfNaN.GetStaticType()), args);
  846. }
  847. TRuntimeNode TProgramBuilder::FlatMap(TRuntimeNode list, const TUnaryLambda& handler)
  848. {
  849. return BuildFlatMap(__func__, list, handler);
  850. }
  851. TRuntimeNode TProgramBuilder::OrderedFlatMap(TRuntimeNode list, const TUnaryLambda& handler)
  852. {
  853. return BuildFlatMap(__func__, list, handler);
  854. }
  855. TRuntimeNode TProgramBuilder::Filter(TRuntimeNode list, const TUnaryLambda& handler)
  856. {
  857. return BuildFilter(__func__, list, handler);
  858. }
  859. TRuntimeNode TProgramBuilder::Filter(TRuntimeNode list, TRuntimeNode limit, const TUnaryLambda& handler)
  860. {
  861. return BuildFilter(__func__, list, limit, handler);
  862. }
  863. TRuntimeNode TProgramBuilder::OrderedFilter(TRuntimeNode list, const TUnaryLambda& handler)
  864. {
  865. return BuildFilter(__func__, list, handler);
  866. }
  867. TRuntimeNode TProgramBuilder::OrderedFilter(TRuntimeNode list, TRuntimeNode limit, const TUnaryLambda& handler)
  868. {
  869. return BuildFilter(__func__, list, limit, handler);
  870. }
  871. TRuntimeNode TProgramBuilder::TakeWhile(TRuntimeNode list, const TUnaryLambda& handler)
  872. {
  873. return BuildFilter(__func__, list, handler);
  874. }
  875. TRuntimeNode TProgramBuilder::SkipWhile(TRuntimeNode list, const TUnaryLambda& handler)
  876. {
  877. return BuildFilter(__func__, list, handler);
  878. }
  879. TRuntimeNode TProgramBuilder::TakeWhileInclusive(TRuntimeNode list, const TUnaryLambda& handler)
  880. {
  881. return BuildFilter(__func__, list, handler);
  882. }
  883. TRuntimeNode TProgramBuilder::SkipWhileInclusive(TRuntimeNode list, const TUnaryLambda& handler)
  884. {
  885. return BuildFilter(__func__, list, handler);
  886. }
  887. TRuntimeNode TProgramBuilder::BuildListSort(const std::string_view& callableName, TRuntimeNode list, TRuntimeNode ascending,
  888. const TUnaryLambda& keyExtractor)
  889. {
  890. const auto listType = list.GetStaticType();
  891. MKQL_ENSURE(listType->IsList(), "Expected list.");
  892. const auto itemType = static_cast<const TListType&>(*listType).GetItemType();
  893. ThrowIfListOfVoid(itemType);
  894. const auto ascendingType = ascending.GetStaticType();
  895. const auto itemArg = Arg(itemType);
  896. auto key = keyExtractor(itemArg);
  897. if (ascendingType->IsTuple()) {
  898. const auto ascendingTuple = AS_TYPE(TTupleType, ascendingType);
  899. if (ascendingTuple->GetElementsCount() == 0) {
  900. return list;
  901. }
  902. if (ascendingTuple->GetElementsCount() == 1) {
  903. ascending = Nth(ascending, 0);
  904. key = Nth(key, 0);
  905. }
  906. }
  907. TCallableBuilder callableBuilder(Env, callableName, listType);
  908. callableBuilder.Add(list);
  909. callableBuilder.Add(itemArg);
  910. callableBuilder.Add(key);
  911. callableBuilder.Add(ascending);
  912. return TRuntimeNode(callableBuilder.Build(), false);
  913. }
  914. TRuntimeNode TProgramBuilder::BuildListNth(const std::string_view& callableName, TRuntimeNode list, TRuntimeNode n, TRuntimeNode ascending,
  915. const TUnaryLambda& keyExtractor)
  916. {
  917. const auto listType = list.GetStaticType();
  918. MKQL_ENSURE(listType->IsList(), "Expected list.");
  919. const auto itemType = static_cast<const TListType&>(*listType).GetItemType();
  920. ThrowIfListOfVoid(itemType);
  921. MKQL_ENSURE(n.GetStaticType()->IsData(), "Expected data");
  922. MKQL_ENSURE(static_cast<const TDataType&>(*n.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  923. const auto ascendingType = ascending.GetStaticType();
  924. const auto itemArg = Arg(itemType);
  925. auto key = keyExtractor(itemArg);
  926. if (ascendingType->IsTuple()) {
  927. const auto ascendingTuple = AS_TYPE(TTupleType, ascendingType);
  928. if (ascendingTuple->GetElementsCount() == 0) {
  929. return Take(list, n);
  930. }
  931. if (ascendingTuple->GetElementsCount() == 1) {
  932. ascending = Nth(ascending, 0);
  933. key = Nth(key, 0);
  934. }
  935. }
  936. TCallableBuilder callableBuilder(Env, callableName, listType);
  937. callableBuilder.Add(list);
  938. callableBuilder.Add(n);
  939. callableBuilder.Add(itemArg);
  940. callableBuilder.Add(key);
  941. callableBuilder.Add(ascending);
  942. return TRuntimeNode(callableBuilder.Build(), false);
  943. }
  944. TRuntimeNode TProgramBuilder::BuildSort(const std::string_view& callableName, TRuntimeNode flow, TRuntimeNode ascending,
  945. const TUnaryLambda& keyExtractor)
  946. {
  947. if (const auto flowType = flow.GetStaticType(); flowType->IsFlow() || flowType->IsStream()) {
  948. const bool newVersion = RuntimeVersion >= 25U && flowType->IsFlow();
  949. const auto condense = newVersion ?
  950. SqueezeToList(Map(flow, [&](TRuntimeNode item) { return Pickle(item); }), NewEmptyOptionalDataLiteral(NUdf::TDataType<ui64>::Id)) :
  951. Condense1(flow,
  952. [this](TRuntimeNode item) { return AsList(item); },
  953. [this](TRuntimeNode, TRuntimeNode) { return NewDataLiteral<bool>(false); },
  954. [this](TRuntimeNode item, TRuntimeNode state) { return Append(state, item); }
  955. );
  956. const auto finalKeyExtractor = newVersion ? [&](TRuntimeNode item) {
  957. auto itemType = AS_TYPE(TFlowType, flowType)->GetItemType();
  958. return keyExtractor(Unpickle(itemType, item));
  959. } : keyExtractor;
  960. return FlatMap(condense, [&](TRuntimeNode list) {
  961. auto stealed = RuntimeVersion >= 27U ? Steal(list) : list;
  962. auto sorted = BuildSort(RuntimeVersion >= 26U ? "UnstableSort" : callableName, stealed, ascending, finalKeyExtractor);
  963. return newVersion ? Map(LazyList(sorted), [&](TRuntimeNode item) {
  964. auto itemType = AS_TYPE(TFlowType, flowType)->GetItemType();
  965. return Unpickle(itemType, item);
  966. }) : sorted;
  967. });
  968. }
  969. return BuildListSort(callableName, flow, ascending, keyExtractor);
  970. }
  971. TRuntimeNode TProgramBuilder::BuildNth(const std::string_view& callableName, TRuntimeNode flow, TRuntimeNode n, TRuntimeNode ascending,
  972. const TUnaryLambda& keyExtractor)
  973. {
  974. if (const auto flowType = flow.GetStaticType(); flowType->IsFlow() || flowType->IsStream()) {
  975. return FlatMap(Condense1(flow,
  976. [this](TRuntimeNode item) { return AsList(item); },
  977. [this](TRuntimeNode, TRuntimeNode) { return NewDataLiteral<bool>(false); },
  978. [this](TRuntimeNode item, TRuntimeNode state) { return Append(state, item); }
  979. ),
  980. [&](TRuntimeNode list) { return BuildNth(callableName, list, n, ascending, keyExtractor); }
  981. );
  982. }
  983. return BuildListNth(callableName, flow, n, ascending, keyExtractor);
  984. }
  985. TRuntimeNode TProgramBuilder::BuildTake(const std::string_view& callableName, TRuntimeNode flow, TRuntimeNode count) {
  986. const auto listType = flow.GetStaticType();
  987. TType* itemType = nullptr;
  988. if (listType->IsFlow()) {
  989. itemType = AS_TYPE(TFlowType, listType)->GetItemType();
  990. } else if (listType->IsList()) {
  991. itemType = AS_TYPE(TListType, listType)->GetItemType();
  992. } else if (listType->IsStream()) {
  993. itemType = AS_TYPE(TStreamType, listType)->GetItemType();
  994. }
  995. MKQL_ENSURE(itemType, "Expected flow, list or stream.");
  996. ThrowIfListOfVoid(itemType);
  997. MKQL_ENSURE(count.GetStaticType()->IsData(), "Expected data");
  998. MKQL_ENSURE(static_cast<const TDataType&>(*count.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  999. TCallableBuilder callableBuilder(Env, callableName, listType);
  1000. callableBuilder.Add(flow);
  1001. callableBuilder.Add(count);
  1002. return TRuntimeNode(callableBuilder.Build(), false);
  1003. }
  1004. template<bool IsFilter, bool OnStruct>
  1005. TRuntimeNode TProgramBuilder::BuildFilterNulls(TRuntimeNode list) {
  1006. const auto listType = list.GetStaticType();
  1007. TType* itemType;
  1008. if (listType->IsFlow()) {
  1009. itemType = AS_TYPE(TFlowType, listType)->GetItemType();
  1010. } else if (listType->IsList()) {
  1011. itemType = AS_TYPE(TListType, listType)->GetItemType();
  1012. } else if (listType->IsStream()) {
  1013. itemType = AS_TYPE(TStreamType, listType)->GetItemType();
  1014. } else if (listType->IsOptional()) {
  1015. itemType = AS_TYPE(TOptionalType, listType)->GetItemType();
  1016. } else {
  1017. THROW yexception() << "Expected flow or list or stream or optional of " << (OnStruct ? "struct." : "tuple.");
  1018. }
  1019. std::conditional_t<OnStruct, std::vector<std::pair<std::string_view, TType*>>, std::vector<TType*>> filteredItems;
  1020. std::vector<std::conditional_t<OnStruct, std::string_view, ui32>> members;
  1021. const bool multiOptional = CollectOptionalElements<IsFilter>(itemType, members, filteredItems);
  1022. const auto predicate = [=](TRuntimeNode item) {
  1023. std::vector<TRuntimeNode> checkMembers;
  1024. checkMembers.reserve(members.size());
  1025. std::transform(members.cbegin(), members.cend(), std::back_inserter(checkMembers),
  1026. [=](const auto& i){ return Exists(Element(item, i)); });
  1027. return And(checkMembers);
  1028. };
  1029. auto resultType = listType;
  1030. if constexpr (IsFilter) {
  1031. if (const auto filteredItemType = NewArrayType(filteredItems); multiOptional) {
  1032. return BuildFilterNulls<OnStruct>(list, members, filteredItems);
  1033. } else {
  1034. resultType = listType->IsFlow() ?
  1035. NewFlowType(filteredItemType):
  1036. listType->IsList() ?
  1037. NewListType(filteredItemType):
  1038. listType->IsStream() ? NewStreamType(filteredItemType) : NewOptionalType(filteredItemType);
  1039. }
  1040. }
  1041. return Filter(list, predicate, resultType);
  1042. }
  1043. template<bool IsFilter, bool OnStruct>
  1044. TRuntimeNode TProgramBuilder::BuildFilterNulls(TRuntimeNode list, const TArrayRef<std::conditional_t<OnStruct, const std::string_view, const ui32>>& members) {
  1045. if (members.empty()) {
  1046. return list;
  1047. }
  1048. const auto listType = list.GetStaticType();
  1049. TType* itemType;
  1050. if (listType->IsFlow()) {
  1051. itemType = AS_TYPE(TFlowType, listType)->GetItemType();
  1052. } else if (listType->IsList()) {
  1053. itemType = AS_TYPE(TListType, listType)->GetItemType();
  1054. } else if (listType->IsStream()) {
  1055. itemType = AS_TYPE(TStreamType, listType)->GetItemType();
  1056. } else if (listType->IsOptional()) {
  1057. itemType = AS_TYPE(TOptionalType, listType)->GetItemType();
  1058. } else {
  1059. THROW yexception() << "Expected flow or list or stream or optional of struct.";
  1060. }
  1061. const auto predicate = [=](TRuntimeNode item) {
  1062. TRuntimeNode::TList checkMembers;
  1063. checkMembers.reserve(members.size());
  1064. std::transform(members.cbegin(), members.cend(), std::back_inserter(checkMembers),
  1065. [=](const auto& i){ return Exists(Element(item, i)); });
  1066. return And(checkMembers);
  1067. };
  1068. auto resultType = listType;
  1069. if constexpr (IsFilter) {
  1070. if (std::conditional_t<OnStruct, std::vector<std::pair<std::string_view, TType*>>, std::vector<TType*>> filteredItems;
  1071. ReduceOptionalElements(itemType, members, filteredItems)) {
  1072. return BuildFilterNulls<OnStruct>(list, members, filteredItems);
  1073. } else {
  1074. const auto filteredItemType = NewArrayType(filteredItems);
  1075. resultType = listType->IsFlow() ?
  1076. NewFlowType(filteredItemType):
  1077. listType->IsList() ?
  1078. NewListType(filteredItemType):
  1079. listType->IsStream() ? NewStreamType(filteredItemType) : NewOptionalType(filteredItemType);
  1080. }
  1081. }
  1082. return Filter(list, predicate, resultType);
  1083. }
  1084. template<bool OnStruct>
  1085. TRuntimeNode TProgramBuilder::BuildFilterNulls(TRuntimeNode list, const TArrayRef<std::conditional_t<OnStruct, const std::string_view, const ui32>>& members,
  1086. const std::conditional_t<OnStruct, std::vector<std::pair<std::string_view, TType*>>, std::vector<TType*>>& filteredItems) {
  1087. return FlatMap(list, [&](TRuntimeNode item) {
  1088. TRuntimeNode::TList checkMembers;
  1089. checkMembers.reserve(members.size());
  1090. std::transform(members.cbegin(), members.cend(), std::back_inserter(checkMembers),
  1091. [=](const auto& i){ return Element(item, i); });
  1092. return IfPresent(checkMembers, [&](TRuntimeNode::TList items) {
  1093. std::conditional_t<OnStruct, std::vector<std::pair<std::string_view, TRuntimeNode>>, TRuntimeNode::TList> row;
  1094. row.reserve(filteredItems.size());
  1095. auto j = 0U;
  1096. if constexpr (OnStruct) {
  1097. std::transform(filteredItems.cbegin(), filteredItems.cend(), std::back_inserter(row),
  1098. [&](const std::pair<std::string_view, TType*>& i) {
  1099. const auto& member = i.first;
  1100. const bool passtrought = members.cend() == std::find(members.cbegin(), members.cend(), member);
  1101. return std::make_pair(member, passtrought ? Element(item, member) : items[j++]);
  1102. }
  1103. );
  1104. return NewOptional(NewStruct(row));
  1105. } else {
  1106. auto i = 0U;
  1107. std::generate_n(std::back_inserter(row), filteredItems.size(),
  1108. [&]() {
  1109. const auto index = i++;
  1110. const bool passtrought = members.cend() == std::find(members.cbegin(), members.cend(), index);
  1111. return passtrought ? Element(item, index) : items[j++];
  1112. }
  1113. );
  1114. return NewOptional(NewTuple(row));
  1115. }
  1116. }, NewEmptyOptional(NewOptionalType(NewArrayType(filteredItems))));
  1117. });
  1118. }
  1119. TRuntimeNode TProgramBuilder::SkipNullMembers(TRuntimeNode list) {
  1120. return BuildFilterNulls<false, true>(list);
  1121. }
  1122. TRuntimeNode TProgramBuilder::FilterNullMembers(TRuntimeNode list) {
  1123. return BuildFilterNulls<true, true>(list);
  1124. }
  1125. TRuntimeNode TProgramBuilder::SkipNullMembers(TRuntimeNode list, const TArrayRef<const std::string_view>& members) {
  1126. return BuildFilterNulls<false, true>(list, members);
  1127. }
  1128. TRuntimeNode TProgramBuilder::FilterNullMembers(TRuntimeNode list, const TArrayRef<const std::string_view>& members) {
  1129. return BuildFilterNulls<true, true>(list, members);
  1130. }
  1131. TRuntimeNode TProgramBuilder::FilterNullElements(TRuntimeNode list) {
  1132. return BuildFilterNulls<true, false>(list);
  1133. }
  1134. TRuntimeNode TProgramBuilder::SkipNullElements(TRuntimeNode list) {
  1135. return BuildFilterNulls<false, false>(list);
  1136. }
  1137. TRuntimeNode TProgramBuilder::FilterNullElements(TRuntimeNode list, const TArrayRef<const ui32>& elements) {
  1138. return BuildFilterNulls<true, false>(list, elements);
  1139. }
  1140. TRuntimeNode TProgramBuilder::SkipNullElements(TRuntimeNode list, const TArrayRef<const ui32>& elements) {
  1141. return BuildFilterNulls<false, false>(list, elements);
  1142. }
  1143. template <typename ResultType>
  1144. TRuntimeNode TProgramBuilder::BuildContainerProperty(const std::string_view& callableName, TRuntimeNode listOrDict) {
  1145. const auto type = listOrDict.GetStaticType();
  1146. MKQL_ENSURE(type->IsList() || type->IsDict() || type->IsEmptyList() || type->IsEmptyDict(), "Expected list or dict.");
  1147. if (type->IsList()) {
  1148. const auto itemType = AS_TYPE(TListType, type)->GetItemType();
  1149. ThrowIfListOfVoid(itemType);
  1150. }
  1151. TCallableBuilder callableBuilder(Env, callableName, NewDataType(NUdf::TDataType<ResultType>::Id));
  1152. callableBuilder.Add(listOrDict);
  1153. return TRuntimeNode(callableBuilder.Build(), false);
  1154. }
  1155. TRuntimeNode TProgramBuilder::Length(TRuntimeNode listOrDict) {
  1156. return BuildContainerProperty<ui64>(__func__, listOrDict);
  1157. }
  1158. TRuntimeNode TProgramBuilder::Iterator(TRuntimeNode list, const TArrayRef<const TRuntimeNode>& dependentNodes) {
  1159. const auto streamType = NewStreamType(AS_TYPE(TListType, list.GetStaticType())->GetItemType());
  1160. TCallableBuilder callableBuilder(Env, __func__, streamType);
  1161. callableBuilder.Add(list);
  1162. for (auto node : dependentNodes) {
  1163. callableBuilder.Add(node);
  1164. }
  1165. return TRuntimeNode(callableBuilder.Build(), false);
  1166. }
  1167. TRuntimeNode TProgramBuilder::EmptyIterator(TType* streamType) {
  1168. MKQL_ENSURE(streamType->IsStream() || streamType->IsFlow(), "Expected stream or flow.");
  1169. if (RuntimeVersion < 7U && streamType->IsFlow()) {
  1170. return ToFlow(EmptyIterator(NewStreamType(AS_TYPE(TFlowType, streamType)->GetItemType())));
  1171. }
  1172. TCallableBuilder callableBuilder(Env, __func__, streamType);
  1173. return TRuntimeNode(callableBuilder.Build(), false);
  1174. }
  1175. TRuntimeNode TProgramBuilder::Collect(TRuntimeNode flow) {
  1176. const auto seqType = flow.GetStaticType();
  1177. TType* itemType = nullptr;
  1178. if (seqType->IsFlow()) {
  1179. itemType = AS_TYPE(TFlowType, seqType)->GetItemType();
  1180. } else if (seqType->IsList()) {
  1181. itemType = AS_TYPE(TListType, seqType)->GetItemType();
  1182. } else if (seqType->IsStream()) {
  1183. itemType = AS_TYPE(TStreamType, seqType)->GetItemType();
  1184. } else {
  1185. THROW yexception() << "Expected flow, list or stream.";
  1186. }
  1187. TCallableBuilder callableBuilder(Env, __func__, NewListType(itemType));
  1188. callableBuilder.Add(flow);
  1189. return TRuntimeNode(callableBuilder.Build(), false);
  1190. }
  1191. TRuntimeNode TProgramBuilder::LazyList(TRuntimeNode list) {
  1192. const auto type = list.GetStaticType();
  1193. bool isOptional;
  1194. const auto listType = UnpackOptional(type, isOptional);
  1195. MKQL_ENSURE(listType->IsList(), "Expected list");
  1196. TCallableBuilder callableBuilder(Env, __func__, type);
  1197. callableBuilder.Add(list);
  1198. return TRuntimeNode(callableBuilder.Build(), false);
  1199. }
  1200. TRuntimeNode TProgramBuilder::ForwardList(TRuntimeNode stream) {
  1201. const auto type = stream.GetStaticType();
  1202. MKQL_ENSURE(type->IsStream() || type->IsFlow(), "Expected flow or stream.");
  1203. if constexpr (RuntimeVersion < 10U) {
  1204. if (type->IsFlow()) {
  1205. return ForwardList(FromFlow(stream));
  1206. }
  1207. }
  1208. TCallableBuilder callableBuilder(Env, __func__, NewListType(type->IsFlow() ? AS_TYPE(TFlowType, stream)->GetItemType() : AS_TYPE(TStreamType, stream)->GetItemType()));
  1209. callableBuilder.Add(stream);
  1210. return TRuntimeNode(callableBuilder.Build(), false);
  1211. }
  1212. TRuntimeNode TProgramBuilder::ToFlow(TRuntimeNode stream) {
  1213. const auto type = stream.GetStaticType();
  1214. MKQL_ENSURE(type->IsStream() || type->IsList() || type->IsOptional(), "Expected stream, list or optional.");
  1215. const auto itemType = type->IsStream() ? AS_TYPE(TStreamType, stream)->GetItemType() :
  1216. type->IsList() ? AS_TYPE(TListType, stream)->GetItemType() : AS_TYPE(TOptionalType, stream)->GetItemType();
  1217. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(itemType));
  1218. callableBuilder.Add(stream);
  1219. return TRuntimeNode(callableBuilder.Build(), false);
  1220. }
  1221. TRuntimeNode TProgramBuilder::FromFlow(TRuntimeNode flow) {
  1222. MKQL_ENSURE(flow.GetStaticType()->IsFlow(), "Expected flow.");
  1223. TCallableBuilder callableBuilder(Env, __func__, NewStreamType(AS_TYPE(TFlowType, flow)->GetItemType()));
  1224. callableBuilder.Add(flow);
  1225. return TRuntimeNode(callableBuilder.Build(), false);
  1226. }
  1227. TRuntimeNode TProgramBuilder::Steal(TRuntimeNode input) {
  1228. if constexpr (RuntimeVersion < 27U) {
  1229. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1230. }
  1231. TCallableBuilder callableBuilder(Env, __func__, input.GetStaticType(), true);
  1232. callableBuilder.Add(input);
  1233. return TRuntimeNode(callableBuilder.Build(), false);
  1234. }
  1235. TRuntimeNode TProgramBuilder::ToBlocks(TRuntimeNode flow) {
  1236. auto* flowType = AS_TYPE(TFlowType, flow.GetStaticType());
  1237. auto* blockType = NewBlockType(flowType->GetItemType(), TBlockType::EShape::Many);
  1238. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(blockType));
  1239. callableBuilder.Add(flow);
  1240. return TRuntimeNode(callableBuilder.Build(), false);
  1241. }
  1242. TRuntimeNode TProgramBuilder::WideToBlocks(TRuntimeNode flow) {
  1243. TType* outputItemType;
  1244. {
  1245. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  1246. std::vector<TType*> outputItems;
  1247. outputItems.reserve(wideComponents.size());
  1248. for (size_t i = 0; i < wideComponents.size(); ++i) {
  1249. outputItems.push_back(NewBlockType(wideComponents[i], TBlockType::EShape::Many));
  1250. }
  1251. outputItems.push_back(NewBlockType(NewDataType(NUdf::TDataType<ui64>::Id), TBlockType::EShape::Scalar));
  1252. outputItemType = NewMultiType(outputItems);
  1253. }
  1254. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(outputItemType));
  1255. callableBuilder.Add(flow);
  1256. return TRuntimeNode(callableBuilder.Build(), false);
  1257. }
  1258. TRuntimeNode TProgramBuilder::FromBlocks(TRuntimeNode flow) {
  1259. auto* flowType = AS_TYPE(TFlowType, flow.GetStaticType());
  1260. auto* blockType = AS_TYPE(TBlockType, flowType->GetItemType());
  1261. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(blockType->GetItemType()));
  1262. callableBuilder.Add(flow);
  1263. return TRuntimeNode(callableBuilder.Build(), false);
  1264. }
  1265. TRuntimeNode TProgramBuilder::WideFromBlocks(TRuntimeNode flow) {
  1266. auto outputItems = ValidateBlockFlowType(flow.GetStaticType());
  1267. outputItems.pop_back();
  1268. TType* outputMultiType = NewMultiType(outputItems);
  1269. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(outputMultiType));
  1270. callableBuilder.Add(flow);
  1271. return TRuntimeNode(callableBuilder.Build(), false);
  1272. }
  1273. TRuntimeNode TProgramBuilder::WideSkipBlocks(TRuntimeNode flow, TRuntimeNode count) {
  1274. return BuildWideSkipTakeBlocks(__func__, flow, count);
  1275. }
  1276. TRuntimeNode TProgramBuilder::WideTakeBlocks(TRuntimeNode flow, TRuntimeNode count) {
  1277. return BuildWideSkipTakeBlocks(__func__, flow, count);
  1278. }
  1279. TRuntimeNode TProgramBuilder::WideTopBlocks(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys) {
  1280. return BuildWideTopOrSort(__func__, flow, count, keys);
  1281. }
  1282. TRuntimeNode TProgramBuilder::WideTopSortBlocks(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys) {
  1283. return BuildWideTopOrSort(__func__, flow, count, keys);
  1284. }
  1285. TRuntimeNode TProgramBuilder::WideSortBlocks(TRuntimeNode flow, const std::vector<std::pair<ui32, TRuntimeNode>>& keys) {
  1286. return BuildWideTopOrSort(__func__, flow, Nothing(), keys);
  1287. }
  1288. TRuntimeNode TProgramBuilder::AsScalar(TRuntimeNode value) {
  1289. TCallableBuilder callableBuilder(Env, __func__, NewBlockType(value.GetStaticType(), TBlockType::EShape::Scalar));
  1290. callableBuilder.Add(value);
  1291. return TRuntimeNode(callableBuilder.Build(), false);
  1292. }
  1293. TRuntimeNode TProgramBuilder::ReplicateScalar(TRuntimeNode value, TRuntimeNode count) {
  1294. if constexpr (RuntimeVersion < 43U) {
  1295. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1296. }
  1297. auto valueType = AS_TYPE(TBlockType, value.GetStaticType());
  1298. auto countType = AS_TYPE(TBlockType, count.GetStaticType());
  1299. MKQL_ENSURE(valueType->GetShape() == TBlockType::EShape::Scalar, "Expecting scalar as first arguemnt");
  1300. MKQL_ENSURE(countType->GetShape() == TBlockType::EShape::Scalar, "Expecting scalar as second arguemnt");
  1301. MKQL_ENSURE(countType->GetItemType()->IsData(), "Expected scalar data as second argument");
  1302. MKQL_ENSURE(AS_TYPE(TDataType, countType->GetItemType())->GetSchemeType() ==
  1303. NUdf::TDataType<ui64>::Id, "Expected scalar ui64 as second argument");
  1304. auto outputType = NewBlockType(valueType->GetItemType(), TBlockType::EShape::Many);
  1305. TCallableBuilder callableBuilder(Env, __func__, outputType);
  1306. callableBuilder.Add(value);
  1307. callableBuilder.Add(count);
  1308. return TRuntimeNode(callableBuilder.Build(), false);
  1309. }
  1310. TRuntimeNode TProgramBuilder::BlockCompress(TRuntimeNode flow, ui32 bitmapIndex) {
  1311. auto blockItemTypes = ValidateBlockFlowType(flow.GetStaticType());
  1312. MKQL_ENSURE(blockItemTypes.size() >= 2, "Expected at least two input columns");
  1313. MKQL_ENSURE(bitmapIndex < blockItemTypes.size() - 1, "Invalid bitmap index");
  1314. MKQL_ENSURE(AS_TYPE(TDataType, blockItemTypes[bitmapIndex])->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected Bool as bitmap column type");
  1315. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  1316. MKQL_ENSURE(wideComponents.size() == blockItemTypes.size(), "Unexpected tuple size");
  1317. std::vector<TType*> flowItems;
  1318. for (size_t i = 0; i < wideComponents.size(); ++i) {
  1319. if (i == bitmapIndex) {
  1320. continue;
  1321. }
  1322. flowItems.push_back(wideComponents[i]);
  1323. }
  1324. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(flowItems)));
  1325. callableBuilder.Add(flow);
  1326. callableBuilder.Add(NewDataLiteral<ui32>(bitmapIndex));
  1327. return TRuntimeNode(callableBuilder.Build(), false);
  1328. }
  1329. TRuntimeNode TProgramBuilder::BlockExpandChunked(TRuntimeNode comp) {
  1330. if (comp.GetStaticType()->IsStream()) {
  1331. ValidateBlockStreamType(comp.GetStaticType());
  1332. } else {
  1333. ValidateBlockFlowType(comp.GetStaticType());
  1334. }
  1335. TCallableBuilder callableBuilder(Env, __func__, comp.GetStaticType());
  1336. callableBuilder.Add(comp);
  1337. return TRuntimeNode(callableBuilder.Build(), false);
  1338. }
  1339. TRuntimeNode TProgramBuilder::BlockCoalesce(TRuntimeNode first, TRuntimeNode second) {
  1340. auto firstType = AS_TYPE(TBlockType, first.GetStaticType());
  1341. auto secondType = AS_TYPE(TBlockType, second.GetStaticType());
  1342. auto firstItemType = firstType->GetItemType();
  1343. auto secondItemType = secondType->GetItemType();
  1344. MKQL_ENSURE(firstItemType->IsOptional() || firstItemType->IsPg(), "Expecting Optional or Pg type as first argument");
  1345. if (!firstItemType->IsSameType(*secondItemType)) {
  1346. bool firstOptional;
  1347. firstItemType = UnpackOptional(firstItemType, firstOptional);
  1348. MKQL_ENSURE(firstItemType->IsSameType(*secondItemType), "Uncompatible arguemnt types");
  1349. }
  1350. auto outputType = NewBlockType(secondType->GetItemType(), GetResultShape({firstType, secondType}));
  1351. TCallableBuilder callableBuilder(Env, __func__, outputType);
  1352. callableBuilder.Add(first);
  1353. callableBuilder.Add(second);
  1354. return TRuntimeNode(callableBuilder.Build(), false);
  1355. }
  1356. TRuntimeNode TProgramBuilder::BlockExists(TRuntimeNode data) {
  1357. auto dataType = AS_TYPE(TBlockType, data.GetStaticType());
  1358. auto outputType = NewBlockType(NewDataType(NUdf::TDataType<bool>::Id), dataType->GetShape());
  1359. TCallableBuilder callableBuilder(Env, __func__, outputType);
  1360. callableBuilder.Add(data);
  1361. return TRuntimeNode(callableBuilder.Build(), false);
  1362. }
  1363. TRuntimeNode TProgramBuilder::BlockMember(TRuntimeNode structObj, const std::string_view& memberName) {
  1364. auto blockType = AS_TYPE(TBlockType, structObj.GetStaticType());
  1365. bool isOptional;
  1366. const auto type = AS_TYPE(TStructType, UnpackOptional(blockType->GetItemType(), isOptional));
  1367. const auto memberIndex = type->GetMemberIndex(memberName);
  1368. auto memberType = type->GetMemberType(memberIndex);
  1369. if (isOptional && !memberType->IsOptional() && !memberType->IsNull() && !memberType->IsPg()) {
  1370. memberType = NewOptionalType(memberType);
  1371. }
  1372. auto returnType = NewBlockType(memberType, blockType->GetShape());
  1373. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1374. callableBuilder.Add(structObj);
  1375. callableBuilder.Add(NewDataLiteral<ui32>(memberIndex));
  1376. return TRuntimeNode(callableBuilder.Build(), false);
  1377. }
  1378. TRuntimeNode TProgramBuilder::BlockNth(TRuntimeNode tuple, ui32 index) {
  1379. auto blockType = AS_TYPE(TBlockType, tuple.GetStaticType());
  1380. bool isOptional;
  1381. const auto type = AS_TYPE(TTupleType, UnpackOptional(blockType->GetItemType(), isOptional));
  1382. MKQL_ENSURE(index < type->GetElementsCount(), "Index out of range: " << index <<
  1383. " is not less than " << type->GetElementsCount());
  1384. auto itemType = type->GetElementType(index);
  1385. if (isOptional && !itemType->IsOptional() && !itemType->IsNull() && !itemType->IsPg()) {
  1386. itemType = TOptionalType::Create(itemType, Env);
  1387. }
  1388. auto returnType = NewBlockType(itemType, blockType->GetShape());
  1389. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1390. callableBuilder.Add(tuple);
  1391. callableBuilder.Add(NewDataLiteral<ui32>(index));
  1392. return TRuntimeNode(callableBuilder.Build(), false);
  1393. }
  1394. TRuntimeNode TProgramBuilder::BlockAsStruct(const TArrayRef<std::pair<std::string_view, TRuntimeNode>>& args) {
  1395. MKQL_ENSURE(!args.empty(), "Expected at least one argument");
  1396. TBlockType::EShape resultShape = TBlockType::EShape::Scalar;
  1397. TVector<std::pair<std::string_view, TType*>> members;
  1398. for (const auto& x : args) {
  1399. auto blockType = AS_TYPE(TBlockType, x.second.GetStaticType());
  1400. members.emplace_back(x.first, blockType->GetItemType());
  1401. if (blockType->GetShape() == TBlockType::EShape::Many) {
  1402. resultShape = TBlockType::EShape::Many;
  1403. }
  1404. }
  1405. auto returnType = NewBlockType(NewStructType(members), resultShape);
  1406. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1407. for (const auto& x : args) {
  1408. callableBuilder.Add(x.second);
  1409. }
  1410. return TRuntimeNode(callableBuilder.Build(), false);
  1411. }
  1412. TRuntimeNode TProgramBuilder::BlockAsTuple(const TArrayRef<const TRuntimeNode>& args) {
  1413. MKQL_ENSURE(!args.empty(), "Expected at least one argument");
  1414. TBlockType::EShape resultShape = TBlockType::EShape::Scalar;
  1415. TVector<TType*> types;
  1416. for (const auto& x : args) {
  1417. auto blockType = AS_TYPE(TBlockType, x.GetStaticType());
  1418. types.push_back(blockType->GetItemType());
  1419. if (blockType->GetShape() == TBlockType::EShape::Many) {
  1420. resultShape = TBlockType::EShape::Many;
  1421. }
  1422. }
  1423. auto tupleType = NewTupleType(types);
  1424. auto returnType = NewBlockType(tupleType, resultShape);
  1425. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1426. for (const auto& x : args) {
  1427. callableBuilder.Add(x);
  1428. }
  1429. return TRuntimeNode(callableBuilder.Build(), false);
  1430. }
  1431. TRuntimeNode TProgramBuilder::BlockToPg(TRuntimeNode input, TType* returnType) {
  1432. if constexpr (RuntimeVersion < 37U) {
  1433. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1434. }
  1435. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1436. callableBuilder.Add(input);
  1437. return TRuntimeNode(callableBuilder.Build(), false);
  1438. }
  1439. TRuntimeNode TProgramBuilder::BlockFromPg(TRuntimeNode input, TType* returnType) {
  1440. if constexpr (RuntimeVersion < 37U) {
  1441. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1442. }
  1443. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1444. callableBuilder.Add(input);
  1445. return TRuntimeNode(callableBuilder.Build(), false);
  1446. }
  1447. TRuntimeNode TProgramBuilder::BlockNot(TRuntimeNode data) {
  1448. auto dataType = AS_TYPE(TBlockType, data.GetStaticType());
  1449. bool isOpt;
  1450. MKQL_ENSURE(UnpackOptionalData(dataType->GetItemType(), isOpt)->GetSchemeType() == NUdf::TDataType<bool>::Id, "Requires boolean args.");
  1451. TCallableBuilder callableBuilder(Env, __func__, data.GetStaticType());
  1452. callableBuilder.Add(data);
  1453. return TRuntimeNode(callableBuilder.Build(), false);
  1454. }
  1455. TRuntimeNode TProgramBuilder::BlockAnd(TRuntimeNode first, TRuntimeNode second) {
  1456. return BuildBlockLogical(__func__, first, second);
  1457. }
  1458. TRuntimeNode TProgramBuilder::BlockOr(TRuntimeNode first, TRuntimeNode second) {
  1459. return BuildBlockLogical(__func__, first, second);
  1460. }
  1461. TRuntimeNode TProgramBuilder::BlockXor(TRuntimeNode first, TRuntimeNode second) {
  1462. return BuildBlockLogical(__func__, first, second);
  1463. }
  1464. TRuntimeNode TProgramBuilder::BlockDecimalDiv(TRuntimeNode first, TRuntimeNode second) {
  1465. return BuildBlockDecimalBinary(__func__, first, second);
  1466. }
  1467. TRuntimeNode TProgramBuilder::BlockDecimalMod(TRuntimeNode first, TRuntimeNode second) {
  1468. return BuildBlockDecimalBinary(__func__, first, second);
  1469. }
  1470. TRuntimeNode TProgramBuilder::BlockDecimalMul(TRuntimeNode first, TRuntimeNode second) {
  1471. return BuildBlockDecimalBinary(__func__, first, second);
  1472. }
  1473. TRuntimeNode TProgramBuilder::ListFromRange(TRuntimeNode start, TRuntimeNode end, TRuntimeNode step) {
  1474. MKQL_ENSURE(start.GetStaticType()->IsData(), "Expected data");
  1475. MKQL_ENSURE(end.GetStaticType()->IsSameType(*start.GetStaticType()), "Mismatch type");
  1476. if constexpr (RuntimeVersion < 24U) {
  1477. MKQL_ENSURE(IsNumericType(AS_TYPE(TDataType, start)->GetSchemeType()), "Expected numeric");
  1478. } else {
  1479. MKQL_ENSURE(IsNumericType(AS_TYPE(TDataType, start)->GetSchemeType()) ||
  1480. IsDateType(AS_TYPE(TDataType, start)->GetSchemeType()) ||
  1481. IsTzDateType(AS_TYPE(TDataType, start)->GetSchemeType()) ||
  1482. IsIntervalType(AS_TYPE(TDataType, start)->GetSchemeType()),
  1483. "Expected numeric, date or tzdate");
  1484. if (IsNumericType(AS_TYPE(TDataType, start)->GetSchemeType())) {
  1485. MKQL_ENSURE(IsNumericType(AS_TYPE(TDataType, step)->GetSchemeType()), "Expected numeric");
  1486. } else {
  1487. MKQL_ENSURE(IsIntervalType(AS_TYPE(TDataType, step)->GetSchemeType()), "Expected interval");
  1488. }
  1489. }
  1490. TCallableBuilder callableBuilder(Env, __func__, TListType::Create(start.GetStaticType(), Env));
  1491. callableBuilder.Add(start);
  1492. callableBuilder.Add(end);
  1493. callableBuilder.Add(step);
  1494. return TRuntimeNode(callableBuilder.Build(), false);
  1495. }
  1496. TRuntimeNode TProgramBuilder::Switch(TRuntimeNode stream,
  1497. const TArrayRef<const TSwitchInput>& handlerInputs,
  1498. std::function<TRuntimeNode(ui32 index, TRuntimeNode item)> handler,
  1499. ui64 memoryLimitBytes, TType* returnType) {
  1500. MKQL_ENSURE(stream.GetStaticType()->IsStream() || stream.GetStaticType()->IsFlow(), "Expected stream or flow.");
  1501. std::vector<TRuntimeNode> argNodes(handlerInputs.size());
  1502. std::vector<TRuntimeNode> outputNodes(handlerInputs.size());
  1503. for (ui32 i = 0; i < handlerInputs.size(); ++i) {
  1504. TRuntimeNode arg = Arg(handlerInputs[i].InputType);
  1505. argNodes[i] = arg;
  1506. outputNodes[i] = handler(i, arg);
  1507. }
  1508. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1509. callableBuilder.Add(stream);
  1510. callableBuilder.Add(NewDataLiteral<ui64>(memoryLimitBytes));
  1511. for (ui32 i = 0; i < handlerInputs.size(); ++i) {
  1512. std::vector<TRuntimeNode> tupleElems;
  1513. for (auto index : handlerInputs[i].Indicies) {
  1514. tupleElems.push_back(NewDataLiteral<ui32>(index));
  1515. }
  1516. auto indiciesTuple = NewTuple(tupleElems);
  1517. callableBuilder.Add(indiciesTuple);
  1518. callableBuilder.Add(argNodes[i]);
  1519. callableBuilder.Add(outputNodes[i]);
  1520. if (!handlerInputs[i].ResultVariantOffset) {
  1521. callableBuilder.Add(NewVoid());
  1522. } else {
  1523. callableBuilder.Add(NewDataLiteral<ui32>(*handlerInputs[i].ResultVariantOffset));
  1524. }
  1525. }
  1526. return TRuntimeNode(callableBuilder.Build(), false);
  1527. }
  1528. TRuntimeNode TProgramBuilder::HasItems(TRuntimeNode listOrDict) {
  1529. return BuildContainerProperty<bool>(__func__, listOrDict);
  1530. }
  1531. TRuntimeNode TProgramBuilder::Reverse(TRuntimeNode list) {
  1532. bool isOptional = false;
  1533. const auto listType = UnpackOptional(list, isOptional);
  1534. if (isOptional) {
  1535. return Map(list, [&](TRuntimeNode unpacked) { return Reverse(unpacked); } );
  1536. }
  1537. const auto listDetailedType = AS_TYPE(TListType, listType);
  1538. const auto itemType = listDetailedType->GetItemType();
  1539. ThrowIfListOfVoid(itemType);
  1540. TCallableBuilder callableBuilder(Env, __func__, listType);
  1541. callableBuilder.Add(list);
  1542. return TRuntimeNode(callableBuilder.Build(), false);
  1543. }
  1544. TRuntimeNode TProgramBuilder::Skip(TRuntimeNode list, TRuntimeNode count) {
  1545. return BuildTake(__func__, list, count);
  1546. }
  1547. TRuntimeNode TProgramBuilder::Take(TRuntimeNode list, TRuntimeNode count) {
  1548. return BuildTake(__func__, list, count);
  1549. }
  1550. TRuntimeNode TProgramBuilder::Sort(TRuntimeNode list, TRuntimeNode ascending, const TUnaryLambda& keyExtractor)
  1551. {
  1552. return BuildSort(__func__, list, ascending, keyExtractor);
  1553. }
  1554. TRuntimeNode TProgramBuilder::WideTop(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys)
  1555. {
  1556. return BuildWideTopOrSort(__func__, flow, count, keys);
  1557. }
  1558. TRuntimeNode TProgramBuilder::WideTopSort(TRuntimeNode flow, TRuntimeNode count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys)
  1559. {
  1560. return BuildWideTopOrSort(__func__, flow, count, keys);
  1561. }
  1562. TRuntimeNode TProgramBuilder::WideSort(TRuntimeNode flow, const std::vector<std::pair<ui32, TRuntimeNode>>& keys)
  1563. {
  1564. return BuildWideTopOrSort(__func__, flow, Nothing(), keys);
  1565. }
  1566. TRuntimeNode TProgramBuilder::BuildWideTopOrSort(const std::string_view& callableName, TRuntimeNode flow, TMaybe<TRuntimeNode> count, const std::vector<std::pair<ui32, TRuntimeNode>>& keys) {
  1567. if (count) {
  1568. if constexpr (RuntimeVersion < 33U) {
  1569. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << callableName;
  1570. }
  1571. } else {
  1572. if constexpr (RuntimeVersion < 34U) {
  1573. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << callableName;
  1574. }
  1575. }
  1576. const auto width = GetWideComponentsCount(AS_TYPE(TFlowType, flow.GetStaticType()));
  1577. MKQL_ENSURE(!keys.empty() && keys.size() <= width, "Unexpected keys count: " << keys.size());
  1578. TCallableBuilder callableBuilder(Env, callableName, flow.GetStaticType());
  1579. callableBuilder.Add(flow);
  1580. if (count) {
  1581. callableBuilder.Add(*count);
  1582. }
  1583. std::for_each(keys.cbegin(), keys.cend(), [&](const std::pair<ui32, TRuntimeNode>& key) {
  1584. MKQL_ENSURE(key.first < width, "Key index too large: " << key.first);
  1585. callableBuilder.Add(NewDataLiteral(key.first));
  1586. callableBuilder.Add(key.second);
  1587. });
  1588. return TRuntimeNode(callableBuilder.Build(), false);
  1589. }
  1590. TRuntimeNode TProgramBuilder::Top(TRuntimeNode flow, TRuntimeNode count, TRuntimeNode ascending, const TUnaryLambda& keyExtractor) {
  1591. if (const auto flowType = flow.GetStaticType(); flowType->IsFlow() || flowType->IsStream()) {
  1592. const TUnaryLambda getKey = [&](TRuntimeNode item) { return Nth(item, 0U); };
  1593. const TUnaryLambda getItem = [&](TRuntimeNode item) { return Nth(item, 1U); };
  1594. const TUnaryLambda cacheKeyExtractor = [&](TRuntimeNode item) {
  1595. return NewTuple({keyExtractor(item), item});
  1596. };
  1597. return FlatMap(Condense1(Map(flow, cacheKeyExtractor),
  1598. [&](TRuntimeNode item) { return AsList(item); },
  1599. [this](TRuntimeNode, TRuntimeNode) { return NewDataLiteral<bool>(false); },
  1600. [&](TRuntimeNode item, TRuntimeNode state) {
  1601. return KeepTop(count, state, item, ascending, getKey);
  1602. }
  1603. ),
  1604. [&](TRuntimeNode list) { return Map(Top(list, count, ascending, getKey), getItem); }
  1605. );
  1606. }
  1607. return BuildListNth(__func__, flow, count, ascending, keyExtractor);
  1608. }
  1609. TRuntimeNode TProgramBuilder::TopSort(TRuntimeNode flow, TRuntimeNode count, TRuntimeNode ascending, const TUnaryLambda& keyExtractor) {
  1610. if (const auto flowType = flow.GetStaticType(); flowType->IsFlow() || flowType->IsStream()) {
  1611. const TUnaryLambda getKey = [&](TRuntimeNode item) { return Nth(item, 0U); };
  1612. const TUnaryLambda getItem = [&](TRuntimeNode item) { return Nth(item, 1U); };
  1613. const TUnaryLambda cacheKeyExtractor = [&](TRuntimeNode item) {
  1614. return NewTuple({keyExtractor(item), item});
  1615. };
  1616. return FlatMap(Condense1(Map(flow, cacheKeyExtractor),
  1617. [&](TRuntimeNode item) { return AsList(item); },
  1618. [this](TRuntimeNode, TRuntimeNode) { return NewDataLiteral<bool>(false); },
  1619. [&](TRuntimeNode item, TRuntimeNode state) {
  1620. return KeepTop(count, state, item, ascending, getKey);
  1621. }
  1622. ),
  1623. [&](TRuntimeNode list) { return Map(TopSort(list, count, ascending, getKey), getItem); }
  1624. );
  1625. }
  1626. if constexpr (RuntimeVersion >= 25U)
  1627. return BuildListNth(__func__, flow, count, ascending, keyExtractor);
  1628. else
  1629. return BuildListSort("Sort", BuildListNth("Top", flow, count, ascending, keyExtractor), ascending, keyExtractor);
  1630. }
  1631. TRuntimeNode TProgramBuilder::KeepTop(TRuntimeNode count, TRuntimeNode list, TRuntimeNode item, TRuntimeNode ascending, const TUnaryLambda& keyExtractor) {
  1632. const auto listType = list.GetStaticType();
  1633. MKQL_ENSURE(listType->IsList(), "Expected list.");
  1634. const auto itemType = static_cast<const TListType&>(*listType).GetItemType();
  1635. ThrowIfListOfVoid(itemType);
  1636. MKQL_ENSURE(count.GetStaticType()->IsData(), "Expected data");
  1637. MKQL_ENSURE(static_cast<const TDataType&>(*count.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  1638. MKQL_ENSURE(itemType->IsSameType(*item.GetStaticType()), "Types of list and item are different.");
  1639. const auto ascendingType = ascending.GetStaticType();
  1640. const auto itemArg = Arg(itemType);
  1641. auto key = keyExtractor(itemArg);
  1642. const auto hotkey = Arg(key.GetStaticType());
  1643. if (ascendingType->IsTuple()) {
  1644. const auto ascendingTuple = AS_TYPE(TTupleType, ascendingType);
  1645. if (ascendingTuple->GetElementsCount() == 0) {
  1646. return If(AggrLess(Length(list), count), Append(list, item), list);
  1647. }
  1648. if (ascendingTuple->GetElementsCount() == 1) {
  1649. ascending = Nth(ascending, 0);
  1650. key = Nth(key, 0);
  1651. }
  1652. }
  1653. TCallableBuilder callableBuilder(Env, __func__, listType);
  1654. callableBuilder.Add(count);
  1655. callableBuilder.Add(list);
  1656. callableBuilder.Add(item);
  1657. callableBuilder.Add(itemArg);
  1658. callableBuilder.Add(key);
  1659. callableBuilder.Add(ascending);
  1660. callableBuilder.Add(hotkey);
  1661. return TRuntimeNode(callableBuilder.Build(), false);
  1662. }
  1663. TRuntimeNode TProgramBuilder::Contains(TRuntimeNode dict, TRuntimeNode key) {
  1664. if constexpr (RuntimeVersion >= 25U)
  1665. if (!dict.GetStaticType()->IsDict())
  1666. return DataCompare(__func__, dict, key);
  1667. const auto keyType = AS_TYPE(TDictType, dict.GetStaticType())->GetKeyType();
  1668. MKQL_ENSURE(keyType->IsSameType(*key.GetStaticType()), "Key type mismatch. Requred: " << *keyType << ", but got: " << *key.GetStaticType());
  1669. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<bool>::Id));
  1670. callableBuilder.Add(dict);
  1671. callableBuilder.Add(key);
  1672. return TRuntimeNode(callableBuilder.Build(), false);
  1673. }
  1674. TRuntimeNode TProgramBuilder::Lookup(TRuntimeNode dict, TRuntimeNode key) {
  1675. const auto dictType = AS_TYPE(TDictType, dict.GetStaticType());
  1676. const auto keyType = dictType->GetKeyType();
  1677. MKQL_ENSURE(keyType->IsSameType(*key.GetStaticType()), "Key type mismatch. Requred: " << *keyType << ", but got: " << *key.GetStaticType());
  1678. TCallableBuilder callableBuilder(Env, __func__, NewOptionalType(dictType->GetPayloadType()));
  1679. callableBuilder.Add(dict);
  1680. callableBuilder.Add(key);
  1681. return TRuntimeNode(callableBuilder.Build(), false);
  1682. }
  1683. TRuntimeNode TProgramBuilder::DictItems(TRuntimeNode dict, EDictItems mode) {
  1684. const auto dictTypeChecked = AS_TYPE(TDictType, dict.GetStaticType());
  1685. TType* itemType;
  1686. switch (mode) {
  1687. case EDictItems::Both: {
  1688. const std::array<TType*, 2U> tupleTypes = {{ dictTypeChecked->GetKeyType(), dictTypeChecked->GetPayloadType() }};
  1689. itemType = NewTupleType(tupleTypes);
  1690. break;
  1691. }
  1692. case EDictItems::Keys: itemType = dictTypeChecked->GetKeyType(); break;
  1693. case EDictItems::Payloads: itemType = dictTypeChecked->GetPayloadType(); break;
  1694. }
  1695. TCallableBuilder callableBuilder(Env, __func__, NewListType(itemType));
  1696. callableBuilder.Add(dict);
  1697. callableBuilder.Add(NewDataLiteral((ui32)mode));
  1698. return TRuntimeNode(callableBuilder.Build(), false);
  1699. }
  1700. TRuntimeNode TProgramBuilder::DictItems(TRuntimeNode dict) {
  1701. if constexpr (RuntimeVersion < 6U) {
  1702. return DictItems(dict, EDictItems::Both);
  1703. }
  1704. const auto dictTypeChecked = AS_TYPE(TDictType, dict.GetStaticType());
  1705. const auto itemType = NewTupleType({ dictTypeChecked->GetKeyType(), dictTypeChecked->GetPayloadType() });
  1706. TCallableBuilder callableBuilder(Env, __func__, NewListType(itemType));
  1707. callableBuilder.Add(dict);
  1708. return TRuntimeNode(callableBuilder.Build(), false);
  1709. }
  1710. TRuntimeNode TProgramBuilder::DictKeys(TRuntimeNode dict) {
  1711. if constexpr (RuntimeVersion < 6U) {
  1712. return DictItems(dict, EDictItems::Keys);
  1713. }
  1714. const auto dictTypeChecked = AS_TYPE(TDictType, dict.GetStaticType());
  1715. TCallableBuilder callableBuilder(Env, __func__, NewListType(dictTypeChecked->GetKeyType()));
  1716. callableBuilder.Add(dict);
  1717. return TRuntimeNode(callableBuilder.Build(), false);
  1718. }
  1719. TRuntimeNode TProgramBuilder::DictPayloads(TRuntimeNode dict) {
  1720. if constexpr (RuntimeVersion < 6U) {
  1721. return DictItems(dict, EDictItems::Payloads);
  1722. }
  1723. const auto dictTypeChecked = AS_TYPE(TDictType, dict.GetStaticType());
  1724. TCallableBuilder callableBuilder(Env, __func__, NewListType(dictTypeChecked->GetPayloadType()));
  1725. callableBuilder.Add(dict);
  1726. return TRuntimeNode(callableBuilder.Build(), false);
  1727. }
  1728. TRuntimeNode TProgramBuilder::ToIndexDict(TRuntimeNode list) {
  1729. const auto itemType = AS_TYPE(TListType, list.GetStaticType())->GetItemType();
  1730. ThrowIfListOfVoid(itemType);
  1731. const auto keyType = NewDataType(NUdf::TDataType<ui64>::Id);
  1732. const auto dictType = NewDictType(keyType, itemType, false);
  1733. TCallableBuilder callableBuilder(Env, __func__, dictType);
  1734. callableBuilder.Add(list);
  1735. return TRuntimeNode(callableBuilder.Build(), false);
  1736. }
  1737. TRuntimeNode TProgramBuilder::JoinDict(TRuntimeNode dict1, bool isMulti1, TRuntimeNode dict2, bool isMulti2, EJoinKind joinKind) {
  1738. const auto dict1type = AS_TYPE(TDictType, dict1);
  1739. const auto dict2type = AS_TYPE(TDictType, dict2);
  1740. MKQL_ENSURE(dict1type->GetKeyType()->IsSameType(*dict2type->GetKeyType()), "Dict key types must be the same");
  1741. if (joinKind == EJoinKind::RightOnly || joinKind == EJoinKind::RightSemi)
  1742. MKQL_ENSURE(dict1type->GetPayloadType()->IsVoid(), "Void required for first dict payload.");
  1743. else if (isMulti1)
  1744. MKQL_ENSURE(dict1type->GetPayloadType()->IsList(), "List required for first dict payload.");
  1745. if (joinKind == EJoinKind::LeftOnly || joinKind == EJoinKind::LeftSemi)
  1746. MKQL_ENSURE(dict2type->GetPayloadType()->IsVoid(), "Void required for second dict payload.");
  1747. else if (isMulti2)
  1748. MKQL_ENSURE(dict2type->GetPayloadType()->IsList(), "List required for second dict payload.");
  1749. std::array<TType*, 2> tupleItems = {{ dict1type->GetPayloadType(), dict2type->GetPayloadType() }};
  1750. if (isMulti1 && tupleItems.front()->IsList())
  1751. tupleItems.front() = AS_TYPE(TListType, tupleItems.front())->GetItemType();
  1752. if (isMulti2 && tupleItems.back()->IsList())
  1753. tupleItems.back() = AS_TYPE(TListType, tupleItems.back())->GetItemType();
  1754. if (IsLeftOptional(joinKind))
  1755. tupleItems.front() = NewOptionalType(tupleItems.front());
  1756. if (IsRightOptional(joinKind))
  1757. tupleItems.back() = NewOptionalType(tupleItems.back());
  1758. TType* itemType;
  1759. if (joinKind == EJoinKind::LeftOnly || joinKind == EJoinKind::LeftSemi)
  1760. itemType = tupleItems.front();
  1761. else if (joinKind == EJoinKind::RightOnly || joinKind == EJoinKind::RightSemi)
  1762. itemType = tupleItems.back();
  1763. else
  1764. itemType = NewTupleType(tupleItems);
  1765. const auto returnType = NewListType(itemType);
  1766. TCallableBuilder callableBuilder(Env, __func__, returnType);
  1767. callableBuilder.Add(dict1);
  1768. callableBuilder.Add(dict2);
  1769. callableBuilder.Add(NewDataLiteral(isMulti1));
  1770. callableBuilder.Add(NewDataLiteral(isMulti2));
  1771. callableBuilder.Add(NewDataLiteral(ui32(joinKind)));
  1772. return TRuntimeNode(callableBuilder.Build(), false);
  1773. }
  1774. TRuntimeNode TProgramBuilder::GraceJoinCommon(const TStringBuf& funcName, TRuntimeNode flowLeft, TRuntimeNode flowRight, EJoinKind joinKind,
  1775. const TArrayRef<const ui32>& leftKeyColumns, const TArrayRef<const ui32>& rightKeyColumns,
  1776. const TArrayRef<const ui32>& leftRenames, const TArrayRef<const ui32>& rightRenames, TType* returnType, EAnyJoinSettings anyJoinSettings ) {
  1777. MKQL_ENSURE(!leftKeyColumns.empty(), "At least one key column must be specified");
  1778. if (flowRight) {
  1779. MKQL_ENSURE(!rightKeyColumns.empty(), "At least one key column must be specified");
  1780. }
  1781. TRuntimeNode::TList leftKeyColumnsNodes, rightKeyColumnsNodes, leftRenamesNodes, rightRenamesNodes;
  1782. leftKeyColumnsNodes.reserve(leftKeyColumns.size());
  1783. std::transform(leftKeyColumns.cbegin(), leftKeyColumns.cend(), std::back_inserter(leftKeyColumnsNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  1784. rightKeyColumnsNodes.reserve(rightKeyColumns.size());
  1785. std::transform(rightKeyColumns.cbegin(), rightKeyColumns.cend(), std::back_inserter(rightKeyColumnsNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  1786. leftRenamesNodes.reserve(leftRenames.size());
  1787. std::transform(leftRenames.cbegin(), leftRenames.cend(), std::back_inserter(leftRenamesNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  1788. rightRenamesNodes.reserve(rightRenames.size());
  1789. std::transform(rightRenames.cbegin(), rightRenames.cend(), std::back_inserter(rightRenamesNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  1790. TCallableBuilder callableBuilder(Env, funcName, returnType);
  1791. callableBuilder.Add(flowLeft);
  1792. if (flowRight) {
  1793. callableBuilder.Add(flowRight);
  1794. }
  1795. callableBuilder.Add(NewDataLiteral((ui32)joinKind));
  1796. callableBuilder.Add(NewTuple(leftKeyColumnsNodes));
  1797. callableBuilder.Add(NewTuple(rightKeyColumnsNodes));
  1798. callableBuilder.Add(NewTuple(leftRenamesNodes));
  1799. callableBuilder.Add(NewTuple(rightRenamesNodes));
  1800. callableBuilder.Add(NewDataLiteral((ui32)anyJoinSettings));
  1801. return TRuntimeNode(callableBuilder.Build(), false);
  1802. }
  1803. TRuntimeNode TProgramBuilder::GraceJoin(TRuntimeNode flowLeft, TRuntimeNode flowRight, EJoinKind joinKind,
  1804. const TArrayRef<const ui32>& leftKeyColumns, const TArrayRef<const ui32>& rightKeyColumns,
  1805. const TArrayRef<const ui32>& leftRenames, const TArrayRef<const ui32>& rightRenames, TType* returnType, EAnyJoinSettings anyJoinSettings ) {
  1806. return GraceJoinCommon(__func__, flowLeft, flowRight, joinKind, leftKeyColumns, rightKeyColumns, leftRenames, rightRenames, returnType, anyJoinSettings);
  1807. }
  1808. TRuntimeNode TProgramBuilder::GraceSelfJoin(TRuntimeNode flowLeft, EJoinKind joinKind, const TArrayRef<const ui32>& leftKeyColumns, const TArrayRef<const ui32>& rightKeyColumns,
  1809. const TArrayRef<const ui32>& leftRenames, const TArrayRef<const ui32>& rightRenames, TType* returnType, EAnyJoinSettings anyJoinSettings ) {
  1810. if constexpr (RuntimeVersion < 40U) {
  1811. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1812. }
  1813. return GraceJoinCommon(__func__, flowLeft, {}, joinKind, leftKeyColumns, rightKeyColumns, leftRenames, rightRenames, returnType, anyJoinSettings);
  1814. }
  1815. TRuntimeNode TProgramBuilder::ToSortedDict(TRuntimeNode list, bool all, const TUnaryLambda& keySelector,
  1816. const TUnaryLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1817. return ToDict(list, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1818. }
  1819. TRuntimeNode TProgramBuilder::ToHashedDict(TRuntimeNode list, bool all, const TUnaryLambda& keySelector,
  1820. const TUnaryLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1821. return ToDict(list, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1822. }
  1823. TRuntimeNode TProgramBuilder::SqueezeToSortedDict(TRuntimeNode stream, bool all, const TUnaryLambda& keySelector,
  1824. const TUnaryLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1825. return SqueezeToDict(stream, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1826. }
  1827. TRuntimeNode TProgramBuilder::SqueezeToHashedDict(TRuntimeNode stream, bool all, const TUnaryLambda& keySelector,
  1828. const TUnaryLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1829. return SqueezeToDict(stream, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1830. }
  1831. TRuntimeNode TProgramBuilder::NarrowSqueezeToSortedDict(TRuntimeNode stream, bool all, const TNarrowLambda& keySelector,
  1832. const TNarrowLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1833. return NarrowSqueezeToDict(stream, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1834. }
  1835. TRuntimeNode TProgramBuilder::NarrowSqueezeToHashedDict(TRuntimeNode stream, bool all, const TNarrowLambda& keySelector,
  1836. const TNarrowLambda& payloadSelector, bool isCompact, ui64 itemsCountHint) {
  1837. return NarrowSqueezeToDict(stream, all, keySelector, payloadSelector, __func__, isCompact, itemsCountHint);
  1838. }
  1839. TRuntimeNode TProgramBuilder::SqueezeToList(TRuntimeNode flow, TRuntimeNode limit) {
  1840. if constexpr (RuntimeVersion < 25U) {
  1841. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  1842. }
  1843. const auto itemType = AS_TYPE(TFlowType, flow.GetStaticType())->GetItemType();
  1844. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewListType(itemType)));
  1845. callableBuilder.Add(flow);
  1846. callableBuilder.Add(limit);
  1847. return TRuntimeNode(callableBuilder.Build(), false);
  1848. }
  1849. TRuntimeNode TProgramBuilder::Append(TRuntimeNode list, TRuntimeNode item) {
  1850. auto listType = list.GetStaticType();
  1851. AS_TYPE(TListType, listType);
  1852. const auto& listDetailedType = static_cast<const TListType&>(*listType);
  1853. auto itemType = item.GetStaticType();
  1854. MKQL_ENSURE(itemType->IsSameType(*listDetailedType.GetItemType()), "Types of list and item are different");
  1855. TCallableBuilder callableBuilder(Env, __func__, listType);
  1856. callableBuilder.Add(list);
  1857. callableBuilder.Add(item);
  1858. return TRuntimeNode(callableBuilder.Build(), false);
  1859. }
  1860. TRuntimeNode TProgramBuilder::Prepend(TRuntimeNode item, TRuntimeNode list) {
  1861. auto listType = list.GetStaticType();
  1862. AS_TYPE(TListType, listType);
  1863. const auto& listDetailedType = static_cast<const TListType&>(*listType);
  1864. auto itemType = item.GetStaticType();
  1865. MKQL_ENSURE(itemType->IsSameType(*listDetailedType.GetItemType()), "Types of list and item are different");
  1866. TCallableBuilder callableBuilder(Env, __func__, listType);
  1867. callableBuilder.Add(item);
  1868. callableBuilder.Add(list);
  1869. return TRuntimeNode(callableBuilder.Build(), false);
  1870. }
  1871. TRuntimeNode TProgramBuilder::BuildExtend(const std::string_view& callableName, const TArrayRef<const TRuntimeNode>& lists) {
  1872. MKQL_ENSURE(lists.size() > 0, "Expected at least 1 list or flow");
  1873. if (lists.size() == 1) {
  1874. return lists.front();
  1875. }
  1876. auto listType = lists.front().GetStaticType();
  1877. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream(), "Expected either flow, list or stream");
  1878. for (ui32 i = 1; i < lists.size(); ++i) {
  1879. auto listType2 = lists[i].GetStaticType();
  1880. MKQL_ENSURE(listType->IsSameType(*listType2), "Types of flows are different, left: " <<
  1881. PrintNode(listType, true) << ", right: " <<
  1882. PrintNode(listType2, true));
  1883. }
  1884. TCallableBuilder callableBuilder(Env, callableName, listType);
  1885. for (auto list : lists) {
  1886. callableBuilder.Add(list);
  1887. }
  1888. return TRuntimeNode(callableBuilder.Build(), false);
  1889. }
  1890. TRuntimeNode TProgramBuilder::Extend(const TArrayRef<const TRuntimeNode>& lists) {
  1891. return BuildExtend(__func__, lists);
  1892. }
  1893. TRuntimeNode TProgramBuilder::OrderedExtend(const TArrayRef<const TRuntimeNode>& lists) {
  1894. return BuildExtend(__func__, lists);
  1895. }
  1896. template<>
  1897. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::String>(const NUdf::TStringRef& data) const {
  1898. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<const char*>::Id, Env), true);
  1899. }
  1900. template<>
  1901. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Utf8>(const NUdf::TStringRef& data) const {
  1902. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TUtf8>::Id, Env), true);
  1903. }
  1904. template<>
  1905. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Yson>(const NUdf::TStringRef& data) const {
  1906. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TYson>::Id, Env), true);
  1907. }
  1908. template<>
  1909. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Json>(const NUdf::TStringRef& data) const {
  1910. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TJson>::Id, Env), true);
  1911. }
  1912. template<>
  1913. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::JsonDocument>(const NUdf::TStringRef& data) const {
  1914. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TJsonDocument>::Id, Env), true);
  1915. }
  1916. template<>
  1917. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Uuid>(const NUdf::TStringRef& data) const {
  1918. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TUuid>::Id, Env), true);
  1919. }
  1920. template<>
  1921. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Date>(const NUdf::TStringRef& data) const {
  1922. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDate>::Id, Env), true);
  1923. }
  1924. template<>
  1925. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Datetime>(const NUdf::TStringRef& data) const {
  1926. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDatetime>::Id, Env), true);
  1927. }
  1928. template<>
  1929. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Timestamp>(const NUdf::TStringRef& data) const {
  1930. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TTimestamp>::Id, Env), true);
  1931. }
  1932. template<>
  1933. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Interval>(const NUdf::TStringRef& data) const {
  1934. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TInterval>::Id, Env), true);
  1935. }
  1936. template<>
  1937. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::DyNumber>(const NUdf::TStringRef& data) const {
  1938. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDyNumber>::Id, Env), true);
  1939. }
  1940. TRuntimeNode TProgramBuilder::NewDecimalLiteral(NYql::NDecimal::TInt128 data, ui8 precision, ui8 scale) const {
  1941. return TRuntimeNode(TDataLiteral::Create(NUdf::TUnboxedValuePod(data), TDataDecimalType::Create(precision, scale, Env), Env), true);
  1942. }
  1943. template<>
  1944. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Date32>(const NUdf::TStringRef& data) const {
  1945. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDate32>::Id, Env), true);
  1946. }
  1947. template<>
  1948. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Datetime64>(const NUdf::TStringRef& data) const {
  1949. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TDatetime64>::Id, Env), true);
  1950. }
  1951. template<>
  1952. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Timestamp64>(const NUdf::TStringRef& data) const {
  1953. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TTimestamp64>::Id, Env), true);
  1954. }
  1955. template<>
  1956. TRuntimeNode TProgramBuilder::NewDataLiteral<NUdf::EDataSlot::Interval64>(const NUdf::TStringRef& data) const {
  1957. return TRuntimeNode(BuildDataLiteral(data, NUdf::TDataType<NUdf::TInterval64>::Id, Env), true);
  1958. }
  1959. TRuntimeNode TProgramBuilder::NewOptional(TRuntimeNode data) {
  1960. auto type = TOptionalType::Create(data.GetStaticType(), Env);
  1961. return TRuntimeNode(TOptionalLiteral::Create(data, type, Env), true);
  1962. }
  1963. TRuntimeNode TProgramBuilder::NewOptional(TType* optionalType, TRuntimeNode data) {
  1964. auto type = AS_TYPE(TOptionalType, optionalType);
  1965. return TRuntimeNode(TOptionalLiteral::Create(data, type, Env), true);
  1966. }
  1967. TRuntimeNode TProgramBuilder::NewVoid() {
  1968. return TRuntimeNode(Env.GetVoidLazy(), true);
  1969. }
  1970. TRuntimeNode TProgramBuilder::NewEmptyListOfVoid() {
  1971. return TRuntimeNode(Env.GetListOfVoidLazy(), true);
  1972. }
  1973. TRuntimeNode TProgramBuilder::NewEmptyOptional(TType* optionalOrPgType) {
  1974. MKQL_ENSURE(optionalOrPgType->IsOptional() || optionalOrPgType->IsPg(), "Expected optional or pg type");
  1975. if (optionalOrPgType->IsOptional()) {
  1976. return TRuntimeNode(TOptionalLiteral::Create(static_cast<TOptionalType*>(optionalOrPgType), Env), true);
  1977. }
  1978. return PgCast(NewNull(), optionalOrPgType);
  1979. }
  1980. TRuntimeNode TProgramBuilder::NewEmptyOptionalDataLiteral(NUdf::TDataTypeId schemeType) {
  1981. return TRuntimeNode(BuildEmptyOptionalDataLiteral(schemeType, Env), true);
  1982. }
  1983. TRuntimeNode TProgramBuilder::NewEmptyStruct() {
  1984. return TRuntimeNode(Env.GetEmptyStructLazy(), true);
  1985. }
  1986. TRuntimeNode TProgramBuilder::NewStruct(const TArrayRef<const std::pair<std::string_view, TRuntimeNode>>& members) {
  1987. if (members.empty()) {
  1988. return NewEmptyStruct();
  1989. }
  1990. TStructLiteralBuilder builder(Env);
  1991. for (auto x : members) {
  1992. builder.Add(x.first, x.second);
  1993. }
  1994. return TRuntimeNode(builder.Build(), true);
  1995. }
  1996. TRuntimeNode TProgramBuilder::NewStruct(TType* structType, const TArrayRef<const std::pair<std::string_view, TRuntimeNode>>& members) {
  1997. const auto detailedStructType = AS_TYPE(TStructType, structType);
  1998. MKQL_ENSURE(members.size() == detailedStructType->GetMembersCount(), "Mismatch count of members");
  1999. if (members.empty()) {
  2000. return NewEmptyStruct();
  2001. }
  2002. std::vector<TRuntimeNode> values(detailedStructType->GetMembersCount());
  2003. for (ui32 i = 0; i < detailedStructType->GetMembersCount(); ++i) {
  2004. const auto& name = members[i].first;
  2005. ui32 index = detailedStructType->GetMemberIndex(name);
  2006. MKQL_ENSURE(!values[index], "Duplicate of member: " << name);
  2007. values[index] = members[i].second;
  2008. }
  2009. return TRuntimeNode(TStructLiteral::Create(values.size(), values.data(), detailedStructType, Env), true);
  2010. }
  2011. TRuntimeNode TProgramBuilder::NewEmptyList() {
  2012. return TRuntimeNode(Env.GetEmptyListLazy(), true);
  2013. }
  2014. TRuntimeNode TProgramBuilder::NewEmptyList(TType* itemType) {
  2015. TListLiteralBuilder builder(Env, itemType);
  2016. return TRuntimeNode(builder.Build(), true);
  2017. }
  2018. TRuntimeNode TProgramBuilder::NewList(TType* itemType, const TArrayRef<const TRuntimeNode>& items) {
  2019. TListLiteralBuilder builder(Env, itemType);
  2020. for (auto item : items) {
  2021. builder.Add(item);
  2022. }
  2023. return TRuntimeNode(builder.Build(), true);
  2024. }
  2025. TRuntimeNode TProgramBuilder::NewEmptyDict() {
  2026. return TRuntimeNode(Env.GetEmptyDictLazy(), true);
  2027. }
  2028. TRuntimeNode TProgramBuilder::NewDict(TType* dictType, const TArrayRef<const std::pair<TRuntimeNode, TRuntimeNode>>& items) {
  2029. MKQL_ENSURE(dictType->IsDict(), "Expected dict type");
  2030. return TRuntimeNode(TDictLiteral::Create(items.size(), items.data(), static_cast<TDictType*>(dictType), Env), true);
  2031. }
  2032. TRuntimeNode TProgramBuilder::NewEmptyTuple() {
  2033. return TRuntimeNode(Env.GetEmptyTupleLazy(), true);
  2034. }
  2035. TRuntimeNode TProgramBuilder::NewTuple(TType* tupleType, const TArrayRef<const TRuntimeNode>& elements) {
  2036. MKQL_ENSURE(tupleType->IsTuple(), "Expected tuple type");
  2037. return TRuntimeNode(TTupleLiteral::Create(elements.size(), elements.data(), static_cast<TTupleType*>(tupleType), Env), true);
  2038. }
  2039. TRuntimeNode TProgramBuilder::NewTuple(const TArrayRef<const TRuntimeNode>& elements) {
  2040. std::vector<TType*> types;
  2041. types.reserve(elements.size());
  2042. for (auto elem : elements) {
  2043. types.push_back(elem.GetStaticType());
  2044. }
  2045. return NewTuple(NewTupleType(types), elements);
  2046. }
  2047. TRuntimeNode TProgramBuilder::NewVariant(TRuntimeNode item, ui32 index, TType* variantType) {
  2048. const auto type = AS_TYPE(TVariantType, variantType);
  2049. MKQL_ENSURE(type->GetUnderlyingType()->IsTuple(), "Expected tuple as underlying type");
  2050. return TRuntimeNode(TVariantLiteral::Create(item, index, type, Env), true);
  2051. }
  2052. TRuntimeNode TProgramBuilder::NewVariant(TRuntimeNode item, const std::string_view& member, TType* variantType) {
  2053. const auto type = AS_TYPE(TVariantType, variantType);
  2054. MKQL_ENSURE(type->GetUnderlyingType()->IsStruct(), "Expected struct as underlying type");
  2055. ui32 index = AS_TYPE(TStructType, type->GetUnderlyingType())->GetMemberIndex(member);
  2056. return TRuntimeNode(TVariantLiteral::Create(item, index, type, Env), true);
  2057. }
  2058. TRuntimeNode TProgramBuilder::Coalesce(TRuntimeNode data, TRuntimeNode defaultData) {
  2059. bool isOptional = false;
  2060. const auto dataType = UnpackOptional(data, isOptional);
  2061. if (!isOptional && !data.GetStaticType()->IsPg()) {
  2062. MKQL_ENSURE(data.GetStaticType()->IsSameType(*defaultData.GetStaticType()), "Mismatch operand types");
  2063. return data;
  2064. }
  2065. if (!dataType->IsSameType(*defaultData.GetStaticType())) {
  2066. bool isOptionalDefault;
  2067. const auto defaultDataType = UnpackOptional(defaultData, isOptionalDefault);
  2068. MKQL_ENSURE(dataType->IsSameType(*defaultDataType), "Mismatch operand types");
  2069. }
  2070. TCallableBuilder callableBuilder(Env, __func__, defaultData.GetStaticType());
  2071. callableBuilder.Add(data);
  2072. callableBuilder.Add(defaultData);
  2073. return TRuntimeNode(callableBuilder.Build(), false);
  2074. }
  2075. TRuntimeNode TProgramBuilder::Unwrap(TRuntimeNode optional, TRuntimeNode message, const std::string_view& file, ui32 row, ui32 column) {
  2076. bool isOptional;
  2077. auto underlyingType = UnpackOptional(optional, isOptional);
  2078. MKQL_ENSURE(isOptional, "Expected optional");
  2079. const auto& messageType = message.GetStaticType();
  2080. MKQL_ENSURE(messageType->IsData(), "Expected data");
  2081. const auto& messageTypeData = static_cast<const TDataType&>(*messageType);
  2082. MKQL_ENSURE(messageTypeData.GetSchemeType() == NUdf::TDataType<char*>::Id || messageTypeData.GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id, "Expected string or utf8.");
  2083. TCallableBuilder callableBuilder(Env, __func__, underlyingType);
  2084. callableBuilder.Add(optional);
  2085. callableBuilder.Add(message);
  2086. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  2087. callableBuilder.Add(NewDataLiteral(row));
  2088. callableBuilder.Add(NewDataLiteral(column));
  2089. return TRuntimeNode(callableBuilder.Build(), false);
  2090. }
  2091. TRuntimeNode TProgramBuilder::Increment(TRuntimeNode data) {
  2092. const std::array<TRuntimeNode, 1> args = {{ data }};
  2093. bool isOptional;
  2094. const auto type = UnpackOptionalData(data, isOptional);
  2095. if (type->GetSchemeType() != NUdf::TDataType<NUdf::TDecimal>::Id)
  2096. return Invoke(__func__, data.GetStaticType(), args);
  2097. return Invoke(TString("Inc_") += ::ToString(static_cast<TDataDecimalType*>(type)->GetParams().first), data.GetStaticType(), args);
  2098. }
  2099. TRuntimeNode TProgramBuilder::Decrement(TRuntimeNode data) {
  2100. const std::array<TRuntimeNode, 1> args = {{ data }};
  2101. bool isOptional;
  2102. const auto type = UnpackOptionalData(data, isOptional);
  2103. if (type->GetSchemeType() != NUdf::TDataType<NUdf::TDecimal>::Id)
  2104. return Invoke(__func__, data.GetStaticType(), args);
  2105. return Invoke(TString("Dec_") += ::ToString(static_cast<TDataDecimalType*>(type)->GetParams().first), data.GetStaticType(), args);
  2106. }
  2107. TRuntimeNode TProgramBuilder::Abs(TRuntimeNode data) {
  2108. const std::array<TRuntimeNode, 1> args = {{ data }};
  2109. return Invoke(__func__, data.GetStaticType(), args);
  2110. }
  2111. TRuntimeNode TProgramBuilder::Plus(TRuntimeNode data) {
  2112. const std::array<TRuntimeNode, 1> args = {{ data }};
  2113. return Invoke(__func__, data.GetStaticType(), args);
  2114. }
  2115. TRuntimeNode TProgramBuilder::Minus(TRuntimeNode data) {
  2116. const std::array<TRuntimeNode, 1> args = {{ data }};
  2117. return Invoke(__func__, data.GetStaticType(), args);
  2118. }
  2119. TRuntimeNode TProgramBuilder::Add(TRuntimeNode data1, TRuntimeNode data2) {
  2120. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2121. bool isOptionalLeft;
  2122. const auto leftType = UnpackOptionalData(data1, isOptionalLeft);
  2123. if (leftType->GetSchemeType() != NUdf::TDataType<NUdf::TDecimal>::Id)
  2124. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2125. const auto decimalType = static_cast<TDataDecimalType*>(leftType);
  2126. bool isOptionalRight;
  2127. const auto rightType = static_cast<TDataDecimalType*>(UnpackOptionalData(data2, isOptionalRight));
  2128. MKQL_ENSURE(rightType->IsSameType(*decimalType), "Operands type mismatch");
  2129. const auto resultType = isOptionalLeft || isOptionalRight ? NewOptionalType(decimalType) : decimalType;
  2130. return Invoke(TString("Add_") += ::ToString(decimalType->GetParams().first), resultType, args);
  2131. }
  2132. TRuntimeNode TProgramBuilder::Sub(TRuntimeNode data1, TRuntimeNode data2) {
  2133. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2134. bool isOptionalLeft;
  2135. const auto leftType = UnpackOptionalData(data1, isOptionalLeft);
  2136. if (leftType->GetSchemeType() != NUdf::TDataType<NUdf::TDecimal>::Id)
  2137. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2138. const auto decimalType = static_cast<TDataDecimalType*>(leftType);
  2139. bool isOptionalRight;
  2140. const auto rightType = static_cast<TDataDecimalType*>(UnpackOptionalData(data2, isOptionalRight));
  2141. MKQL_ENSURE(rightType->IsSameType(*decimalType), "Operands type mismatch");
  2142. const auto resultType = isOptionalLeft || isOptionalRight ? NewOptionalType(decimalType) : decimalType;
  2143. return Invoke(TString("Sub_") += ::ToString(decimalType->GetParams().first), resultType, args);
  2144. }
  2145. TRuntimeNode TProgramBuilder::Mul(TRuntimeNode data1, TRuntimeNode data2) {
  2146. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2147. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2148. }
  2149. TRuntimeNode TProgramBuilder::Div(TRuntimeNode data1, TRuntimeNode data2) {
  2150. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2151. auto resultType = BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType());
  2152. if (resultType->IsData() && !(NUdf::GetDataTypeInfo(*static_cast<TDataType*>(resultType)->GetDataSlot()).Features & (NUdf::EDataTypeFeatures::FloatType | NUdf::EDataTypeFeatures::DecimalType))) {
  2153. resultType = NewOptionalType(resultType);
  2154. }
  2155. return Invoke(__func__, resultType, args);
  2156. }
  2157. TRuntimeNode TProgramBuilder::DecimalDiv(TRuntimeNode data1, TRuntimeNode data2) {
  2158. bool isOptionalLeft, isOptionalRight;
  2159. const auto leftType = static_cast<TDataDecimalType*>(UnpackOptionalData(data1, isOptionalLeft));
  2160. const auto rightType = UnpackOptionalData(data2, isOptionalRight);
  2161. if (rightType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id)
  2162. MKQL_ENSURE(static_cast<TDataDecimalType*>(rightType)->IsSameType(*leftType), "Operands type mismatch");
  2163. else
  2164. MKQL_ENSURE(NUdf::GetDataTypeInfo(*rightType->GetDataSlot()).Features & NUdf::IntegralType, "Operands type mismatch");
  2165. const auto returnType = isOptionalLeft || isOptionalRight ? NewOptionalType(leftType) : leftType;
  2166. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2167. callableBuilder.Add(data1);
  2168. callableBuilder.Add(data2);
  2169. return TRuntimeNode(callableBuilder.Build(), false);
  2170. }
  2171. TRuntimeNode TProgramBuilder::DecimalMod(TRuntimeNode data1, TRuntimeNode data2) {
  2172. bool isOptionalLeft, isOptionalRight;
  2173. const auto leftType = static_cast<TDataDecimalType*>(UnpackOptionalData(data1, isOptionalLeft));
  2174. const auto rightType = UnpackOptionalData(data2, isOptionalRight);
  2175. if (rightType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id)
  2176. MKQL_ENSURE(static_cast<TDataDecimalType*>(rightType)->IsSameType(*leftType), "Operands type mismatch");
  2177. else
  2178. MKQL_ENSURE(NUdf::GetDataTypeInfo(*rightType->GetDataSlot()).Features & NUdf::IntegralType, "Operands type mismatch");
  2179. const auto returnType = isOptionalLeft || isOptionalRight ? NewOptionalType(leftType) : leftType;
  2180. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2181. callableBuilder.Add(data1);
  2182. callableBuilder.Add(data2);
  2183. return TRuntimeNode(callableBuilder.Build(), false);
  2184. }
  2185. TRuntimeNode TProgramBuilder::DecimalMul(TRuntimeNode data1, TRuntimeNode data2) {
  2186. bool isOptionalLeft, isOptionalRight;
  2187. const auto leftType = static_cast<TDataDecimalType*>(UnpackOptionalData(data1, isOptionalLeft));
  2188. const auto rightType = UnpackOptionalData(data2, isOptionalRight);
  2189. if (rightType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id)
  2190. MKQL_ENSURE(static_cast<TDataDecimalType*>(rightType)->IsSameType(*leftType), "Operands type mismatch");
  2191. else
  2192. MKQL_ENSURE(NUdf::GetDataTypeInfo(*rightType->GetDataSlot()).Features & NUdf::IntegralType, "Operands type mismatch");
  2193. const auto returnType = isOptionalLeft || isOptionalRight ? NewOptionalType(leftType) : leftType;
  2194. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2195. callableBuilder.Add(data1);
  2196. callableBuilder.Add(data2);
  2197. return TRuntimeNode(callableBuilder.Build(), false);
  2198. }
  2199. TRuntimeNode TProgramBuilder::AllOf(TRuntimeNode list, const TUnaryLambda& predicate) {
  2200. return Not(NotAllOf(list, predicate));
  2201. }
  2202. TRuntimeNode TProgramBuilder::NotAllOf(TRuntimeNode list, const TUnaryLambda& predicate) {
  2203. return Exists(ToOptional(SkipWhile(list, predicate)));
  2204. }
  2205. TRuntimeNode TProgramBuilder::BitNot(TRuntimeNode data) {
  2206. const std::array<TRuntimeNode, 1> args = {{ data }};
  2207. return Invoke(__func__, data.GetStaticType(), args);
  2208. }
  2209. TRuntimeNode TProgramBuilder::CountBits(TRuntimeNode data) {
  2210. const std::array<TRuntimeNode, 1> args = {{ data }};
  2211. return Invoke(__func__, data.GetStaticType(), args);
  2212. }
  2213. TRuntimeNode TProgramBuilder::BitAnd(TRuntimeNode data1, TRuntimeNode data2) {
  2214. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2215. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2216. }
  2217. TRuntimeNode TProgramBuilder::BitOr(TRuntimeNode data1, TRuntimeNode data2) {
  2218. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2219. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2220. }
  2221. TRuntimeNode TProgramBuilder::BitXor(TRuntimeNode data1, TRuntimeNode data2) {
  2222. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2223. return Invoke(__func__, BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType()), args);
  2224. }
  2225. TRuntimeNode TProgramBuilder::ShiftLeft(TRuntimeNode arg, TRuntimeNode bits) {
  2226. const std::array<TRuntimeNode, 2> args = {{ arg, bits }};
  2227. return Invoke(__func__, arg.GetStaticType(), args);
  2228. }
  2229. TRuntimeNode TProgramBuilder::RotLeft(TRuntimeNode arg, TRuntimeNode bits) {
  2230. const std::array<TRuntimeNode, 2> args = {{ arg, bits }};
  2231. return Invoke(__func__, arg.GetStaticType(), args);
  2232. }
  2233. TRuntimeNode TProgramBuilder::ShiftRight(TRuntimeNode arg, TRuntimeNode bits) {
  2234. const std::array<TRuntimeNode, 2> args = {{ arg, bits }};
  2235. return Invoke(__func__, arg.GetStaticType(), args);
  2236. }
  2237. TRuntimeNode TProgramBuilder::RotRight(TRuntimeNode arg, TRuntimeNode bits) {
  2238. const std::array<TRuntimeNode, 2> args = {{ arg, bits }};
  2239. return Invoke(__func__, arg.GetStaticType(), args);
  2240. }
  2241. TRuntimeNode TProgramBuilder::Mod(TRuntimeNode data1, TRuntimeNode data2) {
  2242. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2243. auto resultType = BuildArithmeticCommonType(data1.GetStaticType(), data2.GetStaticType());
  2244. if (resultType->IsData() && !(NUdf::GetDataTypeInfo(*static_cast<TDataType*>(resultType)->GetDataSlot()).Features & (NUdf::EDataTypeFeatures::FloatType | NUdf::EDataTypeFeatures::DecimalType))) {
  2245. resultType = NewOptionalType(resultType);
  2246. }
  2247. return Invoke(__func__, resultType, args);
  2248. }
  2249. TRuntimeNode TProgramBuilder::BuildMinMax(const std::string_view& callableName, const TRuntimeNode* data, size_t size) {
  2250. switch (size) {
  2251. case 0U: return NewNull();
  2252. case 1U: return *data;
  2253. case 2U: return InvokeBinary(callableName, ChooseCommonType(data[0U].GetStaticType(), data[1U].GetStaticType()), data[0U], data[1U]);
  2254. default: break;
  2255. }
  2256. const auto half = size >> 1U;
  2257. const std::array<TRuntimeNode, 2U> args = {{ BuildMinMax(callableName, data, half), BuildMinMax(callableName, data + half, size - half) }};
  2258. return BuildMinMax(callableName, args.data(), args.size());
  2259. }
  2260. TRuntimeNode TProgramBuilder::BuildWideSkipTakeBlocks(const std::string_view& callableName, TRuntimeNode flow, TRuntimeNode count) {
  2261. ValidateBlockFlowType(flow.GetStaticType());
  2262. MKQL_ENSURE(count.GetStaticType()->IsData(), "Expected data");
  2263. MKQL_ENSURE(static_cast<const TDataType&>(*count.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  2264. TCallableBuilder callableBuilder(Env, callableName, flow.GetStaticType());
  2265. callableBuilder.Add(flow);
  2266. callableBuilder.Add(count);
  2267. return TRuntimeNode(callableBuilder.Build(), false);
  2268. }
  2269. TRuntimeNode TProgramBuilder::BuildBlockLogical(const std::string_view& callableName, TRuntimeNode first, TRuntimeNode second) {
  2270. auto firstType = AS_TYPE(TBlockType, first.GetStaticType());
  2271. auto secondType = AS_TYPE(TBlockType, second.GetStaticType());
  2272. bool isOpt1, isOpt2;
  2273. MKQL_ENSURE(UnpackOptionalData(firstType->GetItemType(), isOpt1)->GetSchemeType() == NUdf::TDataType<bool>::Id, "Requires boolean args.");
  2274. MKQL_ENSURE(UnpackOptionalData(secondType->GetItemType(), isOpt2)->GetSchemeType() == NUdf::TDataType<bool>::Id, "Requires boolean args.");
  2275. const auto itemType = NewDataType(NUdf::TDataType<bool>::Id, isOpt1 || isOpt2);
  2276. auto outputType = NewBlockType(itemType, GetResultShape({firstType, secondType}));
  2277. TCallableBuilder callableBuilder(Env, callableName, outputType);
  2278. callableBuilder.Add(first);
  2279. callableBuilder.Add(second);
  2280. return TRuntimeNode(callableBuilder.Build(), false);
  2281. }
  2282. TRuntimeNode TProgramBuilder::BuildBlockDecimalBinary(const std::string_view& callableName, TRuntimeNode first, TRuntimeNode second) {
  2283. auto firstType = AS_TYPE(TBlockType, first.GetStaticType());
  2284. auto secondType = AS_TYPE(TBlockType, second.GetStaticType());
  2285. bool isOpt1, isOpt2;
  2286. auto* leftDataType = UnpackOptionalData(firstType->GetItemType(), isOpt1);
  2287. UnpackOptionalData(secondType->GetItemType(), isOpt2);
  2288. MKQL_ENSURE(leftDataType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id, "Requires decimal args.");
  2289. const auto& lParams = static_cast<TDataDecimalType*>(leftDataType)->GetParams();
  2290. auto [precision, scale] = lParams;
  2291. TType* outputType = TDataDecimalType::Create(precision, scale, Env);
  2292. if (isOpt1 || isOpt2) {
  2293. outputType = TOptionalType::Create(outputType, Env);
  2294. }
  2295. outputType = NewBlockType(outputType, TBlockType::EShape::Many);
  2296. TCallableBuilder callableBuilder(Env, callableName, outputType);
  2297. callableBuilder.Add(first);
  2298. callableBuilder.Add(second);
  2299. return TRuntimeNode(callableBuilder.Build(), false);
  2300. }
  2301. TRuntimeNode TProgramBuilder::Min(const TArrayRef<const TRuntimeNode>& args) {
  2302. return BuildMinMax(__func__, args.data(), args.size());
  2303. }
  2304. TRuntimeNode TProgramBuilder::Max(const TArrayRef<const TRuntimeNode>& args) {
  2305. return BuildMinMax(__func__, args.data(), args.size());
  2306. }
  2307. TRuntimeNode TProgramBuilder::Min(TRuntimeNode data1, TRuntimeNode data2) {
  2308. const std::array<TRuntimeNode, 2U> args = {{ data1, data2 }};
  2309. return Min(args);
  2310. }
  2311. TRuntimeNode TProgramBuilder::Max(TRuntimeNode data1, TRuntimeNode data2) {
  2312. const std::array<TRuntimeNode, 2U> args = {{ data1, data2 }};
  2313. return Max(args);
  2314. }
  2315. TRuntimeNode TProgramBuilder::Equals(TRuntimeNode data1, TRuntimeNode data2) {
  2316. return DataCompare(__func__, data1, data2);
  2317. }
  2318. TRuntimeNode TProgramBuilder::NotEquals(TRuntimeNode data1, TRuntimeNode data2) {
  2319. return DataCompare(__func__, data1, data2);
  2320. }
  2321. TRuntimeNode TProgramBuilder::Less(TRuntimeNode data1, TRuntimeNode data2) {
  2322. return DataCompare(__func__, data1, data2);
  2323. }
  2324. TRuntimeNode TProgramBuilder::LessOrEqual(TRuntimeNode data1, TRuntimeNode data2) {
  2325. return DataCompare(__func__, data1, data2);
  2326. }
  2327. TRuntimeNode TProgramBuilder::Greater(TRuntimeNode data1, TRuntimeNode data2) {
  2328. return DataCompare(__func__, data1, data2);
  2329. }
  2330. TRuntimeNode TProgramBuilder::GreaterOrEqual(TRuntimeNode data1, TRuntimeNode data2) {
  2331. return DataCompare(__func__, data1, data2);
  2332. }
  2333. TRuntimeNode TProgramBuilder::InvokeBinary(const std::string_view& callableName, TType* type, TRuntimeNode data1, TRuntimeNode data2) {
  2334. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2335. return Invoke(callableName, type, args);
  2336. }
  2337. TRuntimeNode TProgramBuilder::AggrCompare(const std::string_view& callableName, TRuntimeNode data1, TRuntimeNode data2) {
  2338. return InvokeBinary(callableName, NewDataType(NUdf::TDataType<bool>::Id), data1, data2);
  2339. }
  2340. TRuntimeNode TProgramBuilder::DataCompare(const std::string_view& callableName, TRuntimeNode left, TRuntimeNode right) {
  2341. bool isOptionalLeft, isOptionalRight;
  2342. const auto leftType = UnpackOptionalData(left, isOptionalLeft);
  2343. const auto rightType = UnpackOptionalData(right, isOptionalRight);
  2344. const auto lId = leftType->GetSchemeType();
  2345. const auto rId = rightType->GetSchemeType();
  2346. if (lId == NUdf::TDataType<NUdf::TDecimal>::Id && rId == NUdf::TDataType<NUdf::TDecimal>::Id) {
  2347. const auto& lDec = static_cast<TDataDecimalType*>(leftType)->GetParams();
  2348. const auto& rDec = static_cast<TDataDecimalType*>(rightType)->GetParams();
  2349. if (lDec.second < rDec.second) {
  2350. left = ToDecimal(left, std::min<ui8>(lDec.first + rDec.second - lDec.second, NYql::NDecimal::MaxPrecision), rDec.second);
  2351. } else if (lDec.second > rDec.second) {
  2352. right = ToDecimal(right, std::min<ui8>(rDec.first + lDec.second - rDec.second, NYql::NDecimal::MaxPrecision), lDec.second);
  2353. }
  2354. } else if (lId == NUdf::TDataType<NUdf::TDecimal>::Id && NUdf::GetDataTypeInfo(NUdf::GetDataSlot(rId)).Features & NUdf::EDataTypeFeatures::IntegralType) {
  2355. const auto scale = static_cast<TDataDecimalType*>(leftType)->GetParams().second;
  2356. right = ToDecimal(right, std::min<ui8>(NYql::NDecimal::MaxPrecision, NUdf::GetDataTypeInfo(NUdf::GetDataSlot(rId)).DecimalDigits + scale), scale);
  2357. } else if (rId == NUdf::TDataType<NUdf::TDecimal>::Id && NUdf::GetDataTypeInfo(NUdf::GetDataSlot(lId)).Features & NUdf::EDataTypeFeatures::IntegralType) {
  2358. const auto scale = static_cast<TDataDecimalType*>(rightType)->GetParams().second;
  2359. left = ToDecimal(left, std::min<ui8>(NYql::NDecimal::MaxPrecision, NUdf::GetDataTypeInfo(NUdf::GetDataSlot(lId)).DecimalDigits + scale), scale);
  2360. }
  2361. const std::array<TRuntimeNode, 2> args = {{ left, right }};
  2362. const auto resultType = isOptionalLeft || isOptionalRight ? NewOptionalType(NewDataType(NUdf::TDataType<bool>::Id)) : NewDataType(NUdf::TDataType<bool>::Id);
  2363. return Invoke(callableName, resultType, args);
  2364. }
  2365. TRuntimeNode TProgramBuilder::BuildRangeLogical(const std::string_view& callableName, const TArrayRef<const TRuntimeNode>& lists) {
  2366. MKQL_ENSURE(!lists.empty(), "Expecting at least one argument");
  2367. for (auto& list : lists) {
  2368. MKQL_ENSURE(list.GetStaticType()->IsList(), "Expecting lists");
  2369. MKQL_ENSURE(list.GetStaticType()->IsSameType(*lists.front().GetStaticType()), "Expecting arguments of same type");
  2370. }
  2371. TCallableBuilder callableBuilder(Env, callableName, lists.front().GetStaticType());
  2372. for (auto& list : lists) {
  2373. callableBuilder.Add(list);
  2374. }
  2375. return TRuntimeNode(callableBuilder.Build(), false);
  2376. }
  2377. TRuntimeNode TProgramBuilder::AggrEquals(TRuntimeNode data1, TRuntimeNode data2) {
  2378. return AggrCompare(__func__, data1, data2);
  2379. }
  2380. TRuntimeNode TProgramBuilder::AggrNotEquals(TRuntimeNode data1, TRuntimeNode data2) {
  2381. return AggrCompare(__func__, data1, data2);
  2382. }
  2383. TRuntimeNode TProgramBuilder::AggrLess(TRuntimeNode data1, TRuntimeNode data2) {
  2384. return AggrCompare(__func__, data1, data2);
  2385. }
  2386. TRuntimeNode TProgramBuilder::AggrLessOrEqual(TRuntimeNode data1, TRuntimeNode data2) {
  2387. return AggrCompare(__func__, data1, data2);
  2388. }
  2389. TRuntimeNode TProgramBuilder::AggrGreater(TRuntimeNode data1, TRuntimeNode data2) {
  2390. return AggrCompare(__func__, data1, data2);
  2391. }
  2392. TRuntimeNode TProgramBuilder::AggrGreaterOrEqual(TRuntimeNode data1, TRuntimeNode data2) {
  2393. return AggrCompare(__func__, data1, data2);
  2394. }
  2395. TRuntimeNode TProgramBuilder::If(TRuntimeNode condition, TRuntimeNode thenBranch, TRuntimeNode elseBranch) {
  2396. bool condOpt, thenOpt, elseOpt;
  2397. const auto conditionType = UnpackOptionalData(condition, condOpt);
  2398. MKQL_ENSURE(conditionType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool");
  2399. const auto thenUnpacked = UnpackOptional(thenBranch, thenOpt);
  2400. const auto elseUnpacked = UnpackOptional(elseBranch, elseOpt);
  2401. MKQL_ENSURE(thenUnpacked->IsSameType(*elseUnpacked), "Different return types in branches.");
  2402. const bool isOptional = condOpt || thenOpt || elseOpt;
  2403. TCallableBuilder callableBuilder(Env, __func__, isOptional ? NewOptionalType(thenUnpacked) : thenUnpacked);
  2404. callableBuilder.Add(condition);
  2405. callableBuilder.Add(thenBranch);
  2406. callableBuilder.Add(elseBranch);
  2407. return TRuntimeNode(callableBuilder.Build(), false);
  2408. }
  2409. TRuntimeNode TProgramBuilder::If(const TArrayRef<const TRuntimeNode>& args) {
  2410. MKQL_ENSURE(args.size() % 2U, "Expected odd arguments.");
  2411. MKQL_ENSURE(args.size() >= 3U, "Expected at least three arguments.");
  2412. return If(args.front(), args[1U], 3U == args.size() ? args.back() : If(args.last(args.size() - 2U)));
  2413. }
  2414. TRuntimeNode TProgramBuilder::If(TRuntimeNode condition, TRuntimeNode thenBranch, TRuntimeNode elseBranch, TType* resultType) {
  2415. bool condOpt;
  2416. const auto conditionType = UnpackOptionalData(condition, condOpt);
  2417. MKQL_ENSURE(conditionType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool");
  2418. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2419. callableBuilder.Add(condition);
  2420. callableBuilder.Add(thenBranch);
  2421. callableBuilder.Add(elseBranch);
  2422. return TRuntimeNode(callableBuilder.Build(), false);
  2423. }
  2424. TRuntimeNode TProgramBuilder::Ensure(TRuntimeNode value, TRuntimeNode predicate, TRuntimeNode message, const std::string_view& file, ui32 row, ui32 column) {
  2425. bool isOptional;
  2426. const auto unpackedType = UnpackOptionalData(predicate, isOptional);
  2427. MKQL_ENSURE(unpackedType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool");
  2428. const auto& messageType = message.GetStaticType();
  2429. MKQL_ENSURE(messageType->IsData(), "Expected data");
  2430. const auto& messageTypeData = static_cast<const TDataType&>(*messageType);
  2431. MKQL_ENSURE(messageTypeData.GetSchemeType() == NUdf::TDataType<char*>::Id || messageTypeData.GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id, "Expected string or utf8.");
  2432. TCallableBuilder callableBuilder(Env, __func__, value.GetStaticType());
  2433. callableBuilder.Add(value);
  2434. callableBuilder.Add(predicate);
  2435. callableBuilder.Add(message);
  2436. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  2437. callableBuilder.Add(NewDataLiteral(row));
  2438. callableBuilder.Add(NewDataLiteral(column));
  2439. return TRuntimeNode(callableBuilder.Build(), false);
  2440. }
  2441. TRuntimeNode TProgramBuilder::SourceOf(TType* returnType) {
  2442. MKQL_ENSURE(returnType->IsFlow() || returnType->IsStream(), "Expected flow or stream.");
  2443. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2444. return TRuntimeNode(callableBuilder.Build(), false);
  2445. }
  2446. TRuntimeNode TProgramBuilder::Source() {
  2447. if constexpr (RuntimeVersion < 18U) {
  2448. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  2449. }
  2450. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType({})));
  2451. return TRuntimeNode(callableBuilder.Build(), false);
  2452. }
  2453. TRuntimeNode TProgramBuilder::IfPresent(TRuntimeNode optional, const TUnaryLambda& thenBranch, TRuntimeNode elseBranch) {
  2454. bool isOptional;
  2455. const auto unpackedType = UnpackOptional(optional, isOptional);
  2456. if (!isOptional) {
  2457. return thenBranch(optional);
  2458. }
  2459. const auto itemArg = Arg(unpackedType);
  2460. const auto then = thenBranch(itemArg);
  2461. bool thenOpt, elseOpt;
  2462. const auto thenUnpacked = UnpackOptional(then, thenOpt);
  2463. const auto elseUnpacked = UnpackOptional(elseBranch, elseOpt);
  2464. MKQL_ENSURE(thenUnpacked->IsSameType(*elseUnpacked), "Different return types in branches.");
  2465. TCallableBuilder callableBuilder(Env, __func__, (thenOpt || elseOpt) ? NewOptionalType(thenUnpacked) : thenUnpacked);
  2466. callableBuilder.Add(optional);
  2467. callableBuilder.Add(itemArg);
  2468. callableBuilder.Add(then);
  2469. callableBuilder.Add(elseBranch);
  2470. return TRuntimeNode(callableBuilder.Build(), false);
  2471. }
  2472. TRuntimeNode TProgramBuilder::IfPresent(TRuntimeNode::TList optionals, const TNarrowLambda& thenBranch, TRuntimeNode elseBranch) {
  2473. switch (optionals.size()) {
  2474. case 0U:
  2475. return thenBranch({});
  2476. case 1U:
  2477. return IfPresent(optionals.front(), [&](TRuntimeNode unwrap){ return thenBranch({unwrap}); }, elseBranch);
  2478. default:
  2479. break;
  2480. }
  2481. const auto first = optionals.front();
  2482. optionals.erase(optionals.cbegin());
  2483. return IfPresent(first,
  2484. [&](TRuntimeNode head) {
  2485. return IfPresent(optionals,
  2486. [&](TRuntimeNode::TList tail) {
  2487. tail.insert(tail.cbegin(), head);
  2488. return thenBranch(tail);
  2489. },
  2490. elseBranch
  2491. );
  2492. },
  2493. elseBranch
  2494. );
  2495. }
  2496. TRuntimeNode TProgramBuilder::Not(TRuntimeNode data) {
  2497. return UnaryDataFunction(data, __func__, TDataFunctionFlags::CommonOptionalResult | TDataFunctionFlags::RequiresBooleanArgs | TDataFunctionFlags::AllowOptionalArgs);
  2498. }
  2499. TRuntimeNode TProgramBuilder::BuildBinaryLogical(const std::string_view& callableName, TRuntimeNode data1, TRuntimeNode data2) {
  2500. bool isOpt1, isOpt2;
  2501. MKQL_ENSURE(UnpackOptionalData(data1, isOpt1)->GetSchemeType() == NUdf::TDataType<bool>::Id, "Requires boolean args.");
  2502. MKQL_ENSURE(UnpackOptionalData(data2, isOpt2)->GetSchemeType() == NUdf::TDataType<bool>::Id, "Requires boolean args.");
  2503. const auto resultType = NewDataType(NUdf::TDataType<bool>::Id, isOpt1 || isOpt2);
  2504. TCallableBuilder callableBuilder(Env, callableName, resultType);
  2505. callableBuilder.Add(data1);
  2506. callableBuilder.Add(data2);
  2507. return TRuntimeNode(callableBuilder.Build(), false);
  2508. }
  2509. TRuntimeNode TProgramBuilder::BuildLogical(const std::string_view& callableName, const TArrayRef<const TRuntimeNode>& args) {
  2510. MKQL_ENSURE(!args.empty(), "Empty logical args.");
  2511. switch (args.size()) {
  2512. case 1U: return args.front();
  2513. case 2U: return BuildBinaryLogical(callableName, args.front(), args.back());
  2514. }
  2515. const auto half = (args.size() + 1U) >> 1U;
  2516. const TArrayRef<const TRuntimeNode> one(args.data(), half), two(args.data() + half, args.size() - half);
  2517. return BuildBinaryLogical(callableName, BuildLogical(callableName, one), BuildLogical(callableName, two));
  2518. }
  2519. TRuntimeNode TProgramBuilder::And(const TArrayRef<const TRuntimeNode>& args) {
  2520. return BuildLogical(__func__, args);
  2521. }
  2522. TRuntimeNode TProgramBuilder::Or(const TArrayRef<const TRuntimeNode>& args) {
  2523. return BuildLogical(__func__, args);
  2524. }
  2525. TRuntimeNode TProgramBuilder::Xor(const TArrayRef<const TRuntimeNode>& args) {
  2526. return BuildLogical(__func__, args);
  2527. }
  2528. TRuntimeNode TProgramBuilder::Exists(TRuntimeNode data) {
  2529. const auto& nodeType = data.GetStaticType();
  2530. if (nodeType->IsVoid()) {
  2531. return NewDataLiteral(false);
  2532. }
  2533. if (!nodeType->IsOptional() && !nodeType->IsPg()) {
  2534. return NewDataLiteral(true);
  2535. }
  2536. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<bool>::Id));
  2537. callableBuilder.Add(data);
  2538. return TRuntimeNode(callableBuilder.Build(), false);
  2539. }
  2540. TRuntimeNode TProgramBuilder::NewMTRand(TRuntimeNode seed) {
  2541. auto seedData = AS_TYPE(TDataType, seed);
  2542. MKQL_ENSURE(seedData->GetSchemeType() == NUdf::TDataType<ui64>::Id, "seed must be ui64");
  2543. TCallableBuilder callableBuilder(Env, __func__, NewResourceType(RandomMTResource), true);
  2544. callableBuilder.Add(seed);
  2545. return TRuntimeNode(callableBuilder.Build(), false);
  2546. }
  2547. TRuntimeNode TProgramBuilder::NextMTRand(TRuntimeNode rand) {
  2548. auto resType = AS_TYPE(TResourceType, rand);
  2549. MKQL_ENSURE(resType->GetTag() == RandomMTResource, "Expected MTRand resource");
  2550. const std::array<TType*, 2U> tupleTypes = {{ NewDataType(NUdf::TDataType<ui64>::Id), rand.GetStaticType() }};
  2551. auto returnType = NewTupleType(tupleTypes);
  2552. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2553. callableBuilder.Add(rand);
  2554. return TRuntimeNode(callableBuilder.Build(), false);
  2555. }
  2556. TRuntimeNode TProgramBuilder::AggrCountInit(TRuntimeNode value) {
  2557. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<ui64>::Id));
  2558. callableBuilder.Add(value);
  2559. return TRuntimeNode(callableBuilder.Build(), false);
  2560. }
  2561. TRuntimeNode TProgramBuilder::AggrCountUpdate(TRuntimeNode value, TRuntimeNode state) {
  2562. MKQL_ENSURE(AS_TYPE(TDataType, state)->GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64 type");
  2563. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<ui64>::Id));
  2564. callableBuilder.Add(value);
  2565. callableBuilder.Add(state);
  2566. return TRuntimeNode(callableBuilder.Build(), false);
  2567. }
  2568. TRuntimeNode TProgramBuilder::AggrMin(TRuntimeNode data1, TRuntimeNode data2) {
  2569. const auto type = data1.GetStaticType();
  2570. MKQL_ENSURE(type->IsSameType(*data2.GetStaticType()), "Must be same type.");
  2571. return InvokeBinary(__func__, type, data1, data2);
  2572. }
  2573. TRuntimeNode TProgramBuilder::AggrMax(TRuntimeNode data1, TRuntimeNode data2) {
  2574. const auto type = data1.GetStaticType();
  2575. MKQL_ENSURE(type->IsSameType(*data2.GetStaticType()), "Must be same type.");
  2576. return InvokeBinary(__func__, type, data1, data2);
  2577. }
  2578. TRuntimeNode TProgramBuilder::AggrAdd(TRuntimeNode data1, TRuntimeNode data2) {
  2579. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  2580. bool isOptionalLeft;
  2581. const auto leftType = UnpackOptionalData(data1, isOptionalLeft);
  2582. if (leftType->GetSchemeType() != NUdf::TDataType<NUdf::TDecimal>::Id)
  2583. return Invoke(__func__, data1.GetStaticType(), args);
  2584. const auto decimalType = static_cast<TDataDecimalType*>(leftType);
  2585. bool isOptionalRight;
  2586. const auto rightType = static_cast<TDataDecimalType*>(UnpackOptionalData(data2, isOptionalRight));
  2587. MKQL_ENSURE(rightType->IsSameType(*decimalType), "Operands type mismatch");
  2588. return Invoke(TString("AggrAdd_") += ::ToString(decimalType->GetParams().first), data1.GetStaticType(), args);
  2589. }
  2590. TRuntimeNode TProgramBuilder::QueueCreate(TRuntimeNode initCapacity, TRuntimeNode initSize, const TArrayRef<const TRuntimeNode>& dependentNodes, TType* returnType) {
  2591. auto resType = AS_TYPE(TResourceType, returnType);
  2592. const auto tag = resType->GetTag();
  2593. if (initCapacity.GetStaticType()->IsVoid()) {
  2594. MKQL_ENSURE(RuntimeVersion >= 13, "Unbounded queue is not supported in runtime version " << RuntimeVersion);
  2595. } else {
  2596. auto initCapacityType = AS_TYPE(TDataType, initCapacity);
  2597. MKQL_ENSURE(initCapacityType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "init capcity must be ui64");
  2598. }
  2599. auto initSizeType = AS_TYPE(TDataType, initSize);
  2600. MKQL_ENSURE(initSizeType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "init size must be ui64");
  2601. TCallableBuilder callableBuilder(Env, __func__, returnType, true);
  2602. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(tag));
  2603. callableBuilder.Add(initCapacity);
  2604. callableBuilder.Add(initSize);
  2605. for (auto node : dependentNodes) {
  2606. callableBuilder.Add(node);
  2607. }
  2608. return TRuntimeNode(callableBuilder.Build(), false);
  2609. }
  2610. TRuntimeNode TProgramBuilder::QueuePush(TRuntimeNode resource, TRuntimeNode value) {
  2611. auto resType = AS_TYPE(TResourceType, resource);
  2612. const auto tag = resType->GetTag();
  2613. MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "Expected Queue resource");
  2614. TCallableBuilder callableBuilder(Env, __func__, resource.GetStaticType());
  2615. callableBuilder.Add(resource);
  2616. callableBuilder.Add(value);
  2617. return TRuntimeNode(callableBuilder.Build(), false);
  2618. }
  2619. TRuntimeNode TProgramBuilder::QueuePop(TRuntimeNode resource) {
  2620. auto resType = AS_TYPE(TResourceType, resource);
  2621. const auto tag = resType->GetTag();
  2622. MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "Expected Queue resource");
  2623. TCallableBuilder callableBuilder(Env, __func__, resource.GetStaticType());
  2624. callableBuilder.Add(resource);
  2625. return TRuntimeNode(callableBuilder.Build(), false);
  2626. }
  2627. TRuntimeNode TProgramBuilder::QueuePeek(TRuntimeNode resource, TRuntimeNode index, const TArrayRef<const TRuntimeNode>& dependentNodes, TType* returnType) {
  2628. MKQL_ENSURE(returnType->IsOptional(), "Expected optional type as result of QueuePeek");
  2629. auto resType = AS_TYPE(TResourceType, resource);
  2630. auto indexType = AS_TYPE(TDataType, index);
  2631. MKQL_ENSURE(indexType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "index size must be ui64");
  2632. const auto tag = resType->GetTag();
  2633. MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "Expected Queue resource");
  2634. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2635. callableBuilder.Add(resource);
  2636. callableBuilder.Add(index);
  2637. for (auto node : dependentNodes) {
  2638. callableBuilder.Add(node);
  2639. }
  2640. return TRuntimeNode(callableBuilder.Build(), false);
  2641. }
  2642. TRuntimeNode TProgramBuilder::QueueRange(TRuntimeNode resource, TRuntimeNode begin, TRuntimeNode end, const TArrayRef<const TRuntimeNode>& dependentNodes, TType* returnType) {
  2643. MKQL_ENSURE(RuntimeVersion >= 14, "QueueRange is not supported in runtime version " << RuntimeVersion);
  2644. MKQL_ENSURE(returnType->IsList(), "Expected list type as result of QueueRange");
  2645. auto resType = AS_TYPE(TResourceType, resource);
  2646. auto beginType = AS_TYPE(TDataType, begin);
  2647. MKQL_ENSURE(beginType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "begin index must be ui64");
  2648. auto endType = AS_TYPE(TDataType, end);
  2649. MKQL_ENSURE(endType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "end index must be ui64");
  2650. const auto tag = resType->GetTag();
  2651. MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "Expected Queue resource");
  2652. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2653. callableBuilder.Add(resource);
  2654. callableBuilder.Add(begin);
  2655. callableBuilder.Add(end);
  2656. for (auto node : dependentNodes) {
  2657. callableBuilder.Add(node);
  2658. }
  2659. return TRuntimeNode(callableBuilder.Build(), false);
  2660. }
  2661. TRuntimeNode TProgramBuilder::PreserveStream(TRuntimeNode stream, TRuntimeNode queue, TRuntimeNode outpace) {
  2662. auto streamType = AS_TYPE(TStreamType, stream);
  2663. auto resType = AS_TYPE(TResourceType, queue);
  2664. auto outpaceType = AS_TYPE(TDataType, outpace);
  2665. MKQL_ENSURE(outpaceType->GetSchemeType() == NUdf::TDataType<ui64>::Id, "PreserveStream: outpace size must be ui64");
  2666. const auto tag = resType->GetTag();
  2667. MKQL_ENSURE(tag.StartsWith(ResourceQueuePrefix), "PreserveStream: Expected Queue resource");
  2668. TCallableBuilder callableBuilder(Env, __func__, streamType);
  2669. callableBuilder.Add(stream);
  2670. callableBuilder.Add(queue);
  2671. callableBuilder.Add(outpace);
  2672. return TRuntimeNode(callableBuilder.Build(), false);
  2673. }
  2674. TRuntimeNode TProgramBuilder::Seq(const TArrayRef<const TRuntimeNode>& args, TType* returnType) {
  2675. MKQL_ENSURE(RuntimeVersion >= 15, "Seq is not supported in runtime version " << RuntimeVersion);
  2676. TCallableBuilder callableBuilder(Env, __func__, returnType);
  2677. for (auto node : args) {
  2678. callableBuilder.Add(node);
  2679. }
  2680. return TRuntimeNode(callableBuilder.Build(), false);
  2681. }
  2682. TRuntimeNode TProgramBuilder::FromYsonSimpleType(TRuntimeNode input, NUdf::TDataTypeId schemeType) {
  2683. auto type = input.GetStaticType();
  2684. if (type->IsOptional()) {
  2685. type = static_cast<const TOptionalType&>(*type).GetItemType();
  2686. }
  2687. MKQL_ENSURE(type->IsData(), "Expected data type");
  2688. auto resDataType = NewDataType(schemeType);
  2689. auto resultType = NewOptionalType(resDataType);
  2690. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2691. callableBuilder.Add(input);
  2692. callableBuilder.Add(NewDataLiteral(static_cast<ui32>(schemeType)));
  2693. return TRuntimeNode(callableBuilder.Build(), false);
  2694. }
  2695. TRuntimeNode TProgramBuilder::TryWeakMemberFromDict(TRuntimeNode other, TRuntimeNode rest, NUdf::TDataTypeId schemeType, const std::string_view& memberName) {
  2696. auto resDataType = NewDataType(schemeType);
  2697. auto resultType = NewOptionalType(resDataType);
  2698. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2699. callableBuilder.Add(other);
  2700. callableBuilder.Add(rest);
  2701. callableBuilder.Add(NewDataLiteral(static_cast<ui32>(schemeType)));
  2702. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(memberName));
  2703. return TRuntimeNode(callableBuilder.Build(), false);
  2704. }
  2705. TRuntimeNode TProgramBuilder::TimezoneId(TRuntimeNode name) {
  2706. bool isOptional;
  2707. auto dataType = UnpackOptionalData(name, isOptional);
  2708. MKQL_ENSURE(dataType->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expected string");
  2709. auto resultType = NewOptionalType(NewDataType(NUdf::EDataSlot::Uint16));
  2710. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2711. callableBuilder.Add(name);
  2712. return TRuntimeNode(callableBuilder.Build(), false);
  2713. }
  2714. TRuntimeNode TProgramBuilder::TimezoneName(TRuntimeNode id) {
  2715. bool isOptional;
  2716. auto dataType = UnpackOptionalData(id, isOptional);
  2717. MKQL_ENSURE(dataType->GetSchemeType() == NUdf::TDataType<ui16>::Id, "Expected ui32");
  2718. auto resultType = NewOptionalType(NewDataType(NUdf::EDataSlot::String));
  2719. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2720. callableBuilder.Add(id);
  2721. return TRuntimeNode(callableBuilder.Build(), false);
  2722. }
  2723. TRuntimeNode TProgramBuilder::AddTimezone(TRuntimeNode utc, TRuntimeNode id) {
  2724. bool isOptional1;
  2725. auto dataType1 = UnpackOptionalData(utc, isOptional1);
  2726. MKQL_ENSURE(NUdf::GetDataTypeInfo(*dataType1->GetDataSlot()).Features & NUdf::DateType, "Expected date type");
  2727. bool isOptional2;
  2728. auto dataType2 = UnpackOptionalData(id, isOptional2);
  2729. MKQL_ENSURE(dataType2->GetSchemeType() == NUdf::TDataType<ui16>::Id, "Expected ui16");
  2730. NUdf::EDataSlot tzType;
  2731. switch (*dataType1->GetDataSlot()) {
  2732. case NUdf::EDataSlot::Date: tzType = NUdf::EDataSlot::TzDate; break;
  2733. case NUdf::EDataSlot::Datetime: tzType = NUdf::EDataSlot::TzDatetime; break;
  2734. case NUdf::EDataSlot::Timestamp: tzType = NUdf::EDataSlot::TzTimestamp; break;
  2735. case NUdf::EDataSlot::Date32: tzType = NUdf::EDataSlot::TzDate32; break;
  2736. case NUdf::EDataSlot::Datetime64: tzType = NUdf::EDataSlot::TzDatetime64; break;
  2737. case NUdf::EDataSlot::Timestamp64: tzType = NUdf::EDataSlot::TzTimestamp64; break;
  2738. default:
  2739. ythrow yexception() << "Unknown date type: " << *dataType1->GetDataSlot();
  2740. }
  2741. auto resultType = NewOptionalType(NewDataType(tzType));
  2742. TCallableBuilder callableBuilder(Env, __func__, resultType);
  2743. callableBuilder.Add(utc);
  2744. callableBuilder.Add(id);
  2745. return TRuntimeNode(callableBuilder.Build(), false);
  2746. }
  2747. TRuntimeNode TProgramBuilder::RemoveTimezone(TRuntimeNode local) {
  2748. bool isOptional1;
  2749. const auto dataType1 = UnpackOptionalData(local, isOptional1);
  2750. MKQL_ENSURE((NUdf::GetDataTypeInfo(*dataType1->GetDataSlot()).Features & NUdf::TzDateType), "Expected date with timezone type");
  2751. NUdf::EDataSlot type;
  2752. switch (*dataType1->GetDataSlot()) {
  2753. case NUdf::EDataSlot::TzDate: type = NUdf::EDataSlot::Date; break;
  2754. case NUdf::EDataSlot::TzDatetime: type = NUdf::EDataSlot::Datetime; break;
  2755. case NUdf::EDataSlot::TzTimestamp: type = NUdf::EDataSlot::Timestamp; break;
  2756. case NUdf::EDataSlot::TzDate32: type = NUdf::EDataSlot::Date32; break;
  2757. case NUdf::EDataSlot::TzDatetime64: type = NUdf::EDataSlot::Datetime64; break;
  2758. case NUdf::EDataSlot::TzTimestamp64: type = NUdf::EDataSlot::Timestamp64; break;
  2759. default:
  2760. ythrow yexception() << "Unknown date with timezone type: " << *dataType1->GetDataSlot();
  2761. }
  2762. return Convert(local, NewDataType(type, isOptional1));
  2763. }
  2764. TRuntimeNode TProgramBuilder::Nth(TRuntimeNode tuple, ui32 index) {
  2765. bool isOptional;
  2766. const auto type = AS_TYPE(TTupleType, UnpackOptional(tuple.GetStaticType(), isOptional));
  2767. MKQL_ENSURE(index < type->GetElementsCount(), "Index out of range: " << index <<
  2768. " is not less than " << type->GetElementsCount());
  2769. auto itemType = type->GetElementType(index);
  2770. if (isOptional && !itemType->IsOptional() && !itemType->IsNull() && !itemType->IsPg()) {
  2771. itemType = TOptionalType::Create(itemType, Env);
  2772. }
  2773. TCallableBuilder callableBuilder(Env, __func__, itemType);
  2774. callableBuilder.Add(tuple);
  2775. callableBuilder.Add(NewDataLiteral<ui32>(index));
  2776. return TRuntimeNode(callableBuilder.Build(), false);
  2777. }
  2778. TRuntimeNode TProgramBuilder::Element(TRuntimeNode tuple, ui32 index) {
  2779. return Nth(tuple, index);
  2780. }
  2781. TRuntimeNode TProgramBuilder::Guess(TRuntimeNode variant, ui32 tupleIndex) {
  2782. bool isOptional;
  2783. auto unpacked = UnpackOptional(variant, isOptional);
  2784. auto type = AS_TYPE(TVariantType, unpacked);
  2785. auto underlyingType = AS_TYPE(TTupleType, type->GetUnderlyingType());
  2786. MKQL_ENSURE(tupleIndex < underlyingType->GetElementsCount(), "Wrong tuple index");
  2787. auto resType = TOptionalType::Create(underlyingType->GetElementType(tupleIndex), Env);
  2788. TCallableBuilder callableBuilder(Env, __func__, resType);
  2789. callableBuilder.Add(variant);
  2790. callableBuilder.Add(NewDataLiteral<ui32>(tupleIndex));
  2791. return TRuntimeNode(callableBuilder.Build(), false);
  2792. }
  2793. TRuntimeNode TProgramBuilder::Guess(TRuntimeNode variant, const std::string_view& memberName) {
  2794. bool isOptional;
  2795. auto unpacked = UnpackOptional(variant, isOptional);
  2796. auto type = AS_TYPE(TVariantType, unpacked);
  2797. auto underlyingType = AS_TYPE(TStructType, type->GetUnderlyingType());
  2798. auto structIndex = underlyingType->GetMemberIndex(memberName);
  2799. auto resType = TOptionalType::Create(underlyingType->GetMemberType(structIndex), Env);
  2800. TCallableBuilder callableBuilder(Env, __func__, resType);
  2801. callableBuilder.Add(variant);
  2802. callableBuilder.Add(NewDataLiteral<ui32>(structIndex));
  2803. return TRuntimeNode(callableBuilder.Build(), false);
  2804. }
  2805. TRuntimeNode TProgramBuilder::Way(TRuntimeNode variant) {
  2806. bool isOptional;
  2807. auto unpacked = UnpackOptional(variant, isOptional);
  2808. auto type = AS_TYPE(TVariantType, unpacked);
  2809. auto underlyingType = type->GetUnderlyingType();
  2810. auto dataType = NewDataType(underlyingType->IsTuple() ? NUdf::EDataSlot::Uint32 : NUdf::EDataSlot::Utf8);
  2811. auto resType = isOptional ? TOptionalType::Create(dataType, Env) : dataType;
  2812. TCallableBuilder callableBuilder(Env, __func__, resType);
  2813. callableBuilder.Add(variant);
  2814. return TRuntimeNode(callableBuilder.Build(), false);
  2815. }
  2816. TRuntimeNode TProgramBuilder::VariantItem(TRuntimeNode variant) {
  2817. bool isOptional;
  2818. auto unpacked = UnpackOptional(variant, isOptional);
  2819. auto type = AS_TYPE(TVariantType, unpacked);
  2820. auto underlyingType = type->GetAlternativeType(0);
  2821. auto resType = isOptional ? TOptionalType::Create(underlyingType, Env) : underlyingType;
  2822. TCallableBuilder callableBuilder(Env, __func__, resType);
  2823. callableBuilder.Add(variant);
  2824. return TRuntimeNode(callableBuilder.Build(), false);
  2825. }
  2826. TRuntimeNode TProgramBuilder::VisitAll(TRuntimeNode variant, std::function<TRuntimeNode(ui32, TRuntimeNode)> handler) {
  2827. const auto type = AS_TYPE(TVariantType, variant);
  2828. std::vector<TRuntimeNode> items;
  2829. std::vector<TRuntimeNode> newItems;
  2830. for (ui32 i = 0; i < type->GetAlternativesCount(); ++i) {
  2831. const auto itemType = type->GetAlternativeType(i);
  2832. const auto itemArg = Arg(itemType);
  2833. const auto res = handler(i, itemArg);
  2834. items.emplace_back(itemArg);
  2835. newItems.emplace_back(res);
  2836. }
  2837. bool hasOptional;
  2838. const auto firstUnpacked = UnpackOptional(newItems.front(), hasOptional);
  2839. bool allOptional = hasOptional;
  2840. for (size_t i = 1U; i < newItems.size(); ++i) {
  2841. bool isOptional;
  2842. const auto unpacked = UnpackOptional(newItems[i].GetStaticType(), isOptional);
  2843. MKQL_ENSURE(unpacked->IsSameType(*firstUnpacked), "Different return types in branches.");
  2844. hasOptional = hasOptional || isOptional;
  2845. allOptional = allOptional && isOptional;
  2846. }
  2847. if (hasOptional && !allOptional) {
  2848. for (auto& item : newItems) {
  2849. if (!item.GetStaticType()->IsOptional()) {
  2850. item = NewOptional(item);
  2851. }
  2852. }
  2853. }
  2854. TCallableBuilder callableBuilder(Env, __func__, newItems.front().GetStaticType());
  2855. callableBuilder.Add(variant);
  2856. for (ui32 i = 0; i < type->GetAlternativesCount(); ++i) {
  2857. callableBuilder.Add(items[i]);
  2858. callableBuilder.Add(newItems[i]);
  2859. }
  2860. return TRuntimeNode(callableBuilder.Build(), false);
  2861. }
  2862. TRuntimeNode TProgramBuilder::UnaryDataFunction(TRuntimeNode data, const std::string_view& callableName, ui32 flags) {
  2863. bool isOptional;
  2864. auto type = UnpackOptionalData(data, isOptional);
  2865. if (!(flags & TDataFunctionFlags::AllowOptionalArgs)) {
  2866. MKQL_ENSURE(!isOptional, "Optional data is not allowed");
  2867. }
  2868. auto schemeType = type->GetSchemeType();
  2869. if (flags & TDataFunctionFlags::RequiresBooleanArgs) {
  2870. MKQL_ENSURE(schemeType == NUdf::TDataType<bool>::Id, "Boolean data is required");
  2871. } else if (flags & TDataFunctionFlags::RequiresStringArgs) {
  2872. MKQL_ENSURE(schemeType == NUdf::TDataType<char*>::Id, "String data is required");
  2873. }
  2874. if (!schemeType) {
  2875. MKQL_ENSURE((flags & TDataFunctionFlags::AllowNull) != 0, "Null is not allowed");
  2876. }
  2877. TType* resultType;
  2878. if (flags & TDataFunctionFlags::HasBooleanResult) {
  2879. resultType = TDataType::Create(NUdf::TDataType<bool>::Id, Env);
  2880. } else if (flags & TDataFunctionFlags::HasUi32Result) {
  2881. resultType = TDataType::Create(NUdf::TDataType<ui32>::Id, Env);
  2882. } else if (flags & TDataFunctionFlags::HasStringResult) {
  2883. resultType = TDataType::Create(NUdf::TDataType<char*>::Id, Env);
  2884. } else if (flags & TDataFunctionFlags::HasOptionalResult) {
  2885. resultType = TOptionalType::Create(type, Env);
  2886. } else {
  2887. resultType = type;
  2888. }
  2889. if ((flags & TDataFunctionFlags::CommonOptionalResult) && isOptional) {
  2890. resultType = TOptionalType::Create(resultType, Env);
  2891. }
  2892. TCallableBuilder callableBuilder(Env, callableName, resultType);
  2893. callableBuilder.Add(data);
  2894. return TRuntimeNode(callableBuilder.Build(), false);
  2895. }
  2896. TRuntimeNode TProgramBuilder::ToDict(TRuntimeNode list, bool multi, const TUnaryLambda& keySelector,
  2897. const TUnaryLambda& payloadSelector, std::string_view callableName, bool isCompact, ui64 itemsCountHint)
  2898. {
  2899. bool isOptional;
  2900. const auto type = UnpackOptional(list, isOptional);
  2901. MKQL_ENSURE(type->IsList(), "Expected list.");
  2902. if (isOptional) {
  2903. return Map(list, [&](TRuntimeNode unpacked) { return ToDict(unpacked, multi, keySelector, payloadSelector, callableName, isCompact, itemsCountHint); } );
  2904. }
  2905. const auto itemType = AS_TYPE(TListType, type)->GetItemType();
  2906. ThrowIfListOfVoid(itemType);
  2907. const auto itemArg = Arg(itemType);
  2908. const auto key = keySelector(itemArg);
  2909. const auto keyType = key.GetStaticType();
  2910. auto payload = payloadSelector(itemArg);
  2911. auto payloadType = payload.GetStaticType();
  2912. if (multi) {
  2913. payloadType = TListType::Create(payloadType, Env);
  2914. }
  2915. auto dictType = TDictType::Create(keyType, payloadType, Env);
  2916. TCallableBuilder callableBuilder(Env, callableName, dictType);
  2917. callableBuilder.Add(list);
  2918. callableBuilder.Add(itemArg);
  2919. callableBuilder.Add(key);
  2920. callableBuilder.Add(payload);
  2921. callableBuilder.Add(NewDataLiteral(multi));
  2922. callableBuilder.Add(NewDataLiteral(isCompact));
  2923. callableBuilder.Add(NewDataLiteral(itemsCountHint));
  2924. return TRuntimeNode(callableBuilder.Build(), false);
  2925. }
  2926. TRuntimeNode TProgramBuilder::SqueezeToDict(TRuntimeNode stream, bool multi, const TUnaryLambda& keySelector,
  2927. const TUnaryLambda& payloadSelector, std::string_view callableName, bool isCompact, ui64 itemsCountHint)
  2928. {
  2929. if constexpr (RuntimeVersion < 21U) {
  2930. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  2931. }
  2932. const auto type = stream.GetStaticType();
  2933. MKQL_ENSURE(type->IsStream() || type->IsFlow(), "Expected stream or flow.");
  2934. const auto itemType = type->IsFlow() ? AS_TYPE(TFlowType, type)->GetItemType() : AS_TYPE(TStreamType, type)->GetItemType();
  2935. ThrowIfListOfVoid(itemType);
  2936. const auto itemArg = Arg(itemType);
  2937. const auto key = keySelector(itemArg);
  2938. const auto keyType = key.GetStaticType();
  2939. auto payload = payloadSelector(itemArg);
  2940. auto payloadType = payload.GetStaticType();
  2941. if (multi) {
  2942. payloadType = TListType::Create(payloadType, Env);
  2943. }
  2944. auto dictType = TDictType::Create(keyType, payloadType, Env);
  2945. auto returnType = type->IsFlow()
  2946. ? (TType*) TFlowType::Create(dictType, Env)
  2947. : (TType*) TStreamType::Create(dictType, Env);
  2948. TCallableBuilder callableBuilder(Env, callableName, returnType);
  2949. callableBuilder.Add(stream);
  2950. callableBuilder.Add(itemArg);
  2951. callableBuilder.Add(key);
  2952. callableBuilder.Add(payload);
  2953. callableBuilder.Add(NewDataLiteral(multi));
  2954. callableBuilder.Add(NewDataLiteral(isCompact));
  2955. callableBuilder.Add(NewDataLiteral(itemsCountHint));
  2956. return TRuntimeNode(callableBuilder.Build(), false);
  2957. }
  2958. TRuntimeNode TProgramBuilder::NarrowSqueezeToDict(TRuntimeNode flow, bool multi, const TNarrowLambda& keySelector,
  2959. const TNarrowLambda& payloadSelector, std::string_view callableName, bool isCompact, ui64 itemsCountHint)
  2960. {
  2961. if constexpr (RuntimeVersion < 23U) {
  2962. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  2963. }
  2964. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  2965. TRuntimeNode::TList itemArgs;
  2966. itemArgs.reserve(wideComponents.size());
  2967. auto i = 0U;
  2968. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  2969. const auto key = keySelector(itemArgs);
  2970. const auto keyType = key.GetStaticType();
  2971. auto payload = payloadSelector(itemArgs);
  2972. auto payloadType = payload.GetStaticType();
  2973. if (multi) {
  2974. payloadType = TListType::Create(payloadType, Env);
  2975. }
  2976. const auto dictType = TDictType::Create(keyType, payloadType, Env);
  2977. const auto returnType = TFlowType::Create(dictType, Env);
  2978. TCallableBuilder callableBuilder(Env, callableName, returnType);
  2979. callableBuilder.Add(flow);
  2980. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  2981. callableBuilder.Add(key);
  2982. callableBuilder.Add(payload);
  2983. callableBuilder.Add(NewDataLiteral(multi));
  2984. callableBuilder.Add(NewDataLiteral(isCompact));
  2985. callableBuilder.Add(NewDataLiteral(itemsCountHint));
  2986. return TRuntimeNode(callableBuilder.Build(), false);
  2987. }
  2988. void TProgramBuilder::ThrowIfListOfVoid(TType* type) {
  2989. MKQL_ENSURE(!VoidWithEffects || !type->IsVoid(), "List of void is forbidden for current function");
  2990. }
  2991. TRuntimeNode TProgramBuilder::BuildFlatMap(const std::string_view& callableName, TRuntimeNode list, const TUnaryLambda& handler)
  2992. {
  2993. const auto listType = list.GetStaticType();
  2994. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsOptional() || listType->IsStream(), "Expected flow, list, stream or optional");
  2995. if (listType->IsOptional()) {
  2996. const auto itemArg = Arg(AS_TYPE(TOptionalType, listType)->GetItemType());
  2997. const auto newList = handler(itemArg);
  2998. const auto type = newList.GetStaticType();
  2999. MKQL_ENSURE(type->IsList() || type->IsOptional() || type->IsStream() || type->IsFlow(), "Expected flow, list, stream or optional");
  3000. return IfPresent(list, [&](TRuntimeNode item) {
  3001. return handler(item);
  3002. }, type->IsOptional() ? NewEmptyOptional(type) : type->IsList() ? NewEmptyList(AS_TYPE(TListType, type)->GetItemType()) : EmptyIterator(type));
  3003. }
  3004. const auto itemType = listType->IsFlow() ?
  3005. AS_TYPE(TFlowType, listType)->GetItemType():
  3006. listType->IsList() ?
  3007. AS_TYPE(TListType, listType)->GetItemType():
  3008. AS_TYPE(TStreamType, listType)->GetItemType();
  3009. ThrowIfListOfVoid(itemType);
  3010. const auto itemArg = Arg(itemType);
  3011. const auto newList = handler(itemArg);
  3012. const auto type = newList.GetStaticType();
  3013. TType* retItemType = nullptr;
  3014. if (type->IsOptional()) {
  3015. retItemType = AS_TYPE(TOptionalType, type)->GetItemType();
  3016. } else if (type->IsFlow()) {
  3017. retItemType = AS_TYPE(TFlowType, type)->GetItemType();
  3018. } else if (type->IsList()) {
  3019. retItemType = AS_TYPE(TListType, type)->GetItemType();
  3020. } else if (type->IsStream()) {
  3021. retItemType = AS_TYPE(TStreamType, type)->GetItemType();
  3022. } else {
  3023. THROW yexception() << "Expected flow, list or stream.";
  3024. }
  3025. const auto resultListType = listType->IsFlow() || type->IsFlow() ?
  3026. TFlowType::Create(retItemType, Env):
  3027. listType->IsList() ?
  3028. (TType*)TListType::Create(retItemType, Env):
  3029. (TType*)TStreamType::Create(retItemType, Env);
  3030. TCallableBuilder callableBuilder(Env, callableName, resultListType);
  3031. callableBuilder.Add(list);
  3032. callableBuilder.Add(itemArg);
  3033. callableBuilder.Add(newList);
  3034. return TRuntimeNode(callableBuilder.Build(), false);
  3035. }
  3036. TRuntimeNode TProgramBuilder::MultiMap(TRuntimeNode list, const TExpandLambda& handler)
  3037. {
  3038. if constexpr (RuntimeVersion < 16U) {
  3039. const auto single = [=](TRuntimeNode item) -> TRuntimeNode {
  3040. const auto newList = handler(item);
  3041. const auto retItemType = newList.front().GetStaticType();
  3042. MKQL_ENSURE(retItemType->IsSameType(*newList.back().GetStaticType()), "Must be same type.");
  3043. return NewList(retItemType, newList);
  3044. };
  3045. return OrderedFlatMap(list, single);
  3046. }
  3047. const auto listType = list.GetStaticType();
  3048. MKQL_ENSURE(listType->IsFlow() || listType->IsList(), "Expected flow, list, stream or optional");
  3049. const auto itemType = listType->IsFlow() ? AS_TYPE(TFlowType, listType)->GetItemType() : AS_TYPE(TListType, listType)->GetItemType();
  3050. const auto itemArg = Arg(itemType);
  3051. const auto newList = handler(itemArg);
  3052. MKQL_ENSURE(newList.size() > 1U, "Expected many items.");
  3053. const auto retItemType = newList.front().GetStaticType();
  3054. MKQL_ENSURE(retItemType->IsSameType(*newList.back().GetStaticType()), "Must be same type.");
  3055. const auto resultListType = listType->IsFlow() ?
  3056. (TType*)TFlowType::Create(retItemType, Env) : (TType*)TListType::Create(retItemType, Env);
  3057. TCallableBuilder callableBuilder(Env, __func__, resultListType);
  3058. callableBuilder.Add(list);
  3059. callableBuilder.Add(itemArg);
  3060. std::for_each(newList.cbegin(), newList.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3061. return TRuntimeNode(callableBuilder.Build(), false);
  3062. }
  3063. TRuntimeNode TProgramBuilder::NarrowMultiMap(TRuntimeNode flow, const TWideLambda& handler) {
  3064. if constexpr (RuntimeVersion < 18U) {
  3065. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3066. }
  3067. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3068. TRuntimeNode::TList itemArgs;
  3069. itemArgs.reserve(wideComponents.size());
  3070. auto i = 0U;
  3071. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3072. const auto newList = handler(itemArgs);
  3073. MKQL_ENSURE(newList.size() > 1U, "Expected many items.");
  3074. const auto retItemType = newList.front().GetStaticType();
  3075. MKQL_ENSURE(retItemType->IsSameType(*newList.back().GetStaticType()), "Must be same type.");
  3076. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(newList.front().GetStaticType()));
  3077. callableBuilder.Add(flow);
  3078. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3079. std::for_each(newList.cbegin(), newList.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3080. return TRuntimeNode(callableBuilder.Build(), false);
  3081. }
  3082. TRuntimeNode TProgramBuilder::ExpandMap(TRuntimeNode flow, const TExpandLambda& handler) {
  3083. if constexpr (RuntimeVersion < 18U) {
  3084. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3085. }
  3086. const auto itemType = AS_TYPE(TFlowType, flow.GetStaticType())->GetItemType();
  3087. const auto itemArg = Arg(itemType);
  3088. const auto newItems = handler(itemArg);
  3089. std::vector<TType*> tupleItems;
  3090. tupleItems.reserve(newItems.size());
  3091. std::transform(newItems.cbegin(), newItems.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  3092. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(tupleItems)));
  3093. callableBuilder.Add(flow);
  3094. callableBuilder.Add(itemArg);
  3095. std::for_each(newItems.cbegin(), newItems.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3096. return TRuntimeNode(callableBuilder.Build(), false);
  3097. }
  3098. TRuntimeNode TProgramBuilder::WideMap(TRuntimeNode flow, const TWideLambda& handler) {
  3099. if constexpr (RuntimeVersion < 18U) {
  3100. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3101. }
  3102. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3103. TRuntimeNode::TList itemArgs;
  3104. itemArgs.reserve(wideComponents.size());
  3105. auto i = 0U;
  3106. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3107. const auto newItems = handler(itemArgs);
  3108. std::vector<TType*> tupleItems;
  3109. tupleItems.reserve(newItems.size());
  3110. std::transform(newItems.cbegin(), newItems.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  3111. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(tupleItems)));
  3112. callableBuilder.Add(flow);
  3113. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3114. std::for_each(newItems.cbegin(), newItems.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3115. return TRuntimeNode(callableBuilder.Build(), false);
  3116. }
  3117. TRuntimeNode TProgramBuilder::WideChain1Map(TRuntimeNode flow, const TWideLambda& init, const TBinaryWideLambda& update) {
  3118. if constexpr (RuntimeVersion < 23U) {
  3119. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3120. }
  3121. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3122. TRuntimeNode::TList inputArgs;
  3123. inputArgs.reserve(wideComponents.size());
  3124. auto i = 0U;
  3125. std::generate_n(std::back_inserter(inputArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3126. const auto initItems = init(inputArgs);
  3127. std::vector<TType*> tupleItems;
  3128. tupleItems.reserve(initItems.size());
  3129. std::transform(initItems.cbegin(), initItems.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  3130. TRuntimeNode::TList outputArgs;
  3131. outputArgs.reserve(tupleItems.size());
  3132. std::transform(tupleItems.cbegin(), tupleItems.cend(), std::back_inserter(outputArgs), std::bind(&TProgramBuilder::Arg, this, std::placeholders::_1));
  3133. const auto updateItems = update(inputArgs, outputArgs);
  3134. MKQL_ENSURE(initItems.size() == updateItems.size(), "Expected same width.");
  3135. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(tupleItems)));
  3136. callableBuilder.Add(flow);
  3137. std::for_each(inputArgs.cbegin(), inputArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3138. std::for_each(initItems.cbegin(), initItems.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3139. std::for_each(outputArgs.cbegin(), outputArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3140. std::for_each(updateItems.cbegin(), updateItems.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3141. return TRuntimeNode(callableBuilder.Build(), false);
  3142. }
  3143. TRuntimeNode TProgramBuilder::NarrowMap(TRuntimeNode flow, const TNarrowLambda& handler) {
  3144. if constexpr (RuntimeVersion < 18U) {
  3145. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3146. }
  3147. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3148. TRuntimeNode::TList itemArgs;
  3149. itemArgs.reserve(wideComponents.size());
  3150. auto i = 0U;
  3151. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3152. const auto newItem = handler(itemArgs);
  3153. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(newItem.GetStaticType()));
  3154. callableBuilder.Add(flow);
  3155. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3156. callableBuilder.Add(newItem);
  3157. return TRuntimeNode(callableBuilder.Build(), false);
  3158. }
  3159. TRuntimeNode TProgramBuilder::NarrowFlatMap(TRuntimeNode flow, const TNarrowLambda& handler) {
  3160. if constexpr (RuntimeVersion < 18U) {
  3161. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3162. }
  3163. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3164. TRuntimeNode::TList itemArgs;
  3165. itemArgs.reserve(wideComponents.size());
  3166. auto i = 0U;
  3167. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3168. const auto newList = handler(itemArgs);
  3169. const auto type = newList.GetStaticType();
  3170. TType* retItemType = nullptr;
  3171. if (type->IsOptional()) {
  3172. retItemType = AS_TYPE(TOptionalType, type)->GetItemType();
  3173. } else if (type->IsFlow()) {
  3174. retItemType = AS_TYPE(TFlowType, type)->GetItemType();
  3175. } else if (type->IsList()) {
  3176. retItemType = AS_TYPE(TListType, type)->GetItemType();
  3177. } else if (type->IsStream()) {
  3178. retItemType = AS_TYPE(TStreamType, type)->GetItemType();
  3179. } else {
  3180. THROW yexception() << "Expected flow, list or stream.";
  3181. }
  3182. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(retItemType));
  3183. callableBuilder.Add(flow);
  3184. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3185. callableBuilder.Add(newList);
  3186. return TRuntimeNode(callableBuilder.Build(), false);
  3187. }
  3188. TRuntimeNode TProgramBuilder::BuildWideFilter(const std::string_view& callableName, TRuntimeNode flow, const TNarrowLambda& handler) {
  3189. if constexpr (RuntimeVersion < 18U) {
  3190. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3191. }
  3192. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3193. TRuntimeNode::TList itemArgs;
  3194. itemArgs.reserve(wideComponents.size());
  3195. auto i = 0U;
  3196. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3197. const auto predicate = handler(itemArgs);
  3198. TCallableBuilder callableBuilder(Env, callableName, flow.GetStaticType());
  3199. callableBuilder.Add(flow);
  3200. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3201. callableBuilder.Add(predicate);
  3202. return TRuntimeNode(callableBuilder.Build(), false);
  3203. }
  3204. TRuntimeNode TProgramBuilder::WideFilter(TRuntimeNode flow, const TNarrowLambda& handler) {
  3205. return BuildWideFilter(__func__, flow, handler);
  3206. }
  3207. TRuntimeNode TProgramBuilder::WideTakeWhile(TRuntimeNode flow, const TNarrowLambda& handler) {
  3208. return BuildWideFilter(__func__, flow, handler);
  3209. }
  3210. TRuntimeNode TProgramBuilder::WideSkipWhile(TRuntimeNode flow, const TNarrowLambda& handler) {
  3211. return BuildWideFilter(__func__, flow, handler);
  3212. }
  3213. TRuntimeNode TProgramBuilder::WideTakeWhileInclusive(TRuntimeNode flow, const TNarrowLambda& handler) {
  3214. return BuildWideFilter(__func__, flow, handler);
  3215. }
  3216. TRuntimeNode TProgramBuilder::WideSkipWhileInclusive(TRuntimeNode flow, const TNarrowLambda& handler) {
  3217. return BuildWideFilter(__func__, flow, handler);
  3218. }
  3219. TRuntimeNode TProgramBuilder::WideFilter(TRuntimeNode flow, TRuntimeNode limit, const TNarrowLambda& handler) {
  3220. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3221. TRuntimeNode::TList itemArgs;
  3222. itemArgs.reserve(wideComponents.size());
  3223. auto i = 0U;
  3224. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3225. const auto predicate = handler(itemArgs);
  3226. TCallableBuilder callableBuilder(Env, __func__, flow.GetStaticType());
  3227. callableBuilder.Add(flow);
  3228. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3229. callableBuilder.Add(predicate);
  3230. callableBuilder.Add(limit);
  3231. return TRuntimeNode(callableBuilder.Build(), false);
  3232. }
  3233. TRuntimeNode TProgramBuilder::BuildFilter(const std::string_view& callableName, TRuntimeNode list, const TUnaryLambda& handler, TType* resultType)
  3234. {
  3235. const auto listType = list.GetStaticType();
  3236. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream(), "Expected flow, list or stream.");
  3237. const auto outputType = resultType ? resultType : listType;
  3238. const auto itemType = listType->IsFlow() ?
  3239. AS_TYPE(TFlowType, listType)->GetItemType():
  3240. listType->IsList() ?
  3241. AS_TYPE(TListType, listType)->GetItemType():
  3242. AS_TYPE(TStreamType, listType)->GetItemType();
  3243. ThrowIfListOfVoid(itemType);
  3244. const auto itemArg = Arg(itemType);
  3245. const auto predicate = handler(itemArg);
  3246. MKQL_ENSURE(predicate.GetStaticType()->IsData(), "Expected boolean data");
  3247. const auto& detailedPredicateType = static_cast<const TDataType&>(*predicate.GetStaticType());
  3248. MKQL_ENSURE(detailedPredicateType.GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected boolean data");
  3249. TCallableBuilder callableBuilder(Env, callableName, outputType);
  3250. callableBuilder.Add(list);
  3251. callableBuilder.Add(itemArg);
  3252. callableBuilder.Add(predicate);
  3253. return TRuntimeNode(callableBuilder.Build(), false);
  3254. }
  3255. TRuntimeNode TProgramBuilder::BuildFilter(const std::string_view& callableName, TRuntimeNode list, TRuntimeNode limit, const TUnaryLambda& handler, TType* resultType)
  3256. {
  3257. if constexpr (RuntimeVersion < 4U) {
  3258. return Take(BuildFilter(callableName, list, handler, resultType), limit);
  3259. }
  3260. const auto listType = list.GetStaticType();
  3261. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream(), "Expected flow, list or stream.");
  3262. MKQL_ENSURE(limit.GetStaticType()->IsData(), "Expected data");
  3263. const auto outputType = resultType ? resultType : listType;
  3264. const auto itemType = listType->IsFlow() ?
  3265. AS_TYPE(TFlowType, listType)->GetItemType():
  3266. listType->IsList() ?
  3267. AS_TYPE(TListType, listType)->GetItemType():
  3268. AS_TYPE(TStreamType, listType)->GetItemType();
  3269. ThrowIfListOfVoid(itemType);
  3270. const auto itemArg = Arg(itemType);
  3271. const auto predicate = handler(itemArg);
  3272. MKQL_ENSURE(predicate.GetStaticType()->IsData(), "Expected boolean data");
  3273. const auto& detailedPredicateType = static_cast<const TDataType&>(*predicate.GetStaticType());
  3274. MKQL_ENSURE(detailedPredicateType.GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected boolean data");
  3275. TCallableBuilder callableBuilder(Env, callableName, outputType);
  3276. callableBuilder.Add(list);
  3277. callableBuilder.Add(limit);
  3278. callableBuilder.Add(itemArg);
  3279. callableBuilder.Add(predicate);
  3280. return TRuntimeNode(callableBuilder.Build(), false);
  3281. }
  3282. TRuntimeNode TProgramBuilder::Filter(TRuntimeNode list, const TUnaryLambda& handler, TType* resultType)
  3283. {
  3284. const auto type = list.GetStaticType();
  3285. if (type->IsOptional()) {
  3286. return
  3287. IfPresent(list,
  3288. [&](TRuntimeNode item) {
  3289. return If(handler(item), item, NewEmptyOptional(resultType), resultType);
  3290. },
  3291. NewEmptyOptional(resultType)
  3292. );
  3293. }
  3294. return BuildFilter(__func__, list, handler, resultType);
  3295. }
  3296. TRuntimeNode TProgramBuilder::BuildHeap(const std::string_view& callableName, TRuntimeNode list, const TBinaryLambda& comparator) {
  3297. const auto listType = list.GetStaticType();
  3298. MKQL_ENSURE(listType->IsList(), "Expected list.");
  3299. const auto itemType = AS_TYPE(TListType, listType)->GetItemType();
  3300. const auto leftArg = Arg(itemType);
  3301. const auto rightArg = Arg(itemType);
  3302. const auto predicate = comparator(leftArg, rightArg);
  3303. TCallableBuilder callableBuilder(Env, callableName, listType);
  3304. callableBuilder.Add(list);
  3305. callableBuilder.Add(leftArg);
  3306. callableBuilder.Add(rightArg);
  3307. callableBuilder.Add(predicate);
  3308. return TRuntimeNode(callableBuilder.Build(), false);
  3309. }
  3310. TRuntimeNode TProgramBuilder::BuildNth(const std::string_view& callableName, TRuntimeNode list, TRuntimeNode n, const TBinaryLambda& comparator) {
  3311. const auto listType = list.GetStaticType();
  3312. MKQL_ENSURE(listType->IsList(), "Expected list.");
  3313. const auto itemType = AS_TYPE(TListType, listType)->GetItemType();
  3314. MKQL_ENSURE(n.GetStaticType()->IsData(), "Expected data");
  3315. MKQL_ENSURE(static_cast<const TDataType&>(*n.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  3316. const auto leftArg = Arg(itemType);
  3317. const auto rightArg = Arg(itemType);
  3318. const auto predicate = comparator(leftArg, rightArg);
  3319. TCallableBuilder callableBuilder(Env, callableName, listType);
  3320. callableBuilder.Add(list);
  3321. callableBuilder.Add(n);
  3322. callableBuilder.Add(leftArg);
  3323. callableBuilder.Add(rightArg);
  3324. callableBuilder.Add(predicate);
  3325. return TRuntimeNode(callableBuilder.Build(), false);
  3326. }
  3327. TRuntimeNode TProgramBuilder::MakeHeap(TRuntimeNode list, const TBinaryLambda& comparator) {
  3328. return BuildHeap(__func__, list, std::move(comparator));
  3329. }
  3330. TRuntimeNode TProgramBuilder::PushHeap(TRuntimeNode list, const TBinaryLambda& comparator) {
  3331. return BuildHeap(__func__, list, std::move(comparator));
  3332. }
  3333. TRuntimeNode TProgramBuilder::PopHeap(TRuntimeNode list, const TBinaryLambda& comparator) {
  3334. return BuildHeap(__func__, list, std::move(comparator));
  3335. }
  3336. TRuntimeNode TProgramBuilder::SortHeap(TRuntimeNode list, const TBinaryLambda& comparator) {
  3337. return BuildHeap(__func__, list, std::move(comparator));
  3338. }
  3339. TRuntimeNode TProgramBuilder::StableSort(TRuntimeNode list, const TBinaryLambda& comparator) {
  3340. return BuildHeap(__func__, list, std::move(comparator));
  3341. }
  3342. TRuntimeNode TProgramBuilder::NthElement(TRuntimeNode list, TRuntimeNode n, const TBinaryLambda& comparator) {
  3343. return BuildNth(__func__, list, n, std::move(comparator));
  3344. }
  3345. TRuntimeNode TProgramBuilder::PartialSort(TRuntimeNode list, TRuntimeNode n, const TBinaryLambda& comparator) {
  3346. return BuildNth(__func__, list, n, std::move(comparator));
  3347. }
  3348. TRuntimeNode TProgramBuilder::BuildMap(const std::string_view& callableName, TRuntimeNode list, const TUnaryLambda& handler)
  3349. {
  3350. const auto listType = list.GetStaticType();
  3351. MKQL_ENSURE(listType->IsFlow() || listType->IsList() || listType->IsStream() || listType->IsOptional(), "Expected flow, list, stream or optional");
  3352. if (listType->IsOptional()) {
  3353. const auto itemArg = Arg(AS_TYPE(TOptionalType, listType)->GetItemType());
  3354. const auto newItem = handler(itemArg);
  3355. return IfPresent(list,
  3356. [&](TRuntimeNode item) { return NewOptional(handler(item)); },
  3357. NewEmptyOptional(NewOptionalType(newItem.GetStaticType()))
  3358. );
  3359. }
  3360. const auto itemType = listType->IsFlow() ?
  3361. AS_TYPE(TFlowType, listType)->GetItemType():
  3362. listType->IsList() ?
  3363. AS_TYPE(TListType, listType)->GetItemType():
  3364. AS_TYPE(TStreamType, listType)->GetItemType();
  3365. ThrowIfListOfVoid(itemType);
  3366. const auto itemArg = Arg(itemType);
  3367. const auto newItem = handler(itemArg);
  3368. const auto resultListType = listType->IsFlow() ?
  3369. (TType*)TFlowType::Create(newItem.GetStaticType(), Env):
  3370. listType->IsList() ?
  3371. (TType*)TListType::Create(newItem.GetStaticType(), Env):
  3372. (TType*)TStreamType::Create(newItem.GetStaticType(), Env);
  3373. TCallableBuilder callableBuilder(Env, callableName, resultListType);
  3374. callableBuilder.Add(list);
  3375. callableBuilder.Add(itemArg);
  3376. callableBuilder.Add(newItem);
  3377. return TRuntimeNode(callableBuilder.Build(), false);
  3378. }
  3379. TRuntimeNode TProgramBuilder::Invoke(const std::string_view& funcName, TType* resultType, const TArrayRef<const TRuntimeNode>& args) {
  3380. MKQL_ENSURE(args.size() >= 1U && args.size() <= 3U, "Expected from one to three arguments.");
  3381. std::array<TArgType, 4U> argTypes;
  3382. argTypes.front().first = UnpackOptionalData(resultType, argTypes.front().second)->GetSchemeType();
  3383. auto i = 0U;
  3384. for (const auto& arg : args) {
  3385. ++i;
  3386. argTypes[i].first = UnpackOptionalData(arg, argTypes[i].second)->GetSchemeType();
  3387. }
  3388. FunctionRegistry.GetBuiltins()->GetBuiltin(funcName, argTypes.data(), 1U + args.size());
  3389. TCallableBuilder callableBuilder(Env, __func__, resultType);
  3390. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(funcName));
  3391. for (const auto& arg : args) {
  3392. callableBuilder.Add(arg);
  3393. }
  3394. return TRuntimeNode(callableBuilder.Build(), false);
  3395. }
  3396. TRuntimeNode TProgramBuilder::Udf(
  3397. const std::string_view& funcName,
  3398. TRuntimeNode runConfig,
  3399. TType* userType,
  3400. const std::string_view& typeConfig
  3401. )
  3402. {
  3403. TRuntimeNode userTypeNode = userType ? TRuntimeNode(userType, true) : TRuntimeNode(Env.GetVoidLazy()->GetType(), true);
  3404. const ui32 flags = NUdf::IUdfModule::TFlags::TypesOnly;
  3405. if (!TypeInfoHelper) {
  3406. TypeInfoHelper = new TTypeInfoHelper();
  3407. }
  3408. TFunctionTypeInfo funcInfo;
  3409. TStatus status = FunctionRegistry.FindFunctionTypeInfo(
  3410. Env, TypeInfoHelper, nullptr, funcName, userType, typeConfig, flags, {}, nullptr, &funcInfo);
  3411. MKQL_ENSURE(status.IsOk(), status.GetError());
  3412. auto runConfigType = funcInfo.RunConfigType;
  3413. if (runConfig) {
  3414. bool typesMatch = runConfigType->IsSameType(*runConfig.GetStaticType());
  3415. MKQL_ENSURE(typesMatch, "RunConfig type mismatch");
  3416. } else {
  3417. MKQL_ENSURE(runConfigType->IsVoid() || runConfigType->IsOptional(), "RunConfig must be void or optional");
  3418. if (runConfigType->IsVoid()) {
  3419. runConfig = NewVoid();
  3420. } else {
  3421. runConfig = NewEmptyOptional(const_cast<TType*>(runConfigType));
  3422. }
  3423. }
  3424. auto funNameNode = NewDataLiteral<NUdf::EDataSlot::String>(funcName);
  3425. auto typeConfigNode = NewDataLiteral<NUdf::EDataSlot::String>(typeConfig);
  3426. TCallableBuilder callableBuilder(Env, __func__, funcInfo.FunctionType);
  3427. callableBuilder.Add(funNameNode);
  3428. callableBuilder.Add(userTypeNode);
  3429. callableBuilder.Add(typeConfigNode);
  3430. callableBuilder.Add(runConfig);
  3431. return TRuntimeNode(callableBuilder.Build(), false);
  3432. }
  3433. TRuntimeNode TProgramBuilder::TypedUdf(
  3434. const std::string_view& funcName,
  3435. TType* funcType,
  3436. TRuntimeNode runConfig,
  3437. TType* userType,
  3438. const std::string_view& typeConfig,
  3439. const std::string_view& file,
  3440. ui32 row,
  3441. ui32 column)
  3442. {
  3443. auto funNameNode = NewDataLiteral<NUdf::EDataSlot::String>(funcName);
  3444. auto typeConfigNode = NewDataLiteral<NUdf::EDataSlot::String>(typeConfig);
  3445. TRuntimeNode userTypeNode = userType ? TRuntimeNode(userType, true) : TRuntimeNode(Env.GetVoidLazy(), true);
  3446. TCallableBuilder callableBuilder(Env, "Udf", funcType);
  3447. callableBuilder.Add(funNameNode);
  3448. callableBuilder.Add(userTypeNode);
  3449. callableBuilder.Add(typeConfigNode);
  3450. callableBuilder.Add(runConfig);
  3451. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  3452. callableBuilder.Add(NewDataLiteral(row));
  3453. callableBuilder.Add(NewDataLiteral(column));
  3454. return TRuntimeNode(callableBuilder.Build(), false);
  3455. }
  3456. TRuntimeNode TProgramBuilder::ScriptUdf(
  3457. const std::string_view& moduleName,
  3458. const std::string_view& funcName,
  3459. TType* funcType,
  3460. TRuntimeNode script,
  3461. const std::string_view& file,
  3462. ui32 row,
  3463. ui32 column)
  3464. {
  3465. MKQL_ENSURE(funcType, "UDF callable type must not be empty");
  3466. MKQL_ENSURE(funcType->IsCallable(), "type must be callable");
  3467. auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(moduleName);
  3468. MKQL_ENSURE(scriptType != EScriptType::Unknown, "unknown script type '" << moduleName << "'");
  3469. EnsureScriptSpecificTypes(scriptType, static_cast<TCallableType*>(funcType), Env);
  3470. auto scriptTypeStr = IsCustomPython(scriptType) ? moduleName : ScriptTypeAsStr(CanonizeScriptType(scriptType));
  3471. TStringBuilder name;
  3472. name.reserve(scriptTypeStr.size() + funcName.size() + 1);
  3473. name << scriptTypeStr << '.' << funcName;
  3474. auto funcNameNode = NewDataLiteral<NUdf::EDataSlot::String>(name);
  3475. TRuntimeNode userTypeNode(funcType, true);
  3476. auto typeConfigNode = NewDataLiteral<NUdf::EDataSlot::String>("");
  3477. TCallableBuilder callableBuilder(Env, __func__, funcType);
  3478. callableBuilder.Add(funcNameNode);
  3479. callableBuilder.Add(userTypeNode);
  3480. callableBuilder.Add(typeConfigNode);
  3481. callableBuilder.Add(script);
  3482. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  3483. callableBuilder.Add(NewDataLiteral(row));
  3484. callableBuilder.Add(NewDataLiteral(column));
  3485. return TRuntimeNode(callableBuilder.Build(), false);
  3486. }
  3487. TRuntimeNode TProgramBuilder::Apply(TRuntimeNode callableNode, const TArrayRef<const TRuntimeNode>& args,
  3488. const std::string_view& file, ui32 row, ui32 column, ui32 dependentCount) {
  3489. MKQL_ENSURE(dependentCount <= args.size(), "Too many dependent nodes");
  3490. ui32 usedArgs = args.size() - dependentCount;
  3491. MKQL_ENSURE(!callableNode.IsImmediate() && callableNode.GetNode()->GetType()->IsCallable(),
  3492. "Expected callable");
  3493. auto callable = static_cast<TCallable*>(callableNode.GetNode());
  3494. TType* returnType = callable->GetType()->GetReturnType();
  3495. MKQL_ENSURE(returnType->IsCallable(), "Expected callable as return type");
  3496. auto callableType = static_cast<TCallableType*>(returnType);
  3497. MKQL_ENSURE(usedArgs <= callableType->GetArgumentsCount(), "Too many arguments");
  3498. MKQL_ENSURE(usedArgs >= callableType->GetArgumentsCount() - callableType->GetOptionalArgumentsCount(), "Too few arguments");
  3499. for (ui32 i = 0; i < usedArgs; i++) {
  3500. TType* argType = callableType->GetArgumentType(i);
  3501. TRuntimeNode arg = args[i];
  3502. MKQL_ENSURE(arg.GetStaticType()->IsConvertableTo(*argType),
  3503. "Argument type mismatch for argument " << i << ": runtime " << argType->GetKindAsStr()
  3504. << " with static " << arg.GetStaticType()->GetKindAsStr());
  3505. }
  3506. TCallableBuilder callableBuilder(Env, RuntimeVersion >= 8 ? "Apply2" : "Apply", callableType->GetReturnType());
  3507. callableBuilder.Add(callableNode);
  3508. callableBuilder.Add(NewDataLiteral<ui32>(dependentCount));
  3509. if constexpr (RuntimeVersion >= 8) {
  3510. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  3511. callableBuilder.Add(NewDataLiteral(row));
  3512. callableBuilder.Add(NewDataLiteral(column));
  3513. }
  3514. for (const auto& arg: args) {
  3515. callableBuilder.Add(arg);
  3516. }
  3517. return TRuntimeNode(callableBuilder.Build(), false);
  3518. }
  3519. TRuntimeNode TProgramBuilder::Apply(
  3520. TRuntimeNode callableNode,
  3521. const TArrayRef<const TRuntimeNode>& args,
  3522. ui32 dependentCount) {
  3523. return Apply(callableNode, args, {}, 0, 0, dependentCount);
  3524. }
  3525. TRuntimeNode TProgramBuilder::Callable(TType* callableType, const TArrayLambda& handler) {
  3526. auto castedCallableType = AS_TYPE(TCallableType, callableType);
  3527. std::vector<TRuntimeNode> args;
  3528. args.reserve(castedCallableType->GetArgumentsCount());
  3529. for (ui32 i = 0; i < castedCallableType->GetArgumentsCount(); ++i) {
  3530. args.push_back(Arg(castedCallableType->GetArgumentType(i)));
  3531. }
  3532. auto res = handler(args);
  3533. TCallableBuilder callableBuilder(Env, __func__, callableType);
  3534. for (ui32 i = 0; i < castedCallableType->GetArgumentsCount(); ++i) {
  3535. callableBuilder.Add(args[i]);
  3536. }
  3537. callableBuilder.Add(res);
  3538. return TRuntimeNode(callableBuilder.Build(), false);
  3539. }
  3540. TRuntimeNode TProgramBuilder::NewNull() {
  3541. if (!UseNullType || RuntimeVersion < 11) {
  3542. TCallableBuilder callableBuilder(Env, "Null", NewOptionalType(Env.GetVoidLazy()->GetType()));
  3543. return TRuntimeNode(callableBuilder.Build(), false);
  3544. } else {
  3545. return TRuntimeNode(Env.GetNullLazy(), true);
  3546. }
  3547. }
  3548. TRuntimeNode TProgramBuilder::Concat(TRuntimeNode data1, TRuntimeNode data2) {
  3549. bool isOpt1, isOpt2;
  3550. const auto type1 = UnpackOptionalData(data1, isOpt1)->GetSchemeType();
  3551. const auto type2 = UnpackOptionalData(data2, isOpt2)->GetSchemeType();
  3552. const auto resultType = NewDataType(type1 == type2 ? type1 : NUdf::TDataType<char*>::Id);
  3553. return InvokeBinary(__func__, isOpt1 || isOpt2 ? NewOptionalType(resultType) : resultType, data1, data2);
  3554. }
  3555. TRuntimeNode TProgramBuilder::AggrConcat(TRuntimeNode data1, TRuntimeNode data2) {
  3556. MKQL_ENSURE(data1.GetStaticType()->IsSameType(*data2.GetStaticType()), "Operands type mismatch.");
  3557. const std::array<TRuntimeNode, 2> args = {{ data1, data2 }};
  3558. return Invoke(__func__, data1.GetStaticType(), args);
  3559. }
  3560. TRuntimeNode TProgramBuilder::Substring(TRuntimeNode data, TRuntimeNode start, TRuntimeNode count) {
  3561. const std::array<TRuntimeNode, 3U> args = {{ data, start, count }};
  3562. return Invoke(__func__, data.GetStaticType(), args);
  3563. }
  3564. TRuntimeNode TProgramBuilder::Find(TRuntimeNode haystack, TRuntimeNode needle, TRuntimeNode pos) {
  3565. const std::array<TRuntimeNode, 3U> args = {{ haystack, needle, pos }};
  3566. return Invoke(__func__, NewOptionalType(NewDataType(NUdf::TDataType<ui32>::Id)), args);
  3567. }
  3568. TRuntimeNode TProgramBuilder::RFind(TRuntimeNode haystack, TRuntimeNode needle, TRuntimeNode pos) {
  3569. const std::array<TRuntimeNode, 3U> args = {{ haystack, needle, pos }};
  3570. return Invoke(__func__, NewOptionalType(NewDataType(NUdf::TDataType<ui32>::Id)), args);
  3571. }
  3572. TRuntimeNode TProgramBuilder::StartsWith(TRuntimeNode string, TRuntimeNode prefix) {
  3573. if constexpr (RuntimeVersion < 19U) {
  3574. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3575. }
  3576. return DataCompare(__func__, string, prefix);
  3577. }
  3578. TRuntimeNode TProgramBuilder::EndsWith(TRuntimeNode string, TRuntimeNode suffix) {
  3579. if constexpr (RuntimeVersion < 19U) {
  3580. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3581. }
  3582. return DataCompare(__func__, string, suffix);
  3583. }
  3584. TRuntimeNode TProgramBuilder::StringContains(TRuntimeNode string, TRuntimeNode pattern) {
  3585. bool isOpt1, isOpt2;
  3586. TDataType* type1 = UnpackOptionalData(string, isOpt1);
  3587. TDataType* type2 = UnpackOptionalData(pattern, isOpt2);
  3588. MKQL_ENSURE(type1->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id ||
  3589. type1->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expecting string as first argument");
  3590. MKQL_ENSURE(type2->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id ||
  3591. type2->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expecting string as second argument");
  3592. if constexpr (RuntimeVersion < 32U) {
  3593. auto stringCasted = (type1->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id) ? ToString(string) : string;
  3594. auto patternCasted = (type2->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id) ? ToString(pattern) : pattern;
  3595. auto found = Exists(Find(stringCasted, patternCasted, NewDataLiteral(ui32(0))));
  3596. if (!isOpt1 && !isOpt2) {
  3597. return found;
  3598. }
  3599. TVector<TRuntimeNode> predicates;
  3600. if (isOpt1) {
  3601. predicates.push_back(Exists(string));
  3602. }
  3603. if (isOpt2) {
  3604. predicates.push_back(Exists(pattern));
  3605. }
  3606. TRuntimeNode argsNotNull = (predicates.size() == 1) ? predicates.front() : And(predicates);
  3607. return If(argsNotNull, NewOptional(found), NewEmptyOptionalDataLiteral(NUdf::TDataType<bool>::Id));
  3608. }
  3609. return DataCompare(__func__, string, pattern);
  3610. }
  3611. TRuntimeNode TProgramBuilder::ByteAt(TRuntimeNode data, TRuntimeNode index) {
  3612. const std::array<TRuntimeNode, 2U> args = {{ data, index }};
  3613. return Invoke(__func__, NewOptionalType(NewDataType(NUdf::TDataType<ui8>::Id)), args);
  3614. }
  3615. TRuntimeNode TProgramBuilder::Size(TRuntimeNode data) {
  3616. return UnaryDataFunction(data, __func__, TDataFunctionFlags::HasUi32Result | TDataFunctionFlags::AllowNull | TDataFunctionFlags::AllowOptionalArgs | TDataFunctionFlags::CommonOptionalResult);
  3617. }
  3618. template <bool Utf8>
  3619. TRuntimeNode TProgramBuilder::ToString(TRuntimeNode data) {
  3620. bool isOptional;
  3621. UnpackOptionalData(data, isOptional);
  3622. const auto resultType = NewDataType(Utf8 ? NUdf::EDataSlot::Utf8 : NUdf::EDataSlot::String, isOptional);
  3623. TCallableBuilder callableBuilder(Env, __func__, resultType);
  3624. callableBuilder.Add(data);
  3625. return TRuntimeNode(callableBuilder.Build(), false);
  3626. }
  3627. TRuntimeNode TProgramBuilder::FromString(TRuntimeNode data, TType* type) {
  3628. bool isOptional;
  3629. const auto sourceType = UnpackOptionalData(data, isOptional);
  3630. const auto targetType = UnpackOptionalData(type, isOptional);
  3631. MKQL_ENSURE(sourceType->GetSchemeType() == NUdf::TDataType<char*>::Id || sourceType->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id, "Expected String");
  3632. MKQL_ENSURE(targetType->GetSchemeType() != 0, "Null is not allowed");
  3633. TCallableBuilder callableBuilder(Env, __func__, type);
  3634. callableBuilder.Add(data);
  3635. callableBuilder.Add(NewDataLiteral(static_cast<ui32>(targetType->GetSchemeType())));
  3636. if (targetType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3637. const auto& params = static_cast<const TDataDecimalType*>(targetType)->GetParams();
  3638. callableBuilder.Add(NewDataLiteral(params.first));
  3639. callableBuilder.Add(NewDataLiteral(params.second));
  3640. }
  3641. return TRuntimeNode(callableBuilder.Build(), false);
  3642. }
  3643. TRuntimeNode TProgramBuilder::StrictFromString(TRuntimeNode data, TType* type) {
  3644. bool isOptional;
  3645. const auto sourceType = UnpackOptionalData(data, isOptional);
  3646. const auto targetType = UnpackOptionalData(type, isOptional);
  3647. MKQL_ENSURE(sourceType->GetSchemeType() == NUdf::TDataType<char*>::Id || sourceType->GetSchemeType() == NUdf::TDataType<NUdf::TUtf8>::Id, "Expected String");
  3648. MKQL_ENSURE(targetType->GetSchemeType() != 0, "Null is not allowed");
  3649. TCallableBuilder callableBuilder(Env, __func__, type);
  3650. callableBuilder.Add(data);
  3651. callableBuilder.Add(NewDataLiteral(static_cast<ui32>(targetType->GetSchemeType())));
  3652. if (targetType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3653. const auto& params = static_cast<const TDataDecimalType*>(targetType)->GetParams();
  3654. callableBuilder.Add(NewDataLiteral(params.first));
  3655. callableBuilder.Add(NewDataLiteral(params.second));
  3656. }
  3657. return TRuntimeNode(callableBuilder.Build(), false);
  3658. }
  3659. TRuntimeNode TProgramBuilder::ToBytes(TRuntimeNode data) {
  3660. return UnaryDataFunction(data, __func__, TDataFunctionFlags::HasStringResult | TDataFunctionFlags::AllowOptionalArgs | TDataFunctionFlags::CommonOptionalResult);
  3661. }
  3662. TRuntimeNode TProgramBuilder::FromBytes(TRuntimeNode data, TType* targetType) {
  3663. auto type = data.GetStaticType();
  3664. bool isOptional;
  3665. auto dataType = UnpackOptionalData(type, isOptional);
  3666. MKQL_ENSURE(dataType->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expected String");
  3667. auto resultType = NewOptionalType(targetType);
  3668. TCallableBuilder callableBuilder(Env, __func__, resultType);
  3669. callableBuilder.Add(data);
  3670. auto targetDataType = AS_TYPE(TDataType, targetType);
  3671. callableBuilder.Add(NewDataLiteral(static_cast<ui32>(targetDataType->GetSchemeType())));
  3672. if (targetDataType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3673. const auto& params = static_cast<const TDataDecimalType*>(targetType)->GetParams();
  3674. callableBuilder.Add(NewDataLiteral(params.first));
  3675. callableBuilder.Add(NewDataLiteral(params.second));
  3676. }
  3677. return TRuntimeNode(callableBuilder.Build(), false);
  3678. }
  3679. TRuntimeNode TProgramBuilder::InversePresortString(TRuntimeNode data) {
  3680. const std::array<TRuntimeNode, 1U> args = {{ data }};
  3681. return Invoke(__func__, NewDataType(NUdf::TDataType<char*>::Id), args);
  3682. }
  3683. TRuntimeNode TProgramBuilder::InverseString(TRuntimeNode data) {
  3684. const std::array<TRuntimeNode, 1U> args = {{ data }};
  3685. return Invoke(__func__, NewDataType(NUdf::TDataType<char*>::Id), args);
  3686. }
  3687. TRuntimeNode TProgramBuilder::Random(const TArrayRef<const TRuntimeNode>& dependentNodes) {
  3688. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<double>::Id));
  3689. for (auto& x : dependentNodes) {
  3690. callableBuilder.Add(x);
  3691. }
  3692. return TRuntimeNode(callableBuilder.Build(), false);
  3693. }
  3694. TRuntimeNode TProgramBuilder::RandomNumber(const TArrayRef<const TRuntimeNode>& dependentNodes) {
  3695. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<ui64>::Id));
  3696. for (auto& x : dependentNodes) {
  3697. callableBuilder.Add(x);
  3698. }
  3699. return TRuntimeNode(callableBuilder.Build(), false);
  3700. }
  3701. TRuntimeNode TProgramBuilder::RandomUuid(const TArrayRef<const TRuntimeNode>& dependentNodes) {
  3702. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<NUdf::TUuid>::Id));
  3703. for (auto& x : dependentNodes) {
  3704. callableBuilder.Add(x);
  3705. }
  3706. return TRuntimeNode(callableBuilder.Build(), false);
  3707. }
  3708. TRuntimeNode TProgramBuilder::Now(const TArrayRef<const TRuntimeNode>& args) {
  3709. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::TDataType<ui64>::Id));
  3710. for (const auto& x : args) {
  3711. callableBuilder.Add(x);
  3712. }
  3713. return TRuntimeNode(callableBuilder.Build(), false);
  3714. }
  3715. TRuntimeNode TProgramBuilder::CurrentUtcDate(const TArrayRef<const TRuntimeNode>& args) {
  3716. return Cast(CurrentUtcTimestamp(args), NewDataType(NUdf::TDataType<NUdf::TDate>::Id));
  3717. }
  3718. TRuntimeNode TProgramBuilder::CurrentUtcDatetime(const TArrayRef<const TRuntimeNode>& args) {
  3719. return Cast(CurrentUtcTimestamp(args), NewDataType(NUdf::TDataType<NUdf::TDatetime>::Id));
  3720. }
  3721. TRuntimeNode TProgramBuilder::CurrentUtcTimestamp(const TArrayRef<const TRuntimeNode>& args) {
  3722. return Coalesce(ToIntegral(Now(args), NewDataType(NUdf::TDataType<NUdf::TTimestamp>::Id, true)),
  3723. TRuntimeNode(BuildDataLiteral(NUdf::TUnboxedValuePod(ui64(NUdf::MAX_TIMESTAMP - 1ULL)), NUdf::TDataType<NUdf::TTimestamp>::Id, Env), true));
  3724. }
  3725. TRuntimeNode TProgramBuilder::Pickle(TRuntimeNode data) {
  3726. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::EDataSlot::String));
  3727. callableBuilder.Add(data);
  3728. return TRuntimeNode(callableBuilder.Build(), false);
  3729. }
  3730. TRuntimeNode TProgramBuilder::StablePickle(TRuntimeNode data) {
  3731. TCallableBuilder callableBuilder(Env, __func__, NewDataType(NUdf::EDataSlot::String));
  3732. callableBuilder.Add(data);
  3733. return TRuntimeNode(callableBuilder.Build(), false);
  3734. }
  3735. TRuntimeNode TProgramBuilder::Unpickle(TType* type, TRuntimeNode serialized) {
  3736. MKQL_ENSURE(AS_TYPE(TDataType, serialized)->GetSchemeType() == NUdf::TDataType<char*>::Id, "Expected String");
  3737. TCallableBuilder callableBuilder(Env, __func__, type);
  3738. callableBuilder.Add(TRuntimeNode(type, true));
  3739. callableBuilder.Add(serialized);
  3740. return TRuntimeNode(callableBuilder.Build(), false);
  3741. }
  3742. TRuntimeNode TProgramBuilder::Ascending(TRuntimeNode data) {
  3743. auto dataType = NewDataType(NUdf::EDataSlot::String);
  3744. TCallableBuilder callableBuilder(Env, __func__, dataType);
  3745. callableBuilder.Add(data);
  3746. return TRuntimeNode(callableBuilder.Build(), false);
  3747. }
  3748. TRuntimeNode TProgramBuilder::Descending(TRuntimeNode data) {
  3749. auto dataType = NewDataType(NUdf::EDataSlot::String);
  3750. TCallableBuilder callableBuilder(Env, __func__, dataType);
  3751. callableBuilder.Add(data);
  3752. return TRuntimeNode(callableBuilder.Build(), false);
  3753. }
  3754. TRuntimeNode TProgramBuilder::Convert(TRuntimeNode data, TType* type) {
  3755. if (data.GetStaticType()->IsSameType(*type)) {
  3756. return data;
  3757. }
  3758. bool isOptional;
  3759. const auto dataType = UnpackOptionalData(data, isOptional);
  3760. const std::array<TRuntimeNode, 1> args = {{ data }};
  3761. if (dataType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3762. const auto targetSchemeType = UnpackOptionalData(type, isOptional)->GetSchemeType();
  3763. TStringStream str;
  3764. str << "To" << NUdf::GetDataTypeInfo(NUdf::GetDataSlot(targetSchemeType)).Name
  3765. << '_' << ::ToString(static_cast<const TDataDecimalType*>(dataType)->GetParams().second);
  3766. return Invoke(str.Str().c_str(), type, args);
  3767. }
  3768. return Invoke(__func__, type, args);
  3769. }
  3770. TRuntimeNode TProgramBuilder::ToDecimal(TRuntimeNode data, ui8 precision, ui8 scale) {
  3771. bool isOptional;
  3772. auto dataType = UnpackOptionalData(data, isOptional);
  3773. TType* decimal = TDataDecimalType::Create(precision, scale, Env);
  3774. if (isOptional)
  3775. decimal = TOptionalType::Create(decimal, Env);
  3776. const std::array<TRuntimeNode, 1> args = {{ data }};
  3777. if (dataType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3778. const auto& params = static_cast<const TDataDecimalType*>(dataType)->GetParams();
  3779. if (precision - scale < params.first - params.second && scale != params.second) {
  3780. return ToDecimal(ToDecimal(data, precision - scale + params.second, params.second), precision, scale);
  3781. } else if (params.second < scale) {
  3782. return Invoke("ScaleUp_" + ::ToString(scale - params.second), decimal, args);
  3783. } else if (params.second > scale) {
  3784. return Invoke("ScaleDown_" + ::ToString(params.second - scale), decimal, args);
  3785. } else if (precision < params.first) {
  3786. return Invoke("CheckBounds_" + ::ToString(precision), decimal, args);
  3787. } else if (precision > params.first) {
  3788. return Invoke("Plus", decimal, args);
  3789. } else {
  3790. return data;
  3791. }
  3792. } else {
  3793. const auto digits = NUdf::GetDataTypeInfo(*dataType->GetDataSlot()).DecimalDigits;
  3794. MKQL_ENSURE(digits, "Can't cast into Decimal.");
  3795. if (digits <= precision && !scale)
  3796. return Invoke(__func__, decimal, args);
  3797. else
  3798. return ToDecimal(ToDecimal(data, digits, 0), precision, scale);
  3799. }
  3800. }
  3801. TRuntimeNode TProgramBuilder::ToIntegral(TRuntimeNode data, TType* type) {
  3802. bool isOptional;
  3803. auto dataType = UnpackOptionalData(data, isOptional);
  3804. if (dataType->GetSchemeType() == NUdf::TDataType<NUdf::TDecimal>::Id) {
  3805. const auto& params = static_cast<const TDataDecimalType*>(dataType)->GetParams();
  3806. if (params.second)
  3807. return ToIntegral(ToDecimal(data, params.first - params.second, 0), type);
  3808. }
  3809. const std::array<TRuntimeNode, 1> args = {{ data }};
  3810. return Invoke(__func__, type, args);
  3811. }
  3812. TRuntimeNode TProgramBuilder::ListIf(TRuntimeNode predicate, TRuntimeNode item) {
  3813. return If(predicate, NewList(item.GetStaticType(), {item}), NewEmptyList(item.GetStaticType()));
  3814. }
  3815. TRuntimeNode TProgramBuilder::AsList(TRuntimeNode item) {
  3816. TListLiteralBuilder builder(Env, item.GetStaticType());
  3817. builder.Add(item);
  3818. return TRuntimeNode(builder.Build(), true);
  3819. }
  3820. TRuntimeNode TProgramBuilder::AsList(const TArrayRef<const TRuntimeNode>& items) {
  3821. MKQL_ENSURE(!items.empty(), "required not empty list of items");
  3822. TListLiteralBuilder builder(Env, items[0].GetStaticType());
  3823. for (auto item : items) {
  3824. builder.Add(item);
  3825. }
  3826. return TRuntimeNode(builder.Build(), true);
  3827. }
  3828. TRuntimeNode TProgramBuilder::MapJoinCore(TRuntimeNode flow, TRuntimeNode dict, EJoinKind joinKind,
  3829. const TArrayRef<const ui32>& leftKeyColumns, const TArrayRef<const ui32>& leftRenames,
  3830. const TArrayRef<const ui32>& rightRenames, TType* returnType) {
  3831. MKQL_ENSURE(joinKind == EJoinKind::Inner || joinKind == EJoinKind::Left || joinKind == EJoinKind::LeftSemi || joinKind == EJoinKind::LeftOnly, "Unsupported join kind");
  3832. MKQL_ENSURE(!leftKeyColumns.empty(), "At least one key column must be specified");
  3833. MKQL_ENSURE(leftRenames.size() % 2U == 0U, "Expected even count");
  3834. MKQL_ENSURE(rightRenames.size() % 2U == 0U, "Expected even count");
  3835. TRuntimeNode::TList leftKeyColumnsNodes, leftRenamesNodes, rightRenamesNodes;
  3836. leftKeyColumnsNodes.reserve(leftKeyColumns.size());
  3837. std::transform(leftKeyColumns.cbegin(), leftKeyColumns.cend(), std::back_inserter(leftKeyColumnsNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  3838. leftRenamesNodes.reserve(leftRenames.size());
  3839. std::transform(leftRenames.cbegin(), leftRenames.cend(), std::back_inserter(leftRenamesNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  3840. rightRenamesNodes.reserve(rightRenames.size());
  3841. std::transform(rightRenames.cbegin(), rightRenames.cend(), std::back_inserter(rightRenamesNodes), [this](const ui32 idx) { return NewDataLiteral(idx); });
  3842. TCallableBuilder callableBuilder(Env, __func__, returnType);
  3843. callableBuilder.Add(flow);
  3844. callableBuilder.Add(dict);
  3845. callableBuilder.Add(NewDataLiteral((ui32)joinKind));
  3846. callableBuilder.Add(NewTuple(leftKeyColumnsNodes));
  3847. callableBuilder.Add(NewTuple(leftRenamesNodes));
  3848. callableBuilder.Add(NewTuple(rightRenamesNodes));
  3849. return TRuntimeNode(callableBuilder.Build(), false);
  3850. }
  3851. TRuntimeNode TProgramBuilder::CommonJoinCore(TRuntimeNode flow, EJoinKind joinKind,
  3852. const TArrayRef<const ui32>& leftColumns, const TArrayRef<const ui32>& rightColumns,
  3853. const TArrayRef<const ui32>& requiredColumns, const TArrayRef<const ui32>& keyColumns,
  3854. ui64 memLimit, std::optional<ui32> sortedTableOrder,
  3855. EAnyJoinSettings anyJoinSettings, const ui32 tableIndexField, TType* returnType) {
  3856. if constexpr (RuntimeVersion < 17U) {
  3857. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3858. }
  3859. MKQL_ENSURE(leftColumns.size() % 2U == 0U, "Expected even count");
  3860. MKQL_ENSURE(rightColumns.size() % 2U == 0U, "Expected even count");
  3861. TRuntimeNode::TList leftInputColumnsNodes, rightInputColumnsNodes, requiredColumnsNodes,
  3862. leftOutputColumnsNodes, rightOutputColumnsNodes, keyColumnsNodes;
  3863. bool s = false;
  3864. for (const auto idx : leftColumns) {
  3865. ((s = !s) ? leftInputColumnsNodes : leftOutputColumnsNodes).emplace_back(NewDataLiteral(idx));
  3866. }
  3867. for (const auto idx : rightColumns) {
  3868. ((s = !s) ? rightInputColumnsNodes : rightOutputColumnsNodes).emplace_back(NewDataLiteral(idx));
  3869. }
  3870. const std::unordered_set<ui32> requiredIndices(requiredColumns.cbegin(), requiredColumns.cend());
  3871. MKQL_ENSURE(requiredIndices.size() == requiredColumns.size(), "Duplication of requred columns.");
  3872. requiredColumnsNodes.reserve(requiredColumns.size());
  3873. std::transform(requiredColumns.cbegin(), requiredColumns.cend(), std::back_inserter(requiredColumnsNodes),
  3874. std::bind(&TProgramBuilder::NewDataLiteral<ui32>, this, std::placeholders::_1));
  3875. const std::unordered_set<ui32> keyIndices(keyColumns.cbegin(), keyColumns.cend());
  3876. MKQL_ENSURE(keyIndices.size() == keyColumns.size(), "Duplication of key columns.");
  3877. keyColumnsNodes.reserve(keyColumns.size());
  3878. std::transform(keyColumns.cbegin(), keyColumns.cend(), std::back_inserter(keyColumnsNodes),
  3879. std::bind(&TProgramBuilder::NewDataLiteral<ui32>, this, std::placeholders::_1));
  3880. TCallableBuilder callableBuilder(Env, __func__, returnType);
  3881. callableBuilder.Add(flow);
  3882. callableBuilder.Add(NewDataLiteral((ui32)joinKind));
  3883. callableBuilder.Add(NewTuple(leftInputColumnsNodes));
  3884. callableBuilder.Add(NewTuple(rightInputColumnsNodes));
  3885. callableBuilder.Add(NewTuple(requiredColumnsNodes));
  3886. callableBuilder.Add(NewTuple(leftOutputColumnsNodes));
  3887. callableBuilder.Add(NewTuple(rightOutputColumnsNodes));
  3888. callableBuilder.Add(NewTuple(keyColumnsNodes));
  3889. callableBuilder.Add(NewDataLiteral(memLimit));
  3890. callableBuilder.Add(sortedTableOrder ? NewDataLiteral(*sortedTableOrder) : NewVoid());
  3891. callableBuilder.Add(NewDataLiteral((ui32)anyJoinSettings));
  3892. callableBuilder.Add(NewDataLiteral(tableIndexField));
  3893. return TRuntimeNode(callableBuilder.Build(), false);
  3894. }
  3895. TRuntimeNode TProgramBuilder::WideCombiner(TRuntimeNode flow, i64 memLimit, const TWideLambda& extractor, const TBinaryWideLambda& init, const TTernaryWideLambda& update, const TBinaryWideLambda& finish) {
  3896. if constexpr (RuntimeVersion < 18U) {
  3897. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3898. }
  3899. if (memLimit < 0) {
  3900. if constexpr (RuntimeVersion < 46U) {
  3901. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__ << " with limit " << memLimit;
  3902. }
  3903. }
  3904. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3905. TRuntimeNode::TList itemArgs;
  3906. itemArgs.reserve(wideComponents.size());
  3907. auto i = 0U;
  3908. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3909. const auto keys = extractor(itemArgs);
  3910. TRuntimeNode::TList keyArgs;
  3911. keyArgs.reserve(keys.size());
  3912. std::transform(keys.cbegin(), keys.cend(), std::back_inserter(keyArgs), [&](TRuntimeNode key){ return Arg(key.GetStaticType()); } );
  3913. const auto first = init(keyArgs, itemArgs);
  3914. TRuntimeNode::TList stateArgs;
  3915. stateArgs.reserve(first.size());
  3916. std::transform(first.cbegin(), first.cend(), std::back_inserter(stateArgs), [&](TRuntimeNode state){ return Arg(state.GetStaticType()); } );
  3917. const auto next = update(keyArgs, itemArgs, stateArgs);
  3918. MKQL_ENSURE(next.size() == first.size(), "Mismatch init and update state size.");
  3919. TRuntimeNode::TList finishKeyArgs;
  3920. finishKeyArgs.reserve(keys.size());
  3921. std::transform(keys.cbegin(), keys.cend(), std::back_inserter(finishKeyArgs), [&](TRuntimeNode key){ return Arg(key.GetStaticType()); } );
  3922. TRuntimeNode::TList finishStateArgs;
  3923. finishStateArgs.reserve(next.size());
  3924. std::transform(next.cbegin(), next.cend(), std::back_inserter(finishStateArgs), [&](TRuntimeNode state){ return Arg(state.GetStaticType()); } );
  3925. const auto output = finish(finishKeyArgs, finishStateArgs);
  3926. std::vector<TType*> tupleItems;
  3927. tupleItems.reserve(output.size());
  3928. std::transform(output.cbegin(), output.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  3929. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(tupleItems)));
  3930. callableBuilder.Add(flow);
  3931. if constexpr (RuntimeVersion < 46U)
  3932. callableBuilder.Add(NewDataLiteral(ui64(memLimit)));
  3933. else
  3934. callableBuilder.Add(NewDataLiteral(memLimit));
  3935. callableBuilder.Add(NewDataLiteral(ui32(keyArgs.size())));
  3936. callableBuilder.Add(NewDataLiteral(ui32(stateArgs.size())));
  3937. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3938. std::for_each(keys.cbegin(), keys.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3939. std::for_each(keyArgs.cbegin(), keyArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3940. std::for_each(first.cbegin(), first.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3941. std::for_each(stateArgs.cbegin(), stateArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3942. std::for_each(next.cbegin(), next.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3943. std::for_each(finishKeyArgs.cbegin(), finishKeyArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3944. std::for_each(finishStateArgs.cbegin(), finishStateArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3945. std::for_each(output.cbegin(), output.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3946. return TRuntimeNode(callableBuilder.Build(), false);
  3947. }
  3948. TRuntimeNode TProgramBuilder::WideLastCombinerCommon(const TStringBuf& funcName, TRuntimeNode flow, const TWideLambda& extractor, const TBinaryWideLambda& init, const TTernaryWideLambda& update, const TBinaryWideLambda& finish) {
  3949. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  3950. TRuntimeNode::TList itemArgs;
  3951. itemArgs.reserve(wideComponents.size());
  3952. auto i = 0U;
  3953. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  3954. const auto keys = extractor(itemArgs);
  3955. TRuntimeNode::TList keyArgs;
  3956. keyArgs.reserve(keys.size());
  3957. std::transform(keys.cbegin(), keys.cend(), std::back_inserter(keyArgs), [&](TRuntimeNode key){ return Arg(key.GetStaticType()); } );
  3958. const auto first = init(keyArgs, itemArgs);
  3959. TRuntimeNode::TList stateArgs;
  3960. stateArgs.reserve(first.size());
  3961. std::transform(first.cbegin(), first.cend(), std::back_inserter(stateArgs), [&](TRuntimeNode state){ return Arg(state.GetStaticType()); } );
  3962. const auto next = update(keyArgs, itemArgs, stateArgs);
  3963. MKQL_ENSURE(next.size() == first.size(), "Mismatch init and update state size.");
  3964. TRuntimeNode::TList finishKeyArgs;
  3965. finishKeyArgs.reserve(keys.size());
  3966. std::transform(keys.cbegin(), keys.cend(), std::back_inserter(finishKeyArgs), [&](TRuntimeNode key){ return Arg(key.GetStaticType()); } );
  3967. TRuntimeNode::TList finishStateArgs;
  3968. finishStateArgs.reserve(next.size());
  3969. std::transform(next.cbegin(), next.cend(), std::back_inserter(finishStateArgs), [&](TRuntimeNode state){ return Arg(state.GetStaticType()); } );
  3970. const auto output = finish(finishKeyArgs, finishStateArgs);
  3971. std::vector<TType*> tupleItems;
  3972. tupleItems.reserve(output.size());
  3973. std::transform(output.cbegin(), output.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  3974. TCallableBuilder callableBuilder(Env, funcName, NewFlowType(NewMultiType(tupleItems)));
  3975. callableBuilder.Add(flow);
  3976. callableBuilder.Add(NewDataLiteral(ui32(keyArgs.size())));
  3977. callableBuilder.Add(NewDataLiteral(ui32(stateArgs.size())));
  3978. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3979. std::for_each(keys.cbegin(), keys.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3980. std::for_each(keyArgs.cbegin(), keyArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3981. std::for_each(first.cbegin(), first.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3982. std::for_each(stateArgs.cbegin(), stateArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3983. std::for_each(next.cbegin(), next.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3984. std::for_each(finishKeyArgs.cbegin(), finishKeyArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3985. std::for_each(finishStateArgs.cbegin(), finishStateArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3986. std::for_each(output.cbegin(), output.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  3987. return TRuntimeNode(callableBuilder.Build(), false);
  3988. }
  3989. TRuntimeNode TProgramBuilder::WideLastCombiner(TRuntimeNode flow, const TWideLambda& extractor, const TBinaryWideLambda& init, const TTernaryWideLambda& update, const TBinaryWideLambda& finish) {
  3990. if constexpr (RuntimeVersion < 29U) {
  3991. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3992. }
  3993. return WideLastCombinerCommon(__func__, flow, extractor, init, update, finish);
  3994. }
  3995. TRuntimeNode TProgramBuilder::WideLastCombinerWithSpilling(TRuntimeNode flow, const TWideLambda& extractor, const TBinaryWideLambda& init, const TTernaryWideLambda& update, const TBinaryWideLambda& finish) {
  3996. if constexpr (RuntimeVersion < 49U) {
  3997. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  3998. }
  3999. return WideLastCombinerCommon(__func__, flow, extractor, init, update, finish);
  4000. }
  4001. TRuntimeNode TProgramBuilder::WideCondense1(TRuntimeNode flow, const TWideLambda& init, const TWideSwitchLambda& switcher, const TBinaryWideLambda& update, bool useCtx) {
  4002. if constexpr (RuntimeVersion < 18U) {
  4003. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4004. }
  4005. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  4006. TRuntimeNode::TList itemArgs;
  4007. itemArgs.reserve(wideComponents.size());
  4008. auto i = 0U;
  4009. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  4010. const auto first = init(itemArgs);
  4011. TRuntimeNode::TList stateArgs;
  4012. stateArgs.reserve(first.size());
  4013. std::transform(first.cbegin(), first.cend(), std::back_inserter(stateArgs), [&](TRuntimeNode state){ return Arg(state.GetStaticType()); } );
  4014. const auto chop = switcher(itemArgs, stateArgs);
  4015. const auto next = update(itemArgs, stateArgs);
  4016. MKQL_ENSURE(next.size() == first.size(), "Mismatch init and update state size.");
  4017. std::vector<TType*> tupleItems;
  4018. tupleItems.reserve(next.size());
  4019. std::transform(next.cbegin(), next.cend(), std::back_inserter(tupleItems), std::bind(&TRuntimeNode::GetStaticType, std::placeholders::_1));
  4020. TCallableBuilder callableBuilder(Env, __func__, NewFlowType(NewMultiType(tupleItems)));
  4021. callableBuilder.Add(flow);
  4022. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4023. std::for_each(first.cbegin(), first.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4024. std::for_each(stateArgs.cbegin(), stateArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4025. callableBuilder.Add(chop);
  4026. std::for_each(next.cbegin(), next.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4027. if (useCtx) {
  4028. MKQL_ENSURE(RuntimeVersion >= 30U, "Too old runtime version");
  4029. callableBuilder.Add(NewDataLiteral<bool>(useCtx));
  4030. }
  4031. return TRuntimeNode(callableBuilder.Build(), false);
  4032. }
  4033. TRuntimeNode TProgramBuilder::CombineCore(TRuntimeNode stream,
  4034. const TUnaryLambda& keyExtractor,
  4035. const TBinaryLambda& init,
  4036. const TTernaryLambda& update,
  4037. const TBinaryLambda& finish,
  4038. ui64 memLimit)
  4039. {
  4040. if constexpr (RuntimeVersion < 3U) {
  4041. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4042. }
  4043. const bool isStream = stream.GetStaticType()->IsStream();
  4044. const auto itemType = isStream ? AS_TYPE(TStreamType, stream)->GetItemType() : AS_TYPE(TFlowType, stream)->GetItemType();
  4045. const auto itemArg = Arg(itemType);
  4046. const auto key = keyExtractor(itemArg);
  4047. const auto keyType = key.GetStaticType();
  4048. const auto keyArg = Arg(keyType);
  4049. const auto stateInit = init(keyArg, itemArg);
  4050. const auto stateType = stateInit.GetStaticType();
  4051. const auto stateArg = Arg(stateType);
  4052. const auto stateUpdate = update(keyArg, itemArg, stateArg);
  4053. const auto finishItem = finish(keyArg, stateArg);
  4054. const auto finishType = finishItem.GetStaticType();
  4055. MKQL_ENSURE(finishType->IsList() || finishType->IsStream() || finishType->IsOptional(), "Expected list, stream or optional");
  4056. TType* retItemType = nullptr;
  4057. if (finishType->IsOptional()) {
  4058. retItemType = AS_TYPE(TOptionalType, finishType)->GetItemType();
  4059. } else if (finishType->IsList()) {
  4060. retItemType = AS_TYPE(TListType, finishType)->GetItemType();
  4061. } else if (finishType->IsStream()) {
  4062. retItemType = AS_TYPE(TStreamType, finishType)->GetItemType();
  4063. }
  4064. const auto resultStreamType = isStream ? NewStreamType(retItemType) : NewFlowType(retItemType);
  4065. TCallableBuilder callableBuilder(Env, __func__, resultStreamType);
  4066. callableBuilder.Add(stream);
  4067. callableBuilder.Add(itemArg);
  4068. callableBuilder.Add(key);
  4069. callableBuilder.Add(keyArg);
  4070. callableBuilder.Add(stateInit);
  4071. callableBuilder.Add(stateArg);
  4072. callableBuilder.Add(stateUpdate);
  4073. callableBuilder.Add(finishItem);
  4074. callableBuilder.Add(NewDataLiteral(memLimit));
  4075. return TRuntimeNode(callableBuilder.Build(), false);
  4076. }
  4077. TRuntimeNode TProgramBuilder::GroupingCore(TRuntimeNode stream,
  4078. const TBinaryLambda& groupSwitch,
  4079. const TUnaryLambda& keyExtractor,
  4080. const TUnaryLambda& handler)
  4081. {
  4082. if (handler && RuntimeVersion < 20U) {
  4083. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__ << " with handler";
  4084. }
  4085. auto itemType = AS_TYPE(TStreamType, stream)->GetItemType();
  4086. TRuntimeNode keyExtractorItemArg = Arg(itemType);
  4087. TRuntimeNode keyExtractorResult = keyExtractor(keyExtractorItemArg);
  4088. TRuntimeNode groupSwitchKeyArg = Arg(keyExtractorResult.GetStaticType());
  4089. TRuntimeNode groupSwitchItemArg = Arg(itemType);
  4090. TRuntimeNode groupSwitchResult = groupSwitch(groupSwitchKeyArg, groupSwitchItemArg);
  4091. MKQL_ENSURE(AS_TYPE(TDataType, groupSwitchResult)->GetSchemeType() == NUdf::TDataType<bool>::Id,
  4092. "Expected bool type");
  4093. TRuntimeNode handlerItemArg;
  4094. TRuntimeNode handlerResult;
  4095. if (handler) {
  4096. handlerItemArg = Arg(itemType);
  4097. handlerResult = handler(handlerItemArg);
  4098. itemType = handlerResult.GetStaticType();
  4099. }
  4100. const std::array<TType*, 2U> tupleItems = {{ keyExtractorResult.GetStaticType(), NewStreamType(itemType) }};
  4101. const auto finishType = NewStreamType(NewTupleType(tupleItems));
  4102. TCallableBuilder callableBuilder(Env, __func__, finishType);
  4103. callableBuilder.Add(stream);
  4104. callableBuilder.Add(keyExtractorResult);
  4105. callableBuilder.Add(groupSwitchResult);
  4106. callableBuilder.Add(keyExtractorItemArg);
  4107. callableBuilder.Add(groupSwitchKeyArg);
  4108. callableBuilder.Add(groupSwitchItemArg);
  4109. if (handler) {
  4110. callableBuilder.Add(handlerResult);
  4111. callableBuilder.Add(handlerItemArg);
  4112. }
  4113. return TRuntimeNode(callableBuilder.Build(), false);
  4114. }
  4115. TRuntimeNode TProgramBuilder::Chopper(TRuntimeNode flow, const TUnaryLambda& keyExtractor, const TBinaryLambda& groupSwitch, const TBinaryLambda& groupHandler) {
  4116. const auto flowType = flow.GetStaticType();
  4117. MKQL_ENSURE(flowType->IsFlow() || flowType->IsStream(), "Expected flow or stream.");
  4118. if constexpr (RuntimeVersion < 9U) {
  4119. return FlatMap(GroupingCore(flow, groupSwitch, keyExtractor),
  4120. [&](TRuntimeNode item) -> TRuntimeNode { return groupHandler(Nth(item, 0U), Nth(item, 1U)); }
  4121. );
  4122. }
  4123. const bool isStream = flowType->IsStream();
  4124. const auto itemType = isStream ? AS_TYPE(TStreamType, flow)->GetItemType() : AS_TYPE(TFlowType, flow)->GetItemType();
  4125. const auto itemArg = Arg(itemType);
  4126. const auto keyExtractorResult = keyExtractor(itemArg);
  4127. const auto keyArg = Arg(keyExtractorResult.GetStaticType());
  4128. const auto groupSwitchResult = groupSwitch(keyArg, itemArg);
  4129. const auto input = Arg(flowType);
  4130. const auto output = groupHandler(keyArg, input);
  4131. TCallableBuilder callableBuilder(Env, __func__, output.GetStaticType());
  4132. callableBuilder.Add(flow);
  4133. callableBuilder.Add(itemArg);
  4134. callableBuilder.Add(keyExtractorResult);
  4135. callableBuilder.Add(keyArg);
  4136. callableBuilder.Add(groupSwitchResult);
  4137. callableBuilder.Add(input);
  4138. callableBuilder.Add(output);
  4139. return TRuntimeNode(callableBuilder.Build(), false);
  4140. }
  4141. TRuntimeNode TProgramBuilder::WideChopper(TRuntimeNode flow, const TWideLambda& extractor, const TWideSwitchLambda& groupSwitch,
  4142. const std::function<TRuntimeNode (TRuntimeNode::TList, TRuntimeNode)>& groupHandler
  4143. ) {
  4144. if constexpr (RuntimeVersion < 18U) {
  4145. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4146. }
  4147. const auto wideComponents = GetWideComponents(AS_TYPE(TFlowType, flow.GetStaticType()));
  4148. TRuntimeNode::TList itemArgs, keyArgs;
  4149. itemArgs.reserve(wideComponents.size());
  4150. auto i = 0U;
  4151. std::generate_n(std::back_inserter(itemArgs), wideComponents.size(), [&](){ return Arg(wideComponents[i++]); });
  4152. const auto keys = extractor(itemArgs);
  4153. keyArgs.reserve(keys.size());
  4154. std::transform(keys.cbegin(), keys.cend(), std::back_inserter(keyArgs), [&](TRuntimeNode key){ return Arg(key.GetStaticType()); } );
  4155. const auto groupSwitchResult = groupSwitch(keyArgs, itemArgs);
  4156. const auto input = WideFlowArg(flow.GetStaticType());
  4157. const auto output = groupHandler(keyArgs, input);
  4158. TCallableBuilder callableBuilder(Env, __func__, output.GetStaticType());
  4159. callableBuilder.Add(flow);
  4160. std::for_each(itemArgs.cbegin(), itemArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4161. std::for_each(keys.cbegin(), keys.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4162. std::for_each(keyArgs.cbegin(), keyArgs.cend(), std::bind(&TCallableBuilder::Add, std::ref(callableBuilder), std::placeholders::_1));
  4163. callableBuilder.Add(groupSwitchResult);
  4164. callableBuilder.Add(input);
  4165. callableBuilder.Add(output);
  4166. return TRuntimeNode(callableBuilder.Build(), false);
  4167. }
  4168. TRuntimeNode TProgramBuilder::HoppingCore(TRuntimeNode list,
  4169. const TUnaryLambda& timeExtractor,
  4170. const TUnaryLambda& init,
  4171. const TBinaryLambda& update,
  4172. const TUnaryLambda& save,
  4173. const TUnaryLambda& load,
  4174. const TBinaryLambda& merge,
  4175. const TBinaryLambda& finish,
  4176. TRuntimeNode hop, TRuntimeNode interval, TRuntimeNode delay)
  4177. {
  4178. auto streamType = AS_TYPE(TStreamType, list);
  4179. auto itemType = AS_TYPE(TStructType, streamType->GetItemType());
  4180. auto timestampType = TOptionalType::Create(TDataType::Create(NUdf::TDataType<NUdf::TTimestamp>::Id, Env), Env);
  4181. TRuntimeNode itemArg = Arg(itemType);
  4182. auto outTime = timeExtractor(itemArg);
  4183. auto outStateInit = init(itemArg);
  4184. auto stateType = outStateInit.GetStaticType();
  4185. TRuntimeNode stateArg = Arg(stateType);
  4186. auto outStateUpdate = update(itemArg, stateArg);
  4187. auto hasSaveLoad = (bool)save;
  4188. TRuntimeNode saveArg, outSave, loadArg, outLoad;
  4189. if (hasSaveLoad) {
  4190. saveArg = Arg(stateType);
  4191. outSave = save(saveArg);
  4192. loadArg = Arg(outSave.GetStaticType());
  4193. outLoad = load(loadArg);
  4194. MKQL_ENSURE(outLoad.GetStaticType()->IsSameType(*stateType), "Loaded type is changed by the load handler");
  4195. } else {
  4196. saveArg = outSave = loadArg = outLoad = NewVoid();
  4197. }
  4198. TRuntimeNode state2Arg = Arg(stateType);
  4199. TRuntimeNode timeArg = Arg(timestampType);
  4200. auto outStateMerge = merge(stateArg, state2Arg);
  4201. auto outItemFinish = finish(stateArg, timeArg);
  4202. auto finishType = outItemFinish.GetStaticType();
  4203. MKQL_ENSURE(finishType->IsStruct(), "Expected struct type as finish lambda output");
  4204. auto resultType = TStreamType::Create(outItemFinish.GetStaticType(), Env);
  4205. TCallableBuilder callableBuilder(Env, __func__, resultType);
  4206. callableBuilder.Add(list);
  4207. callableBuilder.Add(itemArg);
  4208. callableBuilder.Add(stateArg);
  4209. callableBuilder.Add(state2Arg);
  4210. callableBuilder.Add(timeArg);
  4211. callableBuilder.Add(saveArg);
  4212. callableBuilder.Add(loadArg);
  4213. callableBuilder.Add(outTime);
  4214. callableBuilder.Add(outStateInit);
  4215. callableBuilder.Add(outStateUpdate);
  4216. callableBuilder.Add(outSave);
  4217. callableBuilder.Add(outLoad);
  4218. callableBuilder.Add(outStateMerge);
  4219. callableBuilder.Add(outItemFinish);
  4220. callableBuilder.Add(hop);
  4221. callableBuilder.Add(interval);
  4222. callableBuilder.Add(delay);
  4223. return TRuntimeNode(callableBuilder.Build(), false);
  4224. }
  4225. TRuntimeNode TProgramBuilder::MultiHoppingCore(TRuntimeNode list,
  4226. const TUnaryLambda& keyExtractor,
  4227. const TUnaryLambda& timeExtractor,
  4228. const TUnaryLambda& init,
  4229. const TBinaryLambda& update,
  4230. const TUnaryLambda& save,
  4231. const TUnaryLambda& load,
  4232. const TBinaryLambda& merge,
  4233. const TTernaryLambda& finish,
  4234. TRuntimeNode hop, TRuntimeNode interval, TRuntimeNode delay,
  4235. TRuntimeNode dataWatermarks, TRuntimeNode watermarksMode)
  4236. {
  4237. if constexpr (RuntimeVersion < 22U) {
  4238. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4239. }
  4240. auto streamType = AS_TYPE(TStreamType, list);
  4241. auto itemType = AS_TYPE(TStructType, streamType->GetItemType());
  4242. auto timestampType = TOptionalType::Create(TDataType::Create(NUdf::TDataType<NUdf::TTimestamp>::Id, Env), Env);
  4243. TRuntimeNode itemArg = Arg(itemType);
  4244. auto keyExtract = keyExtractor(itemArg);
  4245. auto keyType = keyExtract.GetStaticType();
  4246. TRuntimeNode keyArg = Arg(keyType);
  4247. auto outTime = timeExtractor(itemArg);
  4248. auto outStateInit = init(itemArg);
  4249. auto stateType = outStateInit.GetStaticType();
  4250. TRuntimeNode stateArg = Arg(stateType);
  4251. auto outStateUpdate = update(itemArg, stateArg);
  4252. auto hasSaveLoad = (bool)save;
  4253. TRuntimeNode saveArg, outSave, loadArg, outLoad;
  4254. if (hasSaveLoad) {
  4255. saveArg = Arg(stateType);
  4256. outSave = save(saveArg);
  4257. loadArg = Arg(outSave.GetStaticType());
  4258. outLoad = load(loadArg);
  4259. MKQL_ENSURE(outLoad.GetStaticType()->IsSameType(*stateType), "Loaded type is changed by the load handler");
  4260. } else {
  4261. saveArg = outSave = loadArg = outLoad = NewVoid();
  4262. }
  4263. TRuntimeNode state2Arg = Arg(stateType);
  4264. TRuntimeNode timeArg = Arg(timestampType);
  4265. auto outStateMerge = merge(stateArg, state2Arg);
  4266. auto outItemFinish = finish(keyArg, stateArg, timeArg);
  4267. auto finishType = outItemFinish.GetStaticType();
  4268. MKQL_ENSURE(finishType->IsStruct(), "Expected struct type as finish lambda output");
  4269. auto resultType = TStreamType::Create(outItemFinish.GetStaticType(), Env);
  4270. TCallableBuilder callableBuilder(Env, __func__, resultType);
  4271. callableBuilder.Add(list);
  4272. callableBuilder.Add(itemArg);
  4273. callableBuilder.Add(keyArg);
  4274. callableBuilder.Add(stateArg);
  4275. callableBuilder.Add(state2Arg);
  4276. callableBuilder.Add(timeArg);
  4277. callableBuilder.Add(saveArg);
  4278. callableBuilder.Add(loadArg);
  4279. callableBuilder.Add(keyExtract);
  4280. callableBuilder.Add(outTime);
  4281. callableBuilder.Add(outStateInit);
  4282. callableBuilder.Add(outStateUpdate);
  4283. callableBuilder.Add(outSave);
  4284. callableBuilder.Add(outLoad);
  4285. callableBuilder.Add(outStateMerge);
  4286. callableBuilder.Add(outItemFinish);
  4287. callableBuilder.Add(hop);
  4288. callableBuilder.Add(interval);
  4289. callableBuilder.Add(delay);
  4290. callableBuilder.Add(dataWatermarks);
  4291. callableBuilder.Add(watermarksMode);
  4292. return TRuntimeNode(callableBuilder.Build(), false);
  4293. }
  4294. TRuntimeNode TProgramBuilder::Default(TType* type) {
  4295. bool isOptional;
  4296. const auto targetType = UnpackOptionalData(type, isOptional);
  4297. if (isOptional) {
  4298. return NewOptional(Default(targetType));
  4299. }
  4300. const auto scheme = targetType->GetSchemeType();
  4301. const auto value = scheme == NUdf::TDataType<NUdf::TUuid>::Id ?
  4302. Env.NewStringValue("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"sv) :
  4303. scheme == NUdf::TDataType<NUdf::TDyNumber>::Id ? NUdf::TUnboxedValuePod::Embedded("\1") : NUdf::TUnboxedValuePod::Zero();
  4304. return TRuntimeNode(TDataLiteral::Create(value, targetType, Env), true);
  4305. }
  4306. TRuntimeNode TProgramBuilder::Cast(TRuntimeNode arg, TType* type) {
  4307. if (arg.GetStaticType()->IsSameType(*type)) {
  4308. return arg;
  4309. }
  4310. bool isOptional;
  4311. const auto targetType = UnpackOptionalData(type, isOptional);
  4312. const auto sourceType = UnpackOptionalData(arg, isOptional);
  4313. const auto sId = sourceType->GetSchemeType();
  4314. const auto tId = targetType->GetSchemeType();
  4315. if (sId == NUdf::TDataType<char*>::Id) {
  4316. if (tId != NUdf::TDataType<char*>::Id) {
  4317. return FromString(arg, type);
  4318. } else {
  4319. return arg;
  4320. }
  4321. }
  4322. if (sId == NUdf::TDataType<NUdf::TUtf8>::Id) {
  4323. if (tId != NUdf::TDataType<char*>::Id) {
  4324. return FromString(arg, type);
  4325. } else {
  4326. return ToString(arg);
  4327. }
  4328. }
  4329. if (tId == NUdf::TDataType<char*>::Id) {
  4330. return ToString(arg);
  4331. }
  4332. if (tId == NUdf::TDataType<NUdf::TUtf8>::Id) {
  4333. return ToString<true>(arg);
  4334. }
  4335. if (tId == NUdf::TDataType<NUdf::TDecimal>::Id) {
  4336. const auto& params = static_cast<const TDataDecimalType*>(targetType)->GetParams();
  4337. return ToDecimal(arg, params.first, params.second);
  4338. }
  4339. const auto options = NKikimr::NUdf::GetCastResult(*sourceType->GetDataSlot(), *targetType->GetDataSlot());
  4340. MKQL_ENSURE((*options & NKikimr::NUdf::ECastOptions::Undefined) ||
  4341. !(*options & NKikimr::NUdf::ECastOptions::Impossible),
  4342. "Impossible to cast " << *static_cast<TType*>(sourceType) << " into " << *static_cast<TType*>(targetType));
  4343. const bool useToIntegral = (*options & NKikimr::NUdf::ECastOptions::Undefined) ||
  4344. (*options & NKikimr::NUdf::ECastOptions::MayFail);
  4345. return useToIntegral ? ToIntegral(arg, type) : Convert(arg, type);
  4346. }
  4347. TRuntimeNode TProgramBuilder::RangeCreate(TRuntimeNode list) {
  4348. MKQL_ENSURE(list.GetStaticType()->IsList(), "Expecting list");
  4349. auto itemType = static_cast<TListType*>(list.GetStaticType())->GetItemType();
  4350. MKQL_ENSURE(itemType->IsTuple(), "Expecting list of tuples");
  4351. auto tupleType = static_cast<TTupleType*>(itemType);
  4352. MKQL_ENSURE(tupleType->GetElementsCount() == 2,
  4353. "Expecting list ot 2-element tuples, got: " << tupleType->GetElementsCount() << " elements");
  4354. MKQL_ENSURE(tupleType->GetElementType(0)->IsSameType(*tupleType->GetElementType(1)),
  4355. "Expecting list ot 2-element tuples of same type");
  4356. MKQL_ENSURE(tupleType->GetElementType(0)->IsTuple(),
  4357. "Expecting range boundary to be tuple");
  4358. auto boundaryType = static_cast<TTupleType*>(tupleType->GetElementType(0));
  4359. MKQL_ENSURE(boundaryType->GetElementsCount() >= 2,
  4360. "Range boundary should have at least 2 components, got: " << boundaryType->GetElementsCount());
  4361. auto lastComp = boundaryType->GetElementType(boundaryType->GetElementsCount() - 1);
  4362. std::vector<TType*> outputComponents;
  4363. for (ui32 i = 0; i < boundaryType->GetElementsCount() - 1; ++i) {
  4364. outputComponents.push_back(lastComp);
  4365. outputComponents.push_back(boundaryType->GetElementType(i));
  4366. }
  4367. outputComponents.push_back(lastComp);
  4368. auto outputBoundary = TTupleType::Create(outputComponents.size(), &outputComponents.front(), Env);
  4369. std::vector<TType*> outputRangeComps(2, outputBoundary);
  4370. auto outputRange = TTupleType::Create(outputRangeComps.size(), &outputRangeComps.front(), Env);
  4371. TCallableBuilder callableBuilder(Env, __func__, TListType::Create(outputRange, Env));
  4372. callableBuilder.Add(list);
  4373. return TRuntimeNode(callableBuilder.Build(), false);
  4374. }
  4375. TRuntimeNode TProgramBuilder::RangeUnion(const TArrayRef<const TRuntimeNode>& lists) {
  4376. return BuildRangeLogical(__func__, lists);
  4377. }
  4378. TRuntimeNode TProgramBuilder::RangeIntersect(const TArrayRef<const TRuntimeNode>& lists) {
  4379. return BuildRangeLogical(__func__, lists);
  4380. }
  4381. TRuntimeNode TProgramBuilder::RangeMultiply(const TArrayRef<const TRuntimeNode>& args) {
  4382. MKQL_ENSURE(args.size() >= 2, "Expecting at least two arguments");
  4383. bool unlimited = false;
  4384. if (args.front().GetStaticType()->IsVoid()) {
  4385. unlimited = true;
  4386. } else {
  4387. MKQL_ENSURE(args.front().GetStaticType()->IsData() &&
  4388. static_cast<TDataType*>(args.front().GetStaticType())->GetSchemeType() == NUdf::TDataType<ui64>::Id,
  4389. "Expected ui64 as first argument");
  4390. }
  4391. std::vector<TType*> outputComponents;
  4392. for (size_t i = 1; i < args.size(); ++i) {
  4393. const auto& list = args[i];
  4394. MKQL_ENSURE(list.GetStaticType()->IsList(), "Expecting list");
  4395. auto listItemType = static_cast<TListType*>(list.GetStaticType())->GetItemType();
  4396. MKQL_ENSURE(listItemType->IsTuple(), "Expecting list of tuples");
  4397. auto rangeType = static_cast<TTupleType*>(listItemType);
  4398. MKQL_ENSURE(rangeType->GetElementsCount() == 2, "Expecting list of 2-element tuples");
  4399. MKQL_ENSURE(rangeType->GetElementType(0)->IsTuple(), "Range boundary should be tuple");
  4400. auto boundaryType = static_cast<TTupleType*>(rangeType->GetElementType(0));
  4401. ui32 elementsCount = boundaryType->GetElementsCount();
  4402. MKQL_ENSURE(elementsCount >= 3 && elementsCount % 2 == 1, "Range boundary should have odd number components (at least 3)");
  4403. for (size_t j = 0; j < elementsCount - 1; ++j) {
  4404. outputComponents.push_back(boundaryType->GetElementType(j));
  4405. }
  4406. }
  4407. outputComponents.push_back(TDataType::Create(NUdf::TDataType<i32>::Id, Env));
  4408. auto outputBoundary = TTupleType::Create(outputComponents.size(), &outputComponents.front(), Env);
  4409. std::vector<TType*> outputRangeComps(2, outputBoundary);
  4410. auto outputRange = TTupleType::Create(outputRangeComps.size(), &outputRangeComps.front(), Env);
  4411. TCallableBuilder callableBuilder(Env, __func__, TListType::Create(outputRange, Env));
  4412. if (unlimited) {
  4413. callableBuilder.Add(NewDataLiteral<ui64>(std::numeric_limits<ui64>::max()));
  4414. } else {
  4415. callableBuilder.Add(args[0]);
  4416. }
  4417. for (size_t i = 1; i < args.size(); ++i) {
  4418. callableBuilder.Add(args[i]);
  4419. }
  4420. return TRuntimeNode(callableBuilder.Build(), false);
  4421. }
  4422. TRuntimeNode TProgramBuilder::RangeFinalize(TRuntimeNode list) {
  4423. MKQL_ENSURE(list.GetStaticType()->IsList(), "Expecting list");
  4424. auto listItemType = static_cast<TListType*>(list.GetStaticType())->GetItemType();
  4425. MKQL_ENSURE(listItemType->IsTuple(), "Expecting list of tuples");
  4426. auto rangeType = static_cast<TTupleType*>(listItemType);
  4427. MKQL_ENSURE(rangeType->GetElementsCount() == 2, "Expecting list of 2-element tuples");
  4428. MKQL_ENSURE(rangeType->GetElementType(0)->IsTuple(), "Range boundary should be tuple");
  4429. auto boundaryType = static_cast<TTupleType*>(rangeType->GetElementType(0));
  4430. ui32 elementsCount = boundaryType->GetElementsCount();
  4431. MKQL_ENSURE(elementsCount >= 3 && elementsCount % 2 == 1, "Range boundary should have odd number components (at least 3)");
  4432. std::vector<TType*> outputComponents;
  4433. for (ui32 i = 0; i < elementsCount; ++i) {
  4434. if (i % 2 == 1 || i + 1 == elementsCount) {
  4435. outputComponents.push_back(boundaryType->GetElementType(i));
  4436. }
  4437. }
  4438. auto outputBoundary = TTupleType::Create(outputComponents.size(), &outputComponents.front(), Env);
  4439. std::vector<TType*> outputRangeComps(2, outputBoundary);
  4440. auto outputRange = TTupleType::Create(outputRangeComps.size(), &outputRangeComps.front(), Env);
  4441. TCallableBuilder callableBuilder(Env, __func__, TListType::Create(outputRange, Env));
  4442. callableBuilder.Add(list);
  4443. return TRuntimeNode(callableBuilder.Build(), false);
  4444. }
  4445. TRuntimeNode TProgramBuilder::Round(const std::string_view& callableName, TRuntimeNode source, TType* targetType) {
  4446. const auto sourceType = source.GetStaticType();
  4447. MKQL_ENSURE(sourceType->IsData(), "Expecting first arg to be of Data type");
  4448. MKQL_ENSURE(targetType->IsData(), "Expecting second arg to be Data type");
  4449. const auto ss = *static_cast<TDataType*>(sourceType)->GetDataSlot();
  4450. const auto ts = *static_cast<TDataType*>(targetType)->GetDataSlot();
  4451. const auto options = NKikimr::NUdf::GetCastResult(ss, ts);
  4452. MKQL_ENSURE(!(*options & NKikimr::NUdf::ECastOptions::Impossible),
  4453. "Impossible to cast " << *sourceType << " into " << *targetType);
  4454. MKQL_ENSURE(*options & (NKikimr::NUdf::ECastOptions::MayFail |
  4455. NKikimr::NUdf::ECastOptions::MayLoseData |
  4456. NKikimr::NUdf::ECastOptions::AnywayLoseData),
  4457. "Rounding from " << *sourceType << " to " << *targetType << " is trivial");
  4458. TCallableBuilder callableBuilder(Env, callableName, TOptionalType::Create(targetType, Env));
  4459. callableBuilder.Add(source);
  4460. return TRuntimeNode(callableBuilder.Build(), false);
  4461. }
  4462. TRuntimeNode TProgramBuilder::NextValue(TRuntimeNode value) {
  4463. const auto valueType = value.GetStaticType();
  4464. MKQL_ENSURE(valueType->IsData(), "Expecting argument of Data type");
  4465. const auto slot = *static_cast<TDataType*>(valueType)->GetDataSlot();
  4466. MKQL_ENSURE(slot == NUdf::EDataSlot::String || slot == NUdf::EDataSlot::Utf8,
  4467. "Unsupported type: " << *valueType);
  4468. TCallableBuilder callableBuilder(Env, __func__, TOptionalType::Create(valueType, Env));
  4469. callableBuilder.Add(value);
  4470. return TRuntimeNode(callableBuilder.Build(), false);
  4471. }
  4472. TRuntimeNode TProgramBuilder::Nop(TRuntimeNode value, TType* returnType) {
  4473. if constexpr (RuntimeVersion < 35U) {
  4474. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4475. }
  4476. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4477. callableBuilder.Add(value);
  4478. return TRuntimeNode(callableBuilder.Build(), false);
  4479. }
  4480. bool TProgramBuilder::IsNull(TRuntimeNode arg) {
  4481. return arg.GetStaticType()->IsSameType(*NewNull().GetStaticType()); // TODO ->IsNull();
  4482. }
  4483. TRuntimeNode TProgramBuilder::Replicate(TRuntimeNode item, TRuntimeNode count, const std::string_view& file, ui32 row, ui32 column) {
  4484. MKQL_ENSURE(count.GetStaticType()->IsData(), "Expected data");
  4485. MKQL_ENSURE(static_cast<const TDataType&>(*count.GetStaticType()).GetSchemeType() == NUdf::TDataType<ui64>::Id, "Expected ui64");
  4486. const auto listType = TListType::Create(item.GetStaticType(), Env);
  4487. TCallableBuilder callableBuilder(Env, __func__, listType);
  4488. callableBuilder.Add(item);
  4489. callableBuilder.Add(count);
  4490. if constexpr (RuntimeVersion >= 2) {
  4491. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(file));
  4492. callableBuilder.Add(NewDataLiteral(row));
  4493. callableBuilder.Add(NewDataLiteral(column));
  4494. }
  4495. return TRuntimeNode(callableBuilder.Build(), false);
  4496. }
  4497. TRuntimeNode TProgramBuilder::PgConst(TPgType* pgType, const std::string_view& value, TRuntimeNode typeMod) {
  4498. if constexpr (RuntimeVersion < 30U) {
  4499. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4500. }
  4501. TCallableBuilder callableBuilder(Env, __func__, pgType);
  4502. callableBuilder.Add(NewDataLiteral(pgType->GetTypeId()));
  4503. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(value));
  4504. if (typeMod) {
  4505. callableBuilder.Add(typeMod);
  4506. }
  4507. return TRuntimeNode(callableBuilder.Build(), false);
  4508. }
  4509. TRuntimeNode TProgramBuilder::PgResolvedCall(bool useContext, const std::string_view& name,
  4510. ui32 id, const TArrayRef<const TRuntimeNode>& args,
  4511. TType* returnType, bool rangeFunction) {
  4512. if constexpr (RuntimeVersion < 45U) {
  4513. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4514. }
  4515. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4516. callableBuilder.Add(NewDataLiteral(useContext));
  4517. callableBuilder.Add(NewDataLiteral(rangeFunction));
  4518. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(name));
  4519. callableBuilder.Add(NewDataLiteral(id));
  4520. for (const auto& arg : args) {
  4521. callableBuilder.Add(arg);
  4522. }
  4523. return TRuntimeNode(callableBuilder.Build(), false);
  4524. }
  4525. TRuntimeNode TProgramBuilder::BlockPgResolvedCall(const std::string_view& name, ui32 id,
  4526. const TArrayRef<const TRuntimeNode>& args, TType* returnType) {
  4527. if constexpr (RuntimeVersion < 30U) {
  4528. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4529. }
  4530. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4531. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(name));
  4532. callableBuilder.Add(NewDataLiteral(id));
  4533. for (const auto& arg : args) {
  4534. callableBuilder.Add(arg);
  4535. }
  4536. return TRuntimeNode(callableBuilder.Build(), false);
  4537. }
  4538. TRuntimeNode TProgramBuilder::PgArray(const TArrayRef<const TRuntimeNode>& args, TType* returnType) {
  4539. if constexpr (RuntimeVersion < 30U) {
  4540. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4541. }
  4542. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4543. for (const auto& arg : args) {
  4544. callableBuilder.Add(arg);
  4545. }
  4546. return TRuntimeNode(callableBuilder.Build(), false);
  4547. }
  4548. TRuntimeNode TProgramBuilder::PgTableContent(
  4549. const std::string_view& cluster,
  4550. const std::string_view& table,
  4551. TType* returnType) {
  4552. if constexpr (RuntimeVersion < 47U) {
  4553. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4554. }
  4555. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4556. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(cluster));
  4557. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(table));
  4558. return TRuntimeNode(callableBuilder.Build(), false);
  4559. }
  4560. TRuntimeNode TProgramBuilder::PgToRecord(TRuntimeNode input, const TArrayRef<std::pair<std::string_view, std::string_view>>& members) {
  4561. if constexpr (RuntimeVersion < 48U) {
  4562. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4563. }
  4564. MKQL_ENSURE(input.GetStaticType()->IsStruct(), "Expected struct");
  4565. auto structType = AS_TYPE(TStructType, input.GetStaticType());
  4566. for (ui32 i = 0; i < structType->GetMembersCount(); ++i) {
  4567. auto itemType = structType->GetMemberType(i);
  4568. MKQL_ENSURE(itemType->IsNull() || itemType->IsPg(), "Expected null or pg");
  4569. }
  4570. auto returnType = NewPgType(NYql::NPg::LookupType("record").TypeId);
  4571. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4572. callableBuilder.Add(input);
  4573. TVector<TRuntimeNode> names;
  4574. for (const auto& x : members) {
  4575. names.push_back(NewDataLiteral<NUdf::EDataSlot::String>(x.first));
  4576. names.push_back(NewDataLiteral<NUdf::EDataSlot::String>(x.second));
  4577. }
  4578. callableBuilder.Add(NewTuple(names));
  4579. return TRuntimeNode(callableBuilder.Build(), false);
  4580. }
  4581. TRuntimeNode TProgramBuilder::PgCast(TRuntimeNode input, TType* returnType, TRuntimeNode typeMod) {
  4582. if constexpr (RuntimeVersion < 30U) {
  4583. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4584. }
  4585. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4586. callableBuilder.Add(input);
  4587. if (typeMod) {
  4588. callableBuilder.Add(typeMod);
  4589. }
  4590. return TRuntimeNode(callableBuilder.Build(), false);
  4591. }
  4592. TRuntimeNode TProgramBuilder::FromPg(TRuntimeNode input, TType* returnType) {
  4593. if constexpr (RuntimeVersion < 30U) {
  4594. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4595. }
  4596. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4597. callableBuilder.Add(input);
  4598. return TRuntimeNode(callableBuilder.Build(), false);
  4599. }
  4600. TRuntimeNode TProgramBuilder::ToPg(TRuntimeNode input, TType* returnType) {
  4601. if constexpr (RuntimeVersion < 30U) {
  4602. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4603. }
  4604. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4605. callableBuilder.Add(input);
  4606. return TRuntimeNode(callableBuilder.Build(), false);
  4607. }
  4608. TRuntimeNode TProgramBuilder::PgClone(TRuntimeNode input, const TArrayRef<const TRuntimeNode>& dependentNodes) {
  4609. if constexpr (RuntimeVersion < 38U) {
  4610. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4611. }
  4612. TCallableBuilder callableBuilder(Env, __func__, input.GetStaticType());
  4613. callableBuilder.Add(input);
  4614. for (const auto& node : dependentNodes) {
  4615. callableBuilder.Add(node);
  4616. }
  4617. return TRuntimeNode(callableBuilder.Build(), false);
  4618. }
  4619. TRuntimeNode TProgramBuilder::WithContext(TRuntimeNode input, const std::string_view& contextType) {
  4620. if constexpr (RuntimeVersion < 30U) {
  4621. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4622. }
  4623. TCallableBuilder callableBuilder(Env, __func__, input.GetStaticType());
  4624. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(contextType));
  4625. callableBuilder.Add(input);
  4626. return TRuntimeNode(callableBuilder.Build(), false);
  4627. }
  4628. TRuntimeNode TProgramBuilder::PgInternal0(TType* returnType) {
  4629. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4630. return TRuntimeNode(callableBuilder.Build(), false);
  4631. }
  4632. TRuntimeNode TProgramBuilder::BlockIf(TRuntimeNode condition, TRuntimeNode thenBranch, TRuntimeNode elseBranch) {
  4633. const auto conditionType = AS_TYPE(TBlockType, condition.GetStaticType());
  4634. MKQL_ENSURE(AS_TYPE(TDataType, conditionType->GetItemType())->GetSchemeType() == NUdf::TDataType<bool>::Id,
  4635. "Expected bool as first argument");
  4636. const auto thenType = AS_TYPE(TBlockType, thenBranch.GetStaticType());
  4637. const auto elseType = AS_TYPE(TBlockType, elseBranch.GetStaticType());
  4638. MKQL_ENSURE(thenType->GetItemType()->IsSameType(*elseType->GetItemType()), "Different return types in branches.");
  4639. auto returnType = NewBlockType(thenType->GetItemType(), GetResultShape({conditionType, thenType, elseType}));
  4640. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4641. callableBuilder.Add(condition);
  4642. callableBuilder.Add(thenBranch);
  4643. callableBuilder.Add(elseBranch);
  4644. return TRuntimeNode(callableBuilder.Build(), false);
  4645. }
  4646. TRuntimeNode TProgramBuilder::BlockJust(TRuntimeNode data) {
  4647. const auto initialType = AS_TYPE(TBlockType, data.GetStaticType());
  4648. auto returnType = NewBlockType(NewOptionalType(initialType->GetItemType()), initialType->GetShape());
  4649. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4650. callableBuilder.Add(data);
  4651. return TRuntimeNode(callableBuilder.Build(), false);
  4652. }
  4653. TRuntimeNode TProgramBuilder::BlockFunc(const std::string_view& funcName, TType* returnType, const TArrayRef<const TRuntimeNode>& args) {
  4654. for (const auto& arg : args) {
  4655. MKQL_ENSURE(arg.GetStaticType()->IsBlock(), "Expected Block type");
  4656. }
  4657. TCallableBuilder builder(Env, __func__, returnType);
  4658. builder.Add(NewDataLiteral<NUdf::EDataSlot::String>(funcName));
  4659. for (const auto& arg : args) {
  4660. builder.Add(arg);
  4661. }
  4662. return TRuntimeNode(builder.Build(), false);
  4663. }
  4664. TRuntimeNode TProgramBuilder::BlockBitCast(TRuntimeNode value, TType* targetType) {
  4665. MKQL_ENSURE(value.GetStaticType()->IsBlock(), "Expected Block type");
  4666. auto returnType = TBlockType::Create(targetType, AS_TYPE(TBlockType, value.GetStaticType())->GetShape(), Env);
  4667. TCallableBuilder builder(Env, __func__, returnType);
  4668. builder.Add(value);
  4669. builder.Add(TRuntimeNode(targetType, true));
  4670. return TRuntimeNode(builder.Build(), false);
  4671. }
  4672. TRuntimeNode TProgramBuilder::BuildBlockCombineAll(const std::string_view& callableName, TRuntimeNode input, std::optional<ui32> filterColumn,
  4673. const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4674. const auto inputType = input.GetStaticType();
  4675. MKQL_ENSURE(inputType->IsStream() || inputType->IsFlow(), "Expected either stream or flow as input type");
  4676. MKQL_ENSURE(returnType->IsStream() || returnType->IsFlow(), "Expected either stream or flow as return type");
  4677. TCallableBuilder builder(Env, callableName, returnType);
  4678. builder.Add(input);
  4679. if (!filterColumn) {
  4680. builder.Add(NewEmptyOptionalDataLiteral(NUdf::TDataType<ui32>::Id));
  4681. } else {
  4682. builder.Add(NewOptional(NewDataLiteral<ui32>(*filterColumn)));
  4683. }
  4684. TVector<TRuntimeNode> aggsNodes;
  4685. for (const auto& agg : aggs) {
  4686. TVector<TRuntimeNode> params;
  4687. params.push_back(NewDataLiteral<NUdf::EDataSlot::String>(agg.Name));
  4688. for (const auto& col : agg.ArgsColumns) {
  4689. params.push_back(NewDataLiteral<ui32>(col));
  4690. }
  4691. aggsNodes.push_back(NewTuple(params));
  4692. }
  4693. builder.Add(NewTuple(aggsNodes));
  4694. return TRuntimeNode(builder.Build(), false);
  4695. }
  4696. TRuntimeNode TProgramBuilder::BlockCombineAll(TRuntimeNode stream, std::optional<ui32> filterColumn,
  4697. const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4698. if constexpr (RuntimeVersion < 31U) {
  4699. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4700. }
  4701. MKQL_ENSURE(stream.GetStaticType()->IsStream(), "Expected stream as input type");
  4702. MKQL_ENSURE(returnType->IsStream(), "Expected stream as return type");
  4703. if constexpr (RuntimeVersion < 52U) {
  4704. const auto flowReturnType = NewFlowType(AS_TYPE(TStreamType, returnType)->GetItemType());
  4705. return FromFlow(BuildBlockCombineAll(__func__, ToFlow(stream), filterColumn, aggs, flowReturnType));
  4706. } else {
  4707. return BuildBlockCombineAll(__func__, stream, filterColumn, aggs, returnType);
  4708. }
  4709. }
  4710. TRuntimeNode TProgramBuilder::BuildBlockCombineHashed(const std::string_view& callableName, TRuntimeNode input, std::optional<ui32> filterColumn,
  4711. const TArrayRef<ui32>& keys, const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4712. const auto inputType = input.GetStaticType();
  4713. MKQL_ENSURE(inputType->IsStream() || inputType->IsFlow(), "Expected either stream or flow as input type");
  4714. MKQL_ENSURE(returnType->IsStream() || returnType->IsFlow(), "Expected either stream or flow as return type");
  4715. TCallableBuilder builder(Env, callableName, returnType);
  4716. builder.Add(input);
  4717. if (!filterColumn) {
  4718. builder.Add(NewEmptyOptionalDataLiteral(NUdf::TDataType<ui32>::Id));
  4719. } else {
  4720. builder.Add(NewOptional(NewDataLiteral<ui32>(*filterColumn)));
  4721. }
  4722. TVector<TRuntimeNode> keyNodes;
  4723. for (const auto& key : keys) {
  4724. keyNodes.push_back(NewDataLiteral<ui32>(key));
  4725. }
  4726. builder.Add(NewTuple(keyNodes));
  4727. TVector<TRuntimeNode> aggsNodes;
  4728. for (const auto& agg : aggs) {
  4729. TVector<TRuntimeNode> params;
  4730. params.push_back(NewDataLiteral<NUdf::EDataSlot::String>(agg.Name));
  4731. for (const auto& col : agg.ArgsColumns) {
  4732. params.push_back(NewDataLiteral<ui32>(col));
  4733. }
  4734. aggsNodes.push_back(NewTuple(params));
  4735. }
  4736. builder.Add(NewTuple(aggsNodes));
  4737. return TRuntimeNode(builder.Build(), false);
  4738. }
  4739. TRuntimeNode TProgramBuilder::BlockCombineHashed(TRuntimeNode stream, std::optional<ui32> filterColumn, const TArrayRef<ui32>& keys,
  4740. const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4741. if constexpr (RuntimeVersion < 31U) {
  4742. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4743. }
  4744. MKQL_ENSURE(stream.GetStaticType()->IsStream(), "Expected stream as input type");
  4745. MKQL_ENSURE(returnType->IsStream(), "Expected stream as return type");
  4746. if constexpr (RuntimeVersion < 52U) {
  4747. const auto flowReturnType = NewFlowType(AS_TYPE(TStreamType, returnType)->GetItemType());
  4748. return FromFlow(BuildBlockCombineHashed(__func__, ToFlow(stream), filterColumn, keys, aggs, flowReturnType));
  4749. } else {
  4750. return BuildBlockCombineHashed(__func__, stream, filterColumn, keys, aggs, returnType);
  4751. }
  4752. }
  4753. TRuntimeNode TProgramBuilder::BuildBlockMergeFinalizeHashed(const std::string_view& callableName, TRuntimeNode input, const TArrayRef<ui32>& keys,
  4754. const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4755. const auto inputType = input.GetStaticType();
  4756. MKQL_ENSURE(inputType->IsStream() || inputType->IsFlow(), "Expected either stream or flow as input type");
  4757. MKQL_ENSURE(returnType->IsStream() || returnType->IsFlow(), "Expected either stream or flow as return type");
  4758. TCallableBuilder builder(Env, callableName, returnType);
  4759. builder.Add(input);
  4760. TVector<TRuntimeNode> keyNodes;
  4761. for (const auto& key : keys) {
  4762. keyNodes.push_back(NewDataLiteral<ui32>(key));
  4763. }
  4764. builder.Add(NewTuple(keyNodes));
  4765. TVector<TRuntimeNode> aggsNodes;
  4766. for (const auto& agg : aggs) {
  4767. TVector<TRuntimeNode> params;
  4768. params.push_back(NewDataLiteral<NUdf::EDataSlot::String>(agg.Name));
  4769. for (const auto& col : agg.ArgsColumns) {
  4770. params.push_back(NewDataLiteral<ui32>(col));
  4771. }
  4772. aggsNodes.push_back(NewTuple(params));
  4773. }
  4774. builder.Add(NewTuple(aggsNodes));
  4775. return TRuntimeNode(builder.Build(), false);
  4776. }
  4777. TRuntimeNode TProgramBuilder::BlockMergeFinalizeHashed(TRuntimeNode stream, const TArrayRef<ui32>& keys,
  4778. const TArrayRef<const TAggInfo>& aggs, TType* returnType) {
  4779. if constexpr (RuntimeVersion < 31U) {
  4780. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4781. }
  4782. MKQL_ENSURE(stream.GetStaticType()->IsStream(), "Expected stream as input type");
  4783. MKQL_ENSURE(returnType->IsStream(), "Expected stream as return type");
  4784. if constexpr (RuntimeVersion < 52U) {
  4785. const auto flowReturnType = NewFlowType(AS_TYPE(TStreamType, returnType)->GetItemType());
  4786. return FromFlow(BuildBlockMergeFinalizeHashed(__func__, ToFlow(stream), keys, aggs, flowReturnType));
  4787. } else {
  4788. return BuildBlockMergeFinalizeHashed(__func__, stream, keys, aggs, returnType);
  4789. }
  4790. }
  4791. TRuntimeNode TProgramBuilder::BuildBlockMergeManyFinalizeHashed(const std::string_view& callableName, TRuntimeNode input, const TArrayRef<ui32>& keys,
  4792. const TArrayRef<const TAggInfo>& aggs, ui32 streamIndex, const TVector<TVector<ui32>>& streams, TType* returnType) {
  4793. const auto inputType = input.GetStaticType();
  4794. MKQL_ENSURE(inputType->IsStream() || inputType->IsFlow(), "Expected either stream or flow as input type");
  4795. MKQL_ENSURE(returnType->IsStream() || returnType->IsFlow(), "Expected either stream or flow as return type");
  4796. TCallableBuilder builder(Env, callableName, returnType);
  4797. builder.Add(input);
  4798. TVector<TRuntimeNode> keyNodes;
  4799. for (const auto& key : keys) {
  4800. keyNodes.push_back(NewDataLiteral<ui32>(key));
  4801. }
  4802. builder.Add(NewTuple(keyNodes));
  4803. TVector<TRuntimeNode> aggsNodes;
  4804. for (const auto& agg : aggs) {
  4805. TVector<TRuntimeNode> params;
  4806. params.push_back(NewDataLiteral<NUdf::EDataSlot::String>(agg.Name));
  4807. for (const auto& col : agg.ArgsColumns) {
  4808. params.push_back(NewDataLiteral<ui32>(col));
  4809. }
  4810. aggsNodes.push_back(NewTuple(params));
  4811. }
  4812. builder.Add(NewTuple(aggsNodes));
  4813. builder.Add(NewDataLiteral<ui32>(streamIndex));
  4814. TVector<TRuntimeNode> streamsNodes;
  4815. for (const auto& s : streams) {
  4816. TVector<TRuntimeNode> streamNodes;
  4817. for (const auto& i : s) {
  4818. streamNodes.push_back(NewDataLiteral<ui32>(i));
  4819. }
  4820. streamsNodes.push_back(NewTuple(streamNodes));
  4821. }
  4822. builder.Add(NewTuple(streamsNodes));
  4823. return TRuntimeNode(builder.Build(), false);
  4824. }
  4825. TRuntimeNode TProgramBuilder::BlockMergeManyFinalizeHashed(TRuntimeNode stream, const TArrayRef<ui32>& keys,
  4826. const TArrayRef<const TAggInfo>& aggs, ui32 streamIndex, const TVector<TVector<ui32>>& streams, TType* returnType) {
  4827. if constexpr (RuntimeVersion < 31U) {
  4828. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4829. }
  4830. MKQL_ENSURE(stream.GetStaticType()->IsStream(), "Expected stream as input type");
  4831. MKQL_ENSURE(returnType->IsStream(), "Expected stream as return type");
  4832. if constexpr (RuntimeVersion < 52U) {
  4833. const auto flowReturnType = NewFlowType(AS_TYPE(TStreamType, returnType)->GetItemType());
  4834. return FromFlow(BuildBlockMergeManyFinalizeHashed(__func__, ToFlow(stream), keys, aggs, streamIndex, streams, flowReturnType));
  4835. } else {
  4836. return BuildBlockMergeManyFinalizeHashed(__func__, stream, keys, aggs, streamIndex, streams, returnType);
  4837. }
  4838. }
  4839. TRuntimeNode TProgramBuilder::ScalarApply(const TArrayRef<const TRuntimeNode>& args, const TArrayLambda& handler) {
  4840. if constexpr (RuntimeVersion < 39U) {
  4841. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4842. }
  4843. MKQL_ENSURE(!args.empty(), "Required at least one argument");
  4844. TVector<TRuntimeNode> lambdaArgs;
  4845. bool scalarOnly = true;
  4846. std::shared_ptr<arrow::DataType> arrowType;
  4847. for (const auto& arg : args) {
  4848. auto blockType = AS_TYPE(TBlockType, arg.GetStaticType());
  4849. scalarOnly = scalarOnly && blockType->GetShape() == TBlockType::EShape::Scalar;
  4850. MKQL_ENSURE(ConvertArrowType(blockType->GetItemType(), arrowType), "Unsupported arrow type");
  4851. lambdaArgs.emplace_back(Arg(blockType->GetItemType()));
  4852. }
  4853. auto ret = handler(lambdaArgs);
  4854. MKQL_ENSURE(ConvertArrowType(ret.GetStaticType(), arrowType), "Unsupported arrow type");
  4855. auto returnType = NewBlockType(ret.GetStaticType(), scalarOnly ? TBlockType::EShape::Scalar : TBlockType::EShape::Many);
  4856. TCallableBuilder builder(Env, __func__, returnType);
  4857. for (const auto& arg : args) {
  4858. builder.Add(arg);
  4859. }
  4860. for (const auto& arg : lambdaArgs) {
  4861. builder.Add(arg);
  4862. }
  4863. builder.Add(ret);
  4864. return TRuntimeNode(builder.Build(), false);
  4865. }
  4866. TRuntimeNode TProgramBuilder::BlockMapJoinCore(TRuntimeNode stream, TRuntimeNode dict,
  4867. EJoinKind joinKind, const TArrayRef<const ui32>& leftKeyColumns,
  4868. const TArrayRef<const ui32>& leftKeyDrops, TType* returnType
  4869. ) {
  4870. if constexpr (RuntimeVersion < 51U) {
  4871. THROW yexception() << "Runtime version (" << RuntimeVersion << ") too old for " << __func__;
  4872. }
  4873. MKQL_ENSURE(joinKind == EJoinKind::Inner || joinKind == EJoinKind::Left ||
  4874. joinKind == EJoinKind::LeftSemi || joinKind == EJoinKind::LeftOnly,
  4875. "Unsupported join kind");
  4876. MKQL_ENSURE(!leftKeyColumns.empty(), "At least one key column must be specified");
  4877. const THashSet<ui32> leftKeySet(leftKeyColumns.cbegin(), leftKeyColumns.cend());
  4878. for (const auto& drop : leftKeyDrops) {
  4879. MKQL_ENSURE(leftKeySet.contains(drop),
  4880. "Only key columns has to be specified in drop column set");
  4881. }
  4882. TRuntimeNode::TList leftKeyColumnsNodes;
  4883. leftKeyColumnsNodes.reserve(leftKeyColumns.size());
  4884. std::transform(leftKeyColumns.cbegin(), leftKeyColumns.cend(),
  4885. std::back_inserter(leftKeyColumnsNodes), [this](const ui32 idx) {
  4886. return NewDataLiteral(idx);
  4887. });
  4888. TRuntimeNode::TList leftKeyDropsNodes;
  4889. leftKeyDropsNodes.reserve(leftKeyDrops.size());
  4890. std::transform(leftKeyDrops.cbegin(), leftKeyDrops.cend(),
  4891. std::back_inserter(leftKeyDropsNodes), [this](const ui32 idx) {
  4892. return NewDataLiteral(idx);
  4893. });
  4894. TCallableBuilder callableBuilder(Env, __func__, returnType);
  4895. callableBuilder.Add(stream);
  4896. callableBuilder.Add(dict);
  4897. callableBuilder.Add(NewDataLiteral((ui32)joinKind));
  4898. callableBuilder.Add(NewTuple(leftKeyColumnsNodes));
  4899. callableBuilder.Add(NewTuple(leftKeyDropsNodes));
  4900. return TRuntimeNode(callableBuilder.Build(), false);
  4901. }
  4902. namespace {
  4903. using namespace NYql::NMatchRecognize;
  4904. TRuntimeNode PatternToRuntimeNode(const TRowPattern& pattern, const TProgramBuilder& programBuilder) {
  4905. const auto& env = programBuilder.GetTypeEnvironment();
  4906. TTupleLiteralBuilder patternBuilder(env);
  4907. for (const auto& term: pattern) {
  4908. TTupleLiteralBuilder termBuilder(env);
  4909. for (const auto& factor: term) {
  4910. TTupleLiteralBuilder factorBuilder(env);
  4911. factorBuilder.Add(factor.Primary.index() == 0 ?
  4912. programBuilder.NewDataLiteral<NUdf::EDataSlot::String>(std::get<0>(factor.Primary)) :
  4913. PatternToRuntimeNode(std::get<1>(factor.Primary), programBuilder)
  4914. );
  4915. factorBuilder.Add(programBuilder.NewDataLiteral<ui64>(factor.QuantityMin));
  4916. factorBuilder.Add(programBuilder.NewDataLiteral<ui64>(factor.QuantityMax));
  4917. factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Greedy));
  4918. factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Output));
  4919. factorBuilder.Add(programBuilder.NewDataLiteral<bool>(factor.Unused));
  4920. termBuilder.Add({factorBuilder.Build(), true});
  4921. }
  4922. patternBuilder.Add({termBuilder.Build(), true});
  4923. }
  4924. return {patternBuilder.Build(), true};
  4925. };
  4926. } //namespace
  4927. TRuntimeNode TProgramBuilder::MatchRecognizeCore(
  4928. TRuntimeNode inputStream,
  4929. const TUnaryLambda& getPartitionKeySelectorNode,
  4930. const TArrayRef<TStringBuf>& partitionColumns,
  4931. const TArrayRef<std::pair<TStringBuf, TBinaryLambda>>& getMeasures,
  4932. const NYql::NMatchRecognize::TRowPattern& pattern,
  4933. const TArrayRef<std::pair<TStringBuf, TTernaryLambda>>& getDefines,
  4934. bool streamingMode,
  4935. const NYql::NMatchRecognize::TAfterMatchSkipTo& skipTo
  4936. ) {
  4937. MKQL_ENSURE(RuntimeVersion >= 42, "MatchRecognize is not supported in runtime version " << RuntimeVersion);
  4938. const auto inputRowType = AS_TYPE(TStructType, AS_TYPE(TFlowType, inputStream.GetStaticType())->GetItemType());
  4939. const auto inputRowArg = Arg(inputRowType);
  4940. const auto partitionKeySelectorNode = getPartitionKeySelectorNode(inputRowArg);
  4941. TStructTypeBuilder indexRangeTypeBuilder(Env);
  4942. indexRangeTypeBuilder.Add("From", TDataType::Create(NUdf::TDataType<ui64>::Id, Env));
  4943. indexRangeTypeBuilder.Add("To", TDataType::Create(NUdf::TDataType<ui64>::Id, Env));
  4944. const auto& rangeList = TListType::Create(indexRangeTypeBuilder.Build(), Env);
  4945. TStructTypeBuilder matchedVarsTypeBuilder(Env);
  4946. for (const auto& var: GetPatternVars(pattern)) {
  4947. matchedVarsTypeBuilder.Add(var, rangeList);
  4948. }
  4949. TRuntimeNode matchedVarsArg = Arg(matchedVarsTypeBuilder.Build());
  4950. //---These vars may be empty in case of no measures
  4951. TRuntimeNode measureInputDataArg;
  4952. std::vector<TRuntimeNode> specialColumnIndexesInMeasureInputDataRow;
  4953. TVector<TRuntimeNode> measures;
  4954. TVector<TType*> measureTypes;
  4955. //---
  4956. if (getMeasures.empty()) {
  4957. measureInputDataArg = Arg(Env.GetTypeOfVoidLazy());
  4958. } else {
  4959. using NYql::NMatchRecognize::EMeasureInputDataSpecialColumns;
  4960. measures.reserve(getMeasures.size());
  4961. measureTypes.reserve(getMeasures.size());
  4962. specialColumnIndexesInMeasureInputDataRow.resize(static_cast<size_t>(NYql::NMatchRecognize::EMeasureInputDataSpecialColumns::Last));
  4963. TStructTypeBuilder measureInputDataRowTypeBuilder(Env);
  4964. for (ui32 i = 0; i != inputRowType->GetMembersCount(); ++i) {
  4965. measureInputDataRowTypeBuilder.Add(inputRowType->GetMemberName(i), inputRowType->GetMemberType(i));
  4966. }
  4967. measureInputDataRowTypeBuilder.Add(
  4968. MeasureInputDataSpecialColumnName(EMeasureInputDataSpecialColumns::Classifier),
  4969. TDataType::Create(NUdf::TDataType<NYql::NUdf::TUtf8>::Id, Env)
  4970. );
  4971. measureInputDataRowTypeBuilder.Add(
  4972. MeasureInputDataSpecialColumnName(EMeasureInputDataSpecialColumns::MatchNumber),
  4973. TDataType::Create(NUdf::TDataType<ui64>::Id, Env)
  4974. );
  4975. const auto measureInputDataRowType = measureInputDataRowTypeBuilder.Build();
  4976. for (ui32 i = 0; i != measureInputDataRowType->GetMembersCount(); ++i) {
  4977. //assume a few, if grows, it's better to use a lookup table here
  4978. static_assert(static_cast<size_t>(EMeasureInputDataSpecialColumns::Last) < 5);
  4979. for (size_t j = 0; j != static_cast<size_t>(EMeasureInputDataSpecialColumns::Last); ++j) {
  4980. if (measureInputDataRowType->GetMemberName(i) ==
  4981. NYql::NMatchRecognize::MeasureInputDataSpecialColumnName(static_cast<EMeasureInputDataSpecialColumns>(j)))
  4982. specialColumnIndexesInMeasureInputDataRow[j] = NewDataLiteral<ui32>(i);
  4983. }
  4984. }
  4985. measureInputDataArg = Arg(TListType::Create(measureInputDataRowType, Env));
  4986. for (size_t i = 0; i != getMeasures.size(); ++i) {
  4987. measures.push_back(getMeasures[i].second(measureInputDataArg, matchedVarsArg));
  4988. measureTypes.push_back(measures[i].GetStaticType());
  4989. }
  4990. }
  4991. TStructTypeBuilder outputRowTypeBuilder(Env);
  4992. THashMap<TStringBuf, size_t> partitionColumnLookup;
  4993. for (size_t i = 0; i != partitionColumns.size(); ++i) {
  4994. const auto& name = partitionColumns[i];
  4995. partitionColumnLookup[name] = i;
  4996. outputRowTypeBuilder.Add(
  4997. name,
  4998. AS_TYPE(TTupleType, partitionKeySelectorNode.GetStaticType())->GetElementType(i)
  4999. );
  5000. }
  5001. THashMap<TStringBuf, size_t> measureColumnLookup;
  5002. for (size_t i = 0; i != measures.size(); ++i) {
  5003. const auto& name = getMeasures[i].first;
  5004. measureColumnLookup[name] = i;
  5005. outputRowTypeBuilder.Add(
  5006. name,
  5007. measures[i].GetStaticType()
  5008. );
  5009. }
  5010. auto outputRowType = outputRowTypeBuilder.Build();
  5011. std::vector<TRuntimeNode> partitionColumnIndexes(partitionColumnLookup.size());
  5012. std::vector<TRuntimeNode> measureColumnIndexes(measureColumnLookup.size());
  5013. for (ui32 i = 0; i != outputRowType->GetMembersCount(); ++i) {
  5014. if (auto it = partitionColumnLookup.find(outputRowType->GetMemberName(i)); it != partitionColumnLookup.end()) {
  5015. partitionColumnIndexes[it->second] = NewDataLiteral<ui32>(i);
  5016. }
  5017. else if (auto it = measureColumnLookup.find(outputRowType->GetMemberName(i)); it != measureColumnLookup.end()) {
  5018. measureColumnIndexes[it->second] = NewDataLiteral<ui32>(i);
  5019. }
  5020. }
  5021. auto outputType = (TType*)TFlowType::Create(outputRowType, Env);
  5022. THashMap<TStringBuf , size_t> patternVarLookup;
  5023. for (ui32 i = 0; i != AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMembersCount(); ++i){
  5024. patternVarLookup[AS_TYPE(TStructType, matchedVarsArg.GetStaticType())->GetMemberName(i)] = i;
  5025. }
  5026. THashMap<TStringBuf , size_t> defineLookup;
  5027. for (size_t i = 0; i != getDefines.size(); ++i) {
  5028. defineLookup[getDefines[i].first] = i;
  5029. }
  5030. std::vector<TRuntimeNode> defineNames(patternVarLookup.size());
  5031. std::vector<TRuntimeNode> defineNodes(patternVarLookup.size());
  5032. const auto& inputDataArg = Arg(TListType::Create(inputRowType, Env));
  5033. const auto& currentRowIndexArg = Arg(TDataType::Create(NUdf::TDataType<ui64>::Id, Env));
  5034. for (const auto& [v, i]: patternVarLookup) {
  5035. defineNames[i] = NewDataLiteral<NUdf::EDataSlot::String>(v);
  5036. if (const auto it = defineLookup.find(v); it != defineLookup.end()) {
  5037. defineNodes[i] = getDefines[it->second].second(inputDataArg, matchedVarsArg, currentRowIndexArg);
  5038. }
  5039. else { //no predicate for var
  5040. if ("$" == v || "^" == v) {
  5041. //DO nothing, //will be handled in a specific way
  5042. }
  5043. else { // a var without a predicate matches any row
  5044. defineNodes[i] = NewDataLiteral<bool>(true);
  5045. }
  5046. }
  5047. }
  5048. TCallableBuilder callableBuilder(GetTypeEnvironment(), "MatchRecognizeCore", outputType);
  5049. auto indexType = TDataType::Create(NUdf::TDataType<ui32>::Id, Env);
  5050. auto indexListType = TListType::Create(indexType, Env);
  5051. callableBuilder.Add(inputStream);
  5052. callableBuilder.Add(inputRowArg);
  5053. callableBuilder.Add(partitionKeySelectorNode);
  5054. callableBuilder.Add(TRuntimeNode(TListLiteral::Create(partitionColumnIndexes.data(), partitionColumnIndexes.size(), indexListType, Env), true));
  5055. callableBuilder.Add(measureInputDataArg);
  5056. callableBuilder.Add(TRuntimeNode(TListLiteral::Create(
  5057. specialColumnIndexesInMeasureInputDataRow.data(), specialColumnIndexesInMeasureInputDataRow.size(),
  5058. indexListType, Env
  5059. ),
  5060. true));
  5061. callableBuilder.Add(NewDataLiteral<ui32>(inputRowType->GetMembersCount()));
  5062. callableBuilder.Add(matchedVarsArg);
  5063. callableBuilder.Add(TRuntimeNode(TListLiteral::Create(measureColumnIndexes.data(), measureColumnIndexes.size(), indexListType, Env), true));
  5064. for (const auto& m: measures) {
  5065. callableBuilder.Add(m);
  5066. }
  5067. callableBuilder.Add(PatternToRuntimeNode(pattern, *this));
  5068. callableBuilder.Add(currentRowIndexArg);
  5069. callableBuilder.Add(inputDataArg);
  5070. const auto stringType = NewDataType(NUdf::EDataSlot::String);
  5071. callableBuilder.Add(TRuntimeNode(TListLiteral::Create(defineNames.begin(), defineNames.size(), TListType::Create(stringType, Env), Env), true));
  5072. for (const auto& d: defineNodes) {
  5073. callableBuilder.Add(d);
  5074. }
  5075. callableBuilder.Add(NewDataLiteral(streamingMode));
  5076. if (RuntimeVersion >= 52U) {
  5077. callableBuilder.Add(NewDataLiteral(static_cast<i32>(skipTo.To)));
  5078. callableBuilder.Add(NewDataLiteral<NUdf::EDataSlot::String>(skipTo.Var));
  5079. }
  5080. return TRuntimeNode(callableBuilder.Build(), false);
  5081. }
  5082. TRuntimeNode TProgramBuilder::TimeOrderRecover(
  5083. TRuntimeNode inputStream,
  5084. const TUnaryLambda& getTimeExtractor,
  5085. TRuntimeNode delay,
  5086. TRuntimeNode ahead,
  5087. TRuntimeNode rowLimit
  5088. )
  5089. {
  5090. MKQL_ENSURE(RuntimeVersion >= 44, "TimeOrderRecover is not supported in runtime version " << RuntimeVersion);
  5091. auto& inputRowType = *static_cast<TStructType*>(AS_TYPE(TStructType, AS_TYPE(TFlowType, inputStream.GetStaticType())->GetItemType()));
  5092. const auto inputRowArg = Arg(&inputRowType);
  5093. TStructTypeBuilder outputRowTypeBuilder(Env);
  5094. outputRowTypeBuilder.Reserve(inputRowType.GetMembersCount() + 1);
  5095. const ui32 inputRowColumnCount = inputRowType.GetMembersCount();
  5096. for (ui32 i = 0; i != inputRowColumnCount; ++i) {
  5097. outputRowTypeBuilder.Add(inputRowType.GetMemberName(i), inputRowType.GetMemberType(i));
  5098. }
  5099. using NYql::NTimeOrderRecover::OUT_OF_ORDER_MARKER;
  5100. outputRowTypeBuilder.Add(OUT_OF_ORDER_MARKER, TDataType::Create(NUdf::TDataType<bool>::Id, Env));
  5101. const auto outputRowType = outputRowTypeBuilder.Build();
  5102. const auto outOfOrderColumnIndex = outputRowType->GetMemberIndex(OUT_OF_ORDER_MARKER);
  5103. TCallableBuilder callableBuilder(GetTypeEnvironment(), "TimeOrderRecover", TFlowType::Create(outputRowType, Env));
  5104. callableBuilder.Add(inputStream);
  5105. callableBuilder.Add(inputRowArg);
  5106. callableBuilder.Add(getTimeExtractor(inputRowArg));
  5107. callableBuilder.Add(NewDataLiteral(inputRowColumnCount));
  5108. callableBuilder.Add(NewDataLiteral(outOfOrderColumnIndex));
  5109. callableBuilder.Add(delay),
  5110. callableBuilder.Add(ahead),
  5111. callableBuilder.Add(rowLimit);
  5112. return TRuntimeNode(callableBuilder.Build(), false);
  5113. }
  5114. bool CanExportType(TType* type, const TTypeEnvironment& env) {
  5115. if (type->GetKind() == TType::EKind::Type) {
  5116. return false; // Type of Type
  5117. }
  5118. TExploringNodeVisitor explorer;
  5119. explorer.Walk(type, env);
  5120. bool canExport = true;
  5121. for (auto& node : explorer.GetNodes()) {
  5122. switch (static_cast<TType*>(node)->GetKind()) {
  5123. case TType::EKind::Void:
  5124. node->SetCookie(1);
  5125. break;
  5126. case TType::EKind::Data:
  5127. node->SetCookie(1);
  5128. break;
  5129. case TType::EKind::Pg:
  5130. node->SetCookie(1);
  5131. break;
  5132. case TType::EKind::Optional: {
  5133. auto optionalType = static_cast<TOptionalType*>(node);
  5134. if (!optionalType->GetItemType()->GetCookie()) {
  5135. canExport = false;
  5136. } else {
  5137. node->SetCookie(1);
  5138. }
  5139. break;
  5140. }
  5141. case TType::EKind::List: {
  5142. auto listType = static_cast<TListType*>(node);
  5143. if (!listType->GetItemType()->GetCookie()) {
  5144. canExport = false;
  5145. } else {
  5146. node->SetCookie(1);
  5147. }
  5148. break;
  5149. }
  5150. case TType::EKind::Struct: {
  5151. auto structType = static_cast<TStructType*>(node);
  5152. for (ui32 index = 0; index < structType->GetMembersCount(); ++index) {
  5153. if (!structType->GetMemberType(index)->GetCookie()) {
  5154. canExport = false;
  5155. break;
  5156. }
  5157. }
  5158. if (canExport) {
  5159. node->SetCookie(1);
  5160. }
  5161. break;
  5162. }
  5163. case TType::EKind::Tuple: {
  5164. auto tupleType = static_cast<TTupleType*>(node);
  5165. for (ui32 index = 0; index < tupleType->GetElementsCount(); ++index) {
  5166. if (!tupleType->GetElementType(index)->GetCookie()) {
  5167. canExport = false;
  5168. break;
  5169. }
  5170. }
  5171. if (canExport) {
  5172. node->SetCookie(1);
  5173. }
  5174. break;
  5175. }
  5176. case TType::EKind::Dict: {
  5177. auto dictType = static_cast<TDictType*>(node);
  5178. if (!dictType->GetKeyType()->GetCookie() || !dictType->GetPayloadType()->GetCookie()) {
  5179. canExport = false;
  5180. } else {
  5181. node->SetCookie(1);
  5182. }
  5183. break;
  5184. }
  5185. case TType::EKind::Variant: {
  5186. auto variantType = static_cast<TVariantType*>(node);
  5187. TType* innerType = variantType->GetUnderlyingType();
  5188. if (innerType->IsStruct()) {
  5189. auto structType = static_cast<TStructType*>(innerType);
  5190. for (ui32 index = 0; index < structType->GetMembersCount(); ++index) {
  5191. if (!structType->GetMemberType(index)->GetCookie()) {
  5192. canExport = false;
  5193. break;
  5194. }
  5195. }
  5196. }
  5197. if (innerType->IsTuple()) {
  5198. auto tupleType = static_cast<TTupleType*>(innerType);
  5199. for (ui32 index = 0; index < tupleType->GetElementsCount(); ++index) {
  5200. if (!tupleType->GetElementType(index)->GetCookie()) {
  5201. canExport = false;
  5202. break;
  5203. }
  5204. }
  5205. }
  5206. if (canExport) {
  5207. node->SetCookie(1);
  5208. }
  5209. break;
  5210. }
  5211. case TType::EKind::Type:
  5212. break;
  5213. default:
  5214. canExport = false;
  5215. }
  5216. if (!canExport) {
  5217. break;
  5218. }
  5219. }
  5220. for (auto& node : explorer.GetNodes()) {
  5221. node->SetCookie(0);
  5222. }
  5223. return canExport;
  5224. }
  5225. }
  5226. }