  1. /*
  2. * Copyright 2011 INRIA Saclay
  3. * Copyright 2012-2014 Ecole Normale Superieure
  4. * Copyright 2015-2016 Sven Verdoolaege
  5. * Copyright 2016 INRIA Paris
  6. * Copyright 2017 Sven Verdoolaege
  7. *
  8. * Use of this software is governed by the MIT license
  9. *
  10. * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France,
  11. * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod,
  12. * 91893 Orsay, France
  13. * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France
  14. * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12,
  15. * CS 42112, 75589 Paris Cedex 12, France
  16. */
  17. #include <isl_ctx_private.h>
  18. #include <isl_map_private.h>
  19. #include <isl_space_private.h>
  20. #include <isl_aff_private.h>
  21. #include <isl/hash.h>
  22. #include <isl/id.h>
  23. #include <isl/constraint.h>
  24. #include <isl/schedule.h>
  25. #include <isl_schedule_constraints.h>
  26. #include <isl/schedule_node.h>
  27. #include <isl_mat_private.h>
  28. #include <isl_vec_private.h>
  29. #include <isl/set.h>
  30. #include <isl_union_set_private.h>
  31. #include <isl_seq.h>
  32. #include <isl_tab.h>
  33. #include <isl_dim_map.h>
  34. #include <isl/map_to_basic_set.h>
  35. #include <isl_sort.h>
  36. #include <isl_options_private.h>
  37. #include <isl_tarjan.h>
  38. #include <isl_morph.h>
  39. #include <isl/ilp.h>
  40. #include <isl_val_private.h>
  41. /*
  42. * The scheduling algorithm implemented in this file was inspired by
  43. * Bondhugula et al., "Automatic Transformations for Communication-Minimized
  44. * Parallelization and Locality Optimization in the Polyhedral Model".
  45. *
  46. * For a detailed description of the variant implemented in isl,
  47. * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017).
  48. */
  49. /* Internal information about a node that is used during the construction
  50. * of a schedule.
  51. * space represents the original space in which the domain lives;
  52. * that is, the space is not affected by compression
  53. * sched is a matrix representation of the schedule being constructed
  54. * for this node; if compressed is set, then this schedule is
  55. * defined over the compressed domain space
  56. * sched_map is an isl_map representation of the same (partial) schedule
  57. * sched_map may be NULL; if compressed is set, then this map
  58. * is defined over the uncompressed domain space
  59. * rank is the number of linearly independent rows in the linear part
  60. * of sched
  61. * the rows of "vmap" represent a change of basis for the node
  62. * variables; the first rank rows span the linear part of
  63. * the schedule rows; the remaining rows are linearly independent
  64. * the rows of "indep" represent linear combinations of the schedule
  65. * coefficients that are non-zero when the schedule coefficients are
  66. * linearly independent of previously computed schedule rows.
  67. * start is the first variable in the LP problem in the sequences that
  68. * represents the schedule coefficients of this node
  69. * nvar is the dimension of the (compressed) domain
  70. * nparam is the number of parameters or 0 if we are not constructing
  71. * a parametric schedule
  72. *
  73. * If compressed is set, then hull represents the constraints
  74. * that were used to derive the compression, while compress and
  75. * decompress map the original space to the compressed space and
  76. * vice versa.
  77. *
  78. * scc is the index of SCC (or WCC) this node belongs to
  79. *
  80. * "cluster" is only used inside extract_clusters and identifies
  81. * the cluster of SCCs that the node belongs to.
  82. *
  83. * coincident contains a boolean for each of the rows of the schedule,
  84. * indicating whether the corresponding scheduling dimension satisfies
  85. * the coincidence constraints in the sense that the corresponding
  86. * dependence distances are zero.
  87. *
  88. * If the schedule_treat_coalescing option is set, then
  89. * "sizes" contains the sizes of the (compressed) instance set
  90. * in each direction. If there is no fixed size in a given direction,
  91. * then the corresponding size value is set to infinity.
  92. * If the schedule_treat_coalescing option or the schedule_max_coefficient
  93. * option is set, then "max" contains the maximal values for
  94. * schedule coefficients of the (compressed) variables. If no bound
  95. * needs to be imposed on a particular variable, then the corresponding
  96. * value is negative.
  97. * If not NULL, then "bounds" contains a non-parametric set
  98. * in the compressed space that is bounded by the size in each direction.
  99. */
struct isl_sched_node {
	/* Original space of the domain; not affected by compression. */
	isl_space *space;
	/* Has the domain been compressed? */
	int compressed;
	/* If compressed: constraints used to derive the compression. */
	isl_set *hull;
	/* If compressed: maps the original space to the compressed space. */
	isl_multi_aff *compress;
	/* If compressed: maps the compressed space back to the original. */
	isl_pw_multi_aff *decompress;
	/* Matrix representation of the schedule constructed so far;
	 * defined over the compressed space if "compressed" is set. */
	isl_mat *sched;
	/* isl_map view of "sched"; may be NULL; defined over the
	 * uncompressed space even if "compressed" is set. */
	isl_map *sched_map;
	/* Number of linearly independent rows in the linear part of sched. */
	int rank;
	/* Rows are linear combinations of the schedule coefficients that
	 * are non-zero when the coefficients are linearly independent
	 * of previously computed schedule rows. */
	isl_mat *indep;
	/* Change of basis for the node variables: the first "rank" rows
	 * span the linear part of the schedule rows; the remaining rows
	 * are linearly independent. */
	isl_mat *vmap;
	/* First variable in the LP problem representing this node's
	 * schedule coefficients. */
	int start;
	/* Dimension of the (compressed) domain. */
	int nvar;
	/* Number of parameters, or 0 for a non-parametric schedule. */
	int nparam;
	/* Index of the SCC (or WCC) this node belongs to. */
	int scc;
	/* Cluster of SCCs; only used inside extract_clusters. */
	int cluster;
	/* One boolean per schedule row: does that row satisfy the
	 * coincidence constraints (zero dependence distances)? */
	int *coincident;
	/* Size of the (compressed) instance set in each direction
	 * (infinity if unbounded); only set when the
	 * schedule_treat_coalescing option is set. */
	isl_multi_val *sizes;
	/* If not NULL: non-parametric set in the compressed space
	 * bounded by the size in each direction. */
	isl_basic_set *bounds;
	/* Maximal schedule coefficient per (compressed) variable;
	 * negative if no bound needs to be imposed. */
	isl_vec *max;
};
  121. static isl_bool node_has_tuples(const void *entry, const void *val)
  122. {
  123. struct isl_sched_node *node = (struct isl_sched_node *)entry;
  124. isl_space *space = (isl_space *) val;
  125. return isl_space_has_equal_tuples(node->space, space);
  126. }
  127. static int node_scc_exactly(struct isl_sched_node *node, int scc)
  128. {
  129. return node->scc == scc;
  130. }
  131. static int node_scc_at_most(struct isl_sched_node *node, int scc)
  132. {
  133. return node->scc <= scc;
  134. }
  135. static int node_scc_at_least(struct isl_sched_node *node, int scc)
  136. {
  137. return node->scc >= scc;
  138. }
  139. /* An edge in the dependence graph. An edge may be used to
  140. * ensure validity of the generated schedule, to minimize the dependence
  141. * distance or both
  142. *
  143. * map is the dependence relation, with i -> j in the map if j depends on i
  144. * tagged_condition and tagged_validity contain the union of all tagged
  145. * condition or conditional validity dependence relations that
  146. * specialize the dependence relation "map"; that is,
  147. * if (i -> a) -> (j -> b) is an element of "tagged_condition"
  148. * or "tagged_validity", then i -> j is an element of "map".
  149. * If these fields are NULL, then they represent the empty relation.
  150. * src is the source node
  151. * dst is the sink node
  152. *
  153. * types is a bit vector containing the types of this edge.
  154. * validity is set if the edge is used to ensure correctness
  155. * coincidence is used to enforce zero dependence distances
  156. * proximity is set if the edge is used to minimize dependence distances
  157. * condition is set if the edge represents a condition
  158. * for a conditional validity schedule constraint
  159. * local can only be set for condition edges and indicates that
  160. * the dependence distance over the edge should be zero
  161. * conditional_validity is set if the edge is used to conditionally
  162. * ensure correctness
  163. *
  164. * For validity edges, start and end mark the sequence of inequality
  165. * constraints in the LP problem that encode the validity constraint
  166. * corresponding to this edge.
  167. *
  168. * During clustering, an edge may be marked "no_merge" if it should
  169. * not be used to merge clusters.
  170. * The weight is also only used during clustering and it is
  171. * an indication of how many schedule dimensions on either side
  172. * of the schedule constraints can be aligned.
  173. * If the weight is negative, then this means that this edge was postponed
  174. * by has_bounded_distances or any_no_merge. The original weight can
  175. * be retrieved by adding 1 + graph->max_weight, with "graph"
  176. * the graph containing this edge.
  177. */
struct isl_sched_edge {
	/* Dependence relation: i -> j is in the map if j depends on i. */
	isl_map *map;
	/* Union of all tagged condition dependence relations that
	 * specialize "map"; NULL represents the empty relation. */
	isl_union_map *tagged_condition;
	/* Union of all tagged conditional validity dependence relations
	 * that specialize "map"; NULL represents the empty relation. */
	isl_union_map *tagged_validity;
	/* Source node. */
	struct isl_sched_node *src;
	/* Sink node. */
	struct isl_sched_node *dst;
	/* Bit vector of the isl_edge_type types of this edge. */
	unsigned types;
	/* For validity edges: start of the sequence of inequality
	 * constraints in the LP problem encoding this edge. */
	int start;
	/* For validity edges: end of that sequence of constraints. */
	int end;
	/* Clustering only: set if this edge should not be used
	 * to merge clusters. */
	int no_merge;
	/* Clustering only: how many schedule dimensions on either side
	 * of the schedule constraints can be aligned; negative if the
	 * edge was postponed (original weight is obtained by adding
	 * 1 + graph->max_weight). */
	int weight;
};
  190. /* Is "edge" marked as being of type "type"?
  191. */
  192. static int is_type(struct isl_sched_edge *edge, enum isl_edge_type type)
  193. {
  194. return ISL_FL_ISSET(edge->types, 1 << type);
  195. }
  196. /* Mark "edge" as being of type "type".
  197. */
  198. static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type)
  199. {
  200. ISL_FL_SET(edge->types, 1 << type);
  201. }
  202. /* No longer mark "edge" as being of type "type"?
  203. */
  204. static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type)
  205. {
  206. ISL_FL_CLR(edge->types, 1 << type);
  207. }
  208. /* Is "edge" marked as a validity edge?
  209. */
  210. static int is_validity(struct isl_sched_edge *edge)
  211. {
  212. return is_type(edge, isl_edge_validity);
  213. }
  214. /* Mark "edge" as a validity edge.
  215. */
  216. static void set_validity(struct isl_sched_edge *edge)
  217. {
  218. set_type(edge, isl_edge_validity);
  219. }
  220. /* Is "edge" marked as a proximity edge?
  221. */
  222. static int is_proximity(struct isl_sched_edge *edge)
  223. {
  224. return is_type(edge, isl_edge_proximity);
  225. }
  226. /* Is "edge" marked as a local edge?
  227. */
  228. static int is_local(struct isl_sched_edge *edge)
  229. {
  230. return is_type(edge, isl_edge_local);
  231. }
  232. /* Mark "edge" as a local edge.
  233. */
  234. static void set_local(struct isl_sched_edge *edge)
  235. {
  236. set_type(edge, isl_edge_local);
  237. }
  238. /* No longer mark "edge" as a local edge.
  239. */
  240. static void clear_local(struct isl_sched_edge *edge)
  241. {
  242. clear_type(edge, isl_edge_local);
  243. }
  244. /* Is "edge" marked as a coincidence edge?
  245. */
  246. static int is_coincidence(struct isl_sched_edge *edge)
  247. {
  248. return is_type(edge, isl_edge_coincidence);
  249. }
  250. /* Is "edge" marked as a condition edge?
  251. */
  252. static int is_condition(struct isl_sched_edge *edge)
  253. {
  254. return is_type(edge, isl_edge_condition);
  255. }
  256. /* Is "edge" marked as a conditional validity edge?
  257. */
  258. static int is_conditional_validity(struct isl_sched_edge *edge)
  259. {
  260. return is_type(edge, isl_edge_conditional_validity);
  261. }
  262. /* Is "edge" of a type that can appear multiple times between
  263. * the same pair of nodes?
  264. *
  265. * Condition edges and conditional validity edges may have tagged
  266. * dependence relations, in which case an edge is added for each
  267. * pair of tags.
  268. */
  269. static int is_multi_edge_type(struct isl_sched_edge *edge)
  270. {
  271. return is_condition(edge) || is_conditional_validity(edge);
  272. }
  273. /* Internal information about the dependence graph used during
  274. * the construction of the schedule.
  275. *
  276. * intra_hmap is a cache, mapping dependence relations to their dual,
  277. * for dependences from a node to itself, possibly without
  278. * coefficients for the parameters
  279. * intra_hmap_param is a cache, mapping dependence relations to their dual,
  280. * for dependences from a node to itself, including coefficients
  281. * for the parameters
  282. * inter_hmap is a cache, mapping dependence relations to their dual,
  283. * for dependences between distinct nodes
  284. * if compression is involved then the key for these maps
  285. * is the original, uncompressed dependence relation, while
  286. * the value is the dual of the compressed dependence relation.
  287. *
  288. * n is the number of nodes
  289. * node is the list of nodes
  290. * maxvar is the maximal number of variables over all nodes
  291. * max_row is the allocated number of rows in the schedule
  292. * n_row is the current (maximal) number of linearly independent
  293. * rows in the node schedules
  294. * n_total_row is the current number of rows in the node schedules
  295. * band_start is the starting row in the node schedules of the current band
  296. * root is set to the original dependence graph from which this graph
  297. * is derived through splitting. If this graph is not the result of
  298. * splitting, then the root field points to the graph itself.
  299. *
  300. * sorted contains a list of node indices sorted according to the
  301. * SCC to which a node belongs
  302. *
  303. * n_edge is the number of edges
  304. * edge is the list of edges
  305. * max_edge contains the maximal number of edges of each type;
306. * in particular, it contains the number of edges in the initial graph.
  307. * edge_table contains pointers into the edge array, hashed on the source
  308. * and sink spaces; there is one such table for each type;
  309. * a given edge may be referenced from more than one table
  310. * if the corresponding relation appears in more than one of the
  311. * sets of dependences; however, for each type there is only
  312. * a single edge between a given pair of source and sink space
  313. * in the entire graph
  314. *
  315. * node_table contains pointers into the node array, hashed on the space tuples
  316. *
  317. * region contains a list of variable sequences that should be non-trivial
  318. *
  319. * lp contains the (I)LP problem used to obtain new schedule rows
  320. *
  321. * src_scc and dst_scc are the source and sink SCCs of an edge with
  322. * conflicting constraints
  323. *
  324. * scc represents the number of components
  325. * weak is set if the components are weakly connected
  326. *
  327. * max_weight is used during clustering and represents the maximal
  328. * weight of the relevant proximity edges.
  329. */
struct isl_sched_graph {
	/* Cache mapping intra-node dependence relations to their dual,
	 * without coefficients for the parameters.  If compression is
	 * involved, the key is the uncompressed relation and the value
	 * the dual of the compressed relation. */
	isl_map_to_basic_set *intra_hmap;
	/* Like intra_hmap, but including coefficients for the parameters. */
	isl_map_to_basic_set *intra_hmap_param;
	/* Cache mapping inter-node dependence relations to their dual. */
	isl_map_to_basic_set *inter_hmap;
	/* List of nodes. */
	struct isl_sched_node *node;
	/* Number of nodes. */
	int n;
	/* Maximal number of variables over all nodes. */
	int maxvar;
	/* Allocated number of rows in the schedule. */
	int max_row;
	/* Current (maximal) number of linearly independent rows
	 * in the node schedules. */
	int n_row;
	/* Node indices sorted according to the SCC a node belongs to. */
	int *sorted;
	/* Current number of rows in the node schedules. */
	int n_total_row;
	/* Starting row in the node schedules of the current band. */
	int band_start;
	/* Original dependence graph from which this graph was derived
	 * through splitting; points to this graph itself if it is not
	 * the result of splitting. */
	struct isl_sched_graph *root;
	/* List of edges. */
	struct isl_sched_edge *edge;
	/* Number of edges. */
	int n_edge;
	/* Maximal number of edges of each type, i.e., the number of
	 * edges in the initial graph. */
	int max_edge[isl_edge_last + 1];
	/* Per type: edges hashed on the source and sink spaces; a given
	 * edge may be referenced from more than one table, but per type
	 * there is a single edge between a given pair of spaces. */
	struct isl_hash_table *edge_table[isl_edge_last + 1];
	/* Nodes hashed on their space tuples. */
	struct isl_hash_table *node_table;
	/* Variable sequences that should be non-trivial. */
	struct isl_trivial_region *region;
	/* The (I)LP problem used to obtain new schedule rows. */
	isl_basic_set *lp;
	/* Source SCC of an edge with conflicting constraints. */
	int src_scc;
	/* Sink SCC of an edge with conflicting constraints. */
	int dst_scc;
	/* Number of components. */
	int scc;
	/* Set if the components are weakly connected. */
	int weak;
	/* Clustering only: maximal weight of the relevant proximity edges. */
	int max_weight;
};
  356. /* Initialize node_table based on the list of nodes.
  357. */
  358. static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph)
  359. {
  360. int i;
  361. graph->node_table = isl_hash_table_alloc(ctx, graph->n);
  362. if (!graph->node_table)
  363. return -1;
  364. for (i = 0; i < graph->n; ++i) {
  365. struct isl_hash_table_entry *entry;
  366. uint32_t hash;
  367. hash = isl_space_get_tuple_hash(graph->node[i].space);
  368. entry = isl_hash_table_find(ctx, graph->node_table, hash,
  369. &node_has_tuples,
  370. graph->node[i].space, 1);
  371. if (!entry)
  372. return -1;
  373. entry->data = &graph->node[i];
  374. }
  375. return 0;
  376. }
  377. /* Return a pointer to the node that lives within the given space,
  378. * an invalid node if there is no such node, or NULL in case of error.
  379. */
  380. static struct isl_sched_node *graph_find_node(isl_ctx *ctx,
  381. struct isl_sched_graph *graph, __isl_keep isl_space *space)
  382. {
  383. struct isl_hash_table_entry *entry;
  384. uint32_t hash;
  385. if (!space)
  386. return NULL;
  387. hash = isl_space_get_tuple_hash(space);
  388. entry = isl_hash_table_find(ctx, graph->node_table, hash,
  389. &node_has_tuples, space, 0);
  390. if (!entry)
  391. return NULL;
  392. if (entry == isl_hash_table_entry_none)
  393. return graph->node + graph->n;
  394. return entry->data;
  395. }
  396. /* Is "node" a node in "graph"?
  397. */
  398. static int is_node(struct isl_sched_graph *graph,
  399. struct isl_sched_node *node)
  400. {
  401. return node && node >= &graph->node[0] && node < &graph->node[graph->n];
  402. }
  403. static isl_bool edge_has_src_and_dst(const void *entry, const void *val)
  404. {
  405. const struct isl_sched_edge *edge = entry;
  406. const struct isl_sched_edge *temp = val;
  407. return isl_bool_ok(edge->src == temp->src && edge->dst == temp->dst);
  408. }
  409. /* Add the given edge to graph->edge_table[type].
  410. */
  411. static isl_stat graph_edge_table_add(isl_ctx *ctx,
  412. struct isl_sched_graph *graph, enum isl_edge_type type,
  413. struct isl_sched_edge *edge)
  414. {
  415. struct isl_hash_table_entry *entry;
  416. uint32_t hash;
  417. hash = isl_hash_init();
  418. hash = isl_hash_builtin(hash, edge->src);
  419. hash = isl_hash_builtin(hash, edge->dst);
  420. entry = isl_hash_table_find(ctx, graph->edge_table[type], hash,
  421. &edge_has_src_and_dst, edge, 1);
  422. if (!entry)
  423. return isl_stat_error;
  424. entry->data = edge;
  425. return isl_stat_ok;
  426. }
  427. /* Add "edge" to all relevant edge tables.
  428. * That is, for every type of the edge, add it to the corresponding table.
  429. */
  430. static isl_stat graph_edge_tables_add(isl_ctx *ctx,
  431. struct isl_sched_graph *graph, struct isl_sched_edge *edge)
  432. {
  433. enum isl_edge_type t;
  434. for (t = isl_edge_first; t <= isl_edge_last; ++t) {
  435. if (!is_type(edge, t))
  436. continue;
  437. if (graph_edge_table_add(ctx, graph, t, edge) < 0)
  438. return isl_stat_error;
  439. }
  440. return isl_stat_ok;
  441. }
  442. /* Allocate the edge_tables based on the maximal number of edges of
  443. * each type.
  444. */
  445. static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph)
  446. {
  447. int i;
  448. for (i = 0; i <= isl_edge_last; ++i) {
  449. graph->edge_table[i] = isl_hash_table_alloc(ctx,
  450. graph->max_edge[i]);
  451. if (!graph->edge_table[i])
  452. return -1;
  453. }
  454. return 0;
  455. }
  456. /* If graph->edge_table[type] contains an edge from the given source
  457. * to the given destination, then return the hash table entry of this edge.
  458. * Otherwise, return NULL.
  459. */
  460. static struct isl_hash_table_entry *graph_find_edge_entry(
  461. struct isl_sched_graph *graph,
  462. enum isl_edge_type type,
  463. struct isl_sched_node *src, struct isl_sched_node *dst)
  464. {
  465. isl_ctx *ctx = isl_space_get_ctx(src->space);
  466. uint32_t hash;
  467. struct isl_sched_edge temp = { .src = src, .dst = dst };
  468. hash = isl_hash_init();
  469. hash = isl_hash_builtin(hash, temp.src);
  470. hash = isl_hash_builtin(hash, temp.dst);
  471. return isl_hash_table_find(ctx, graph->edge_table[type], hash,
  472. &edge_has_src_and_dst, &temp, 0);
  473. }
  474. /* If graph->edge_table[type] contains an edge from the given source
  475. * to the given destination, then return this edge.
  476. * Return "none" if no such edge can be found.
  477. * Return NULL on error.
  478. */
  479. static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph,
  480. enum isl_edge_type type,
  481. struct isl_sched_node *src, struct isl_sched_node *dst,
  482. struct isl_sched_edge *none)
  483. {
  484. struct isl_hash_table_entry *entry;
  485. entry = graph_find_edge_entry(graph, type, src, dst);
  486. if (!entry)
  487. return NULL;
  488. if (entry == isl_hash_table_entry_none)
  489. return none;
  490. return entry->data;
  491. }
  492. /* Check whether the dependence graph has an edge of the given type
  493. * between the given two nodes.
  494. */
  495. static isl_bool graph_has_edge(struct isl_sched_graph *graph,
  496. enum isl_edge_type type,
  497. struct isl_sched_node *src, struct isl_sched_node *dst)
  498. {
  499. struct isl_sched_edge dummy;
  500. struct isl_sched_edge *edge;
  501. isl_bool empty;
  502. edge = graph_find_edge(graph, type, src, dst, &dummy);
  503. if (!edge)
  504. return isl_bool_error;
  505. if (edge == &dummy)
  506. return isl_bool_false;
  507. empty = isl_map_plain_is_empty(edge->map);
  508. return isl_bool_not(empty);
  509. }
  510. /* Look for any edge with the same src, dst and map fields as "model".
  511. *
  512. * Return the matching edge if one can be found.
  513. * Return "model" if no matching edge is found.
  514. * Return NULL on error.
  515. */
  516. static struct isl_sched_edge *graph_find_matching_edge(
  517. struct isl_sched_graph *graph, struct isl_sched_edge *model)
  518. {
  519. enum isl_edge_type i;
  520. struct isl_sched_edge *edge;
  521. for (i = isl_edge_first; i <= isl_edge_last; ++i) {
  522. int is_equal;
  523. edge = graph_find_edge(graph, i, model->src, model->dst, model);
  524. if (!edge)
  525. return NULL;
  526. if (edge == model)
  527. continue;
  528. is_equal = isl_map_plain_is_equal(model->map, edge->map);
  529. if (is_equal < 0)
  530. return NULL;
  531. if (is_equal)
  532. return edge;
  533. }
  534. return model;
  535. }
  536. /* Remove the given edge from all the edge_tables that refer to it.
  537. */
  538. static isl_stat graph_remove_edge(struct isl_sched_graph *graph,
  539. struct isl_sched_edge *edge)
  540. {
  541. isl_ctx *ctx = isl_map_get_ctx(edge->map);
  542. enum isl_edge_type i;
  543. for (i = isl_edge_first; i <= isl_edge_last; ++i) {
  544. struct isl_hash_table_entry *entry;
  545. entry = graph_find_edge_entry(graph, i, edge->src, edge->dst);
  546. if (!entry)
  547. return isl_stat_error;
  548. if (entry == isl_hash_table_entry_none)
  549. continue;
  550. if (entry->data != edge)
  551. continue;
  552. isl_hash_table_remove(ctx, graph->edge_table[i], entry);
  553. }
  554. return isl_stat_ok;
  555. }
  556. /* Check whether the dependence graph has any edge
  557. * between the given two nodes.
  558. */
  559. static isl_bool graph_has_any_edge(struct isl_sched_graph *graph,
  560. struct isl_sched_node *src, struct isl_sched_node *dst)
  561. {
  562. enum isl_edge_type i;
  563. isl_bool r;
  564. for (i = isl_edge_first; i <= isl_edge_last; ++i) {
  565. r = graph_has_edge(graph, i, src, dst);
  566. if (r < 0 || r)
  567. return r;
  568. }
  569. return r;
  570. }
  571. /* Check whether the dependence graph has a validity edge
  572. * between the given two nodes.
  573. *
  574. * Conditional validity edges are essentially validity edges that
  575. * can be ignored if the corresponding condition edges are iteration private.
  576. * Here, we are only checking for the presence of validity
  577. * edges, so we need to consider the conditional validity edges too.
  578. * In particular, this function is used during the detection
  579. * of strongly connected components and we cannot ignore
  580. * conditional validity edges during this detection.
  581. */
  582. static isl_bool graph_has_validity_edge(struct isl_sched_graph *graph,
  583. struct isl_sched_node *src, struct isl_sched_node *dst)
  584. {
  585. isl_bool r;
  586. r = graph_has_edge(graph, isl_edge_validity, src, dst);
  587. if (r < 0 || r)
  588. return r;
  589. return graph_has_edge(graph, isl_edge_conditional_validity, src, dst);
  590. }
  591. /* Perform all the required memory allocations for a schedule graph "graph"
  592. * with "n_node" nodes and "n_edge" edge and initialize the corresponding
  593. * fields.
  594. */
  595. static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph,
  596. int n_node, int n_edge)
  597. {
  598. int i;
  599. graph->n = n_node;
  600. graph->n_edge = n_edge;
  601. graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n);
  602. graph->sorted = isl_calloc_array(ctx, int, graph->n);
  603. graph->region = isl_alloc_array(ctx,
  604. struct isl_trivial_region, graph->n);
  605. graph->edge = isl_calloc_array(ctx,
  606. struct isl_sched_edge, graph->n_edge);
  607. graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
  608. graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
  609. graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, 2 * n_edge);
  610. if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) ||
  611. !graph->sorted)
  612. return isl_stat_error;
  613. for(i = 0; i < graph->n; ++i)
  614. graph->sorted[i] = i;
  615. return isl_stat_ok;
  616. }
/* Free the memory associated to node "node" in "graph".
 * The "coincident" field is shared by nodes in a graph and its subgraph.
 * It therefore only needs to be freed for the original dependence graph,
 * i.e., one that is not the result of splitting.
 */
static void clear_node(struct isl_sched_graph *graph,
	struct isl_sched_node *node)
{
	isl_space_free(node->space);
	isl_set_free(node->hull);
	isl_multi_aff_free(node->compress);
	isl_pw_multi_aff_free(node->decompress);
	isl_mat_free(node->sched);
	isl_map_free(node->sched_map);
	isl_mat_free(node->indep);
	isl_mat_free(node->vmap);
	/* "coincident" is shared with subgraphs; only the root graph
	 * owns it. */
	if (graph->root == graph)
		free(node->coincident);
	isl_multi_val_free(node->sizes);
	isl_basic_set_free(node->bounds);
	isl_vec_free(node->max);
}
/* Free all memory associated to "graph": the caches, the nodes and
 * their payloads, the edges and their dependence relations, the
 * hash tables and the LP problem.
 */
static void graph_free(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;

	isl_map_to_basic_set_free(graph->intra_hmap);
	isl_map_to_basic_set_free(graph->intra_hmap_param);
	isl_map_to_basic_set_free(graph->inter_hmap);
	/* Node payloads are only freed if the node array was allocated. */
	if (graph->node)
		for (i = 0; i < graph->n; ++i)
			clear_node(graph, &graph->node[i]);
	free(graph->node);
	free(graph->sorted);
	if (graph->edge)
		for (i = 0; i < graph->n_edge; ++i) {
			isl_map_free(graph->edge[i].map);
			isl_union_map_free(graph->edge[i].tagged_condition);
			isl_union_map_free(graph->edge[i].tagged_validity);
		}
	free(graph->edge);
	free(graph->region);
	for (i = 0; i <= isl_edge_last; ++i)
		isl_hash_table_free(ctx, graph->edge_table[i]);
	isl_hash_table_free(ctx, graph->node_table);
	isl_basic_set_free(graph->lp);
}
  663. /* For each "set" on which this function is called, increment
  664. * graph->n by one and update graph->maxvar.
  665. */
  666. static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user)
  667. {
  668. struct isl_sched_graph *graph = user;
  669. isl_size nvar = isl_set_dim(set, isl_dim_set);
  670. graph->n++;
  671. if (nvar > graph->maxvar)
  672. graph->maxvar = nvar;
  673. isl_set_free(set);
  674. if (nvar < 0)
  675. return isl_stat_error;
  676. return isl_stat_ok;
  677. }
  678. /* Compute the number of rows that should be allocated for the schedule.
  679. * In particular, we need one row for each variable or one row
  680. * for each basic map in the dependences.
  681. * Note that it is practically impossible to exhaust both
  682. * the number of dependences and the number of variables.
  683. */
  684. static isl_stat compute_max_row(struct isl_sched_graph *graph,
  685. __isl_keep isl_schedule_constraints *sc)
  686. {
  687. int n_edge;
  688. isl_stat r;
  689. isl_union_set *domain;
  690. graph->n = 0;
  691. graph->maxvar = 0;
  692. domain = isl_schedule_constraints_get_domain(sc);
  693. r = isl_union_set_foreach_set(domain, &init_n_maxvar, graph);
  694. isl_union_set_free(domain);
  695. if (r < 0)
  696. return isl_stat_error;
  697. n_edge = isl_schedule_constraints_n_basic_map(sc);
  698. if (n_edge < 0)
  699. return isl_stat_error;
  700. graph->max_row = n_edge + graph->maxvar;
  701. return isl_stat_ok;
  702. }
  703. /* Does "bset" have any defining equalities for its set variables?
  704. */
  705. static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset)
  706. {
  707. int i;
  708. isl_size n;
  709. n = isl_basic_set_dim(bset, isl_dim_set);
  710. if (n < 0)
  711. return isl_bool_error;
  712. for (i = 0; i < n; ++i) {
  713. isl_bool has;
  714. has = isl_basic_set_has_defining_equality(bset, isl_dim_set, i,
  715. NULL);
  716. if (has < 0 || has)
  717. return has;
  718. }
  719. return isl_bool_false;
  720. }
  721. /* Set the entries of node->max to the value of the schedule_max_coefficient
  722. * option, if set.
  723. */
  724. static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node)
  725. {
  726. int max;
  727. max = isl_options_get_schedule_max_coefficient(ctx);
  728. if (max == -1)
  729. return isl_stat_ok;
  730. node->max = isl_vec_alloc(ctx, node->nvar);
  731. node->max = isl_vec_set_si(node->max, max);
  732. if (!node->max)
  733. return isl_stat_error;
  734. return isl_stat_ok;
  735. }
/* Set the entries of node->max to the minimum of the schedule_max_coefficient
 * option (if set) and half of the minimum of the sizes in the other
 * dimensions.  Round up when computing the half such that
 * if the minimum of the sizes is one, half of the size is taken to be one
 * rather than zero.
 * If the global minimum is unbounded (i.e., if both
 * the schedule_max_coefficient is not set and the sizes in the other
 * dimensions are unbounded), then store a negative value.
 * If the schedule coefficient is close to the size of the instance set
 * in another dimension, then the schedule may represent a loop
 * coalescing transformation (especially if the coefficient
 * in that other dimension is one).  Forcing the coefficient to be
 * smaller than or equal to half the minimal size should avoid this
 * situation.
 */
static isl_stat compute_max_coefficient(isl_ctx *ctx,
	struct isl_sched_node *node)
{
	int max;
	int i, j;
	isl_vec *v;

	max = isl_options_get_schedule_max_coefficient(ctx);
	v = isl_vec_alloc(ctx, node->nvar);
	if (!v)
		return isl_stat_error;

	/* Start each entry from twice the option value (-2 when the
	 * option is unset, i.e., -1, keeping the entry negative),
	 * since everything is halved (rounding up) at the end. */
	for (i = 0; i < node->nvar; ++i) {
		isl_int_set_si(v->el[i], max);
		isl_int_mul_si(v->el[i], v->el[i], 2);
	}
	/* Tighten every other entry to the (integer) size of dimension i;
	 * a negative entry means no bound has been found so far. */
	for (i = 0; i < node->nvar; ++i) {
		isl_val *size;

		size = isl_multi_val_get_val(node->sizes, i);
		if (!size)
			goto error;
		/* A non-integer size means the dimension is unbounded. */
		if (!isl_val_is_int(size)) {
			isl_val_free(size);
			continue;
		}
		for (j = 0; j < node->nvar; ++j) {
			if (j == i)
				continue;
			if (isl_int_is_neg(v->el[j]) ||
			    isl_int_gt(v->el[j], size->n))
				isl_int_set(v->el[j], size->n);
		}
		isl_val_free(size);
	}
	/* Halve with rounding up, so a minimal size of one yields one. */
	for (i = 0; i < node->nvar; ++i)
		isl_int_cdiv_q_ui(v->el[i], v->el[i], 2);

	node->max = v;
	return isl_stat_ok;
error:
	isl_vec_free(v);
	return isl_stat_error;
}
  791. /* Construct an identifier for node "node", which will represent "set".
  792. * The name of the identifier is either "compressed" or
  793. * "compressed_<name>", with <name> the name of the space of "set".
  794. * The user pointer of the identifier points to "node".
  795. */
  796. static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set,
  797. struct isl_sched_node *node)
  798. {
  799. isl_bool has_name;
  800. isl_ctx *ctx;
  801. isl_id *id;
  802. isl_printer *p;
  803. const char *name;
  804. char *id_name;
  805. has_name = isl_set_has_tuple_name(set);
  806. if (has_name < 0)
  807. return NULL;
  808. ctx = isl_set_get_ctx(set);
  809. if (!has_name)
  810. return isl_id_alloc(ctx, "compressed", node);
  811. p = isl_printer_to_str(ctx);
  812. name = isl_set_get_tuple_name(set);
  813. p = isl_printer_print_str(p, "compressed_");
  814. p = isl_printer_print_str(p, name);
  815. id_name = isl_printer_get_str(p);
  816. isl_printer_free(p);
  817. id = isl_id_alloc(ctx, id_name, node);
  818. free(id_name);
  819. return id;
  820. }
  821. /* Construct a map that isolates the variable in position "pos" in "set".
  822. *
  823. * That is, construct
  824. *
  825. * [i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos]
  826. */
  827. static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos)
  828. {
  829. isl_map *map;
  830. map = isl_set_project_onto_map(set, isl_dim_set, pos, 1);
  831. map = isl_map_project_out(map, isl_dim_in, pos, 1);
  832. return map;
  833. }
/* Compute and return the size of "set" in dimension "dim".
 * The size is taken to be the difference in values for that variable
 * for fixed values of the other variables.
 * This assumes that "set" is convex.
 * In particular, the variable is first isolated from the other variables
 * in the range of a map
 *
 *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim]
 *
 * and then duplicated
 *
 *	[i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']]
 *
 * The shared variables are then projected out and the maximal value
 * of i_dim' - i_dim is computed.
 */
static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim)
{
	isl_map *map;
	isl_local_space *ls;
	isl_aff *obj;
	isl_val *v;

	/* Isolate i_dim and pair it with a copy of itself. */
	map = isolate(set, dim);
	map = isl_map_range_product(map, isl_map_copy(map));
	/* Drop the shared variables, leaving [i_dim] -> [i_dim']. */
	map = isl_set_unwrap(isl_map_range(map));
	/* The deltas are the possible values of i_dim' - i_dim. */
	set = isl_map_deltas(map);
	ls = isl_local_space_from_space(isl_set_get_space(set));
	obj = isl_aff_var_on_domain(ls, isl_dim_set, 0);
	v = isl_set_max_val(set, obj);
	isl_aff_free(obj);
	isl_set_free(set);
	return v;
}
/* Perform a compression on "node" where "hull" represents the constraints
 * that were used to derive the compression, while "compress" and
 * "decompress" map the original space to the compressed space and
 * vice versa.
 *
 * If "node" was not compressed already, then simply store
 * the compression information.
 * Otherwise the "original" space is actually the result
 * of a previous compression, which is then combined
 * with the present compression.
 *
 * The dimensionality of the compressed domain is also adjusted.
 * Other information, such as the sizes and the maximal coefficient values,
 * has not been computed yet and therefore does not need to be adjusted.
 */
static isl_stat compress_node(struct isl_sched_node *node,
	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
	__isl_take isl_pw_multi_aff *decompress)
{
	node->nvar = isl_multi_aff_dim(compress, isl_dim_out);
	if (!node->compressed) {
		/* First compression: the node takes ownership
		 * of all three objects as-is. */
		node->compressed = 1;
		node->hull = hull;
		node->compress = compress;
		node->decompress = decompress;
	} else {
		/* Combine with the previous compression: map the new hull
		 * back to the original space and compose the two
		 * compression/decompression functions. */
		hull = isl_set_preimage_multi_aff(hull,
					    isl_multi_aff_copy(node->compress));
		node->hull = isl_set_intersect(node->hull, hull);
		node->compress = isl_multi_aff_pullback_multi_aff(
						compress, node->compress);
		node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff(
						node->decompress, decompress);
	}
	/* Any failure in the operations above leaves a NULL field behind. */
	if (!node->hull || !node->compress || !node->decompress)
		return isl_stat_error;

	return isl_stat_ok;
}
/* Given that dimension "pos" in "set" has a fixed value
 * in terms of the other dimensions, (further) compress "node"
 * by projecting out this dimension.
 * "set" may be the result of a previous compression.
 * "uncompressed" is the original domain (without compression).
 *
 * The compression function simply projects out the dimension.
 * The decompression function adds back the dimension
 * in the right position as an expression of the other dimensions
 * derived from "set".
 * As in extract_node, the compressed space has an identifier
 * that references "node" such that each compressed space is unique and
 * such that the node can be recovered from the compressed space.
 *
 * The constraint removed through the compression is added to the "hull"
 * such that only edges that relate to the original domains
 * are taken into account.
 * In particular, it is obtained by composing compression and decompression and
 * taking the relation among the variables in the range.
 */
static isl_stat project_out_fixed(struct isl_sched_node *node,
	__isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos)
{
	isl_id *id;
	isl_space *space;
	isl_set *domain;
	isl_map *map;
	isl_multi_aff *compress;
	isl_pw_multi_aff *decompress, *pma;
	isl_multi_pw_aff *mpa;
	isl_set *hull;

	/* Express dimension "pos" as a function of the other dimensions,
	 * simplified with respect to its own domain. */
	map = isolate(isl_set_copy(set), pos);
	pma = isl_pw_multi_aff_from_map(map);
	domain = isl_pw_multi_aff_domain(isl_pw_multi_aff_copy(pma));
	pma = isl_pw_multi_aff_gist(pma, domain);
	/* Decompression: the identity on the remaining dimensions,
	 * with the isolated expression spliced back in at position "pos". */
	space = isl_pw_multi_aff_get_domain_space(pma);
	mpa = isl_multi_pw_aff_identity(isl_space_map_from_set(space));
	mpa = isl_multi_pw_aff_range_splice(mpa, pos,
				    isl_multi_pw_aff_from_pw_multi_aff(pma));
	decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa);
	/* Compression: simply project out dimension "pos",
	 * using an identifier that references "node". */
	space = isl_set_get_space(set);
	compress = isl_multi_aff_project_out_map(space, isl_dim_set, pos, 1);
	id = construct_compressed_id(uncompressed, node);
	compress = isl_multi_aff_set_tuple_id(compress, isl_dim_out, id);
	/* Let the decompression map from the compressed space
	 * back to the space of "set". */
	space = isl_space_reverse(isl_multi_aff_get_space(compress));
	decompress = isl_pw_multi_aff_reset_space(decompress, space);
	/* The removed constraint: the relation among the original variables
	 * induced by composing compression and decompression. */
	pma = isl_pw_multi_aff_pullback_multi_aff(
	    isl_pw_multi_aff_copy(decompress), isl_multi_aff_copy(compress));
	hull = isl_map_range(isl_map_from_pw_multi_aff(pma));

	isl_set_free(set);

	return compress_node(node, hull, compress, decompress);
}
/* Compute the size of the compressed domain in each dimension and
 * store the results in node->sizes.
 * "uncompressed" is the original domain (without compression).
 *
 * First compress the domain if needed and then compute the size
 * in each direction.
 * If the domain is not convex, then the sizes are computed
 * on a convex superset in order to avoid picking up sizes
 * that are valid for the individual disjuncts, but not for
 * the domain as a whole.
 *
 * If any of the sizes turns out to be zero, then this means
 * that this dimension has a fixed value in terms of
 * the other dimensions.  Perform an (extra) compression
 * to remove this dimension.
 */
static isl_stat compute_sizes(struct isl_sched_node *node,
	__isl_keep isl_set *uncompressed)
{
	int j;
	isl_size n;
	isl_multi_val *mv;
	isl_set *set = isl_set_copy(uncompressed);

	/* Work in the compressed space, if the node has been compressed. */
	if (node->compressed)
		set = isl_set_preimage_pw_multi_aff(set,
				    isl_pw_multi_aff_copy(node->decompress));
	/* Use a convex superset so that the sizes hold for the domain
	 * as a whole rather than for individual disjuncts. */
	set = isl_set_from_basic_set(isl_set_simple_hull(set));
	mv = isl_multi_val_zero(isl_set_get_space(set));
	n = isl_set_dim(set, isl_dim_set);
	if (n < 0)
		mv = isl_multi_val_free(mv);
	for (j = 0; j < n; ++j) {
		isl_bool is_zero;
		isl_val *v;

		v = compute_size(isl_set_copy(set), j);
		is_zero = isl_val_is_zero(v);
		mv = isl_multi_val_set_val(mv, j, v);
		if (is_zero >= 0 && is_zero) {
			/* Dimension "j" has a fixed value: compress it away
			 * (this consumes "set") and restart the size
			 * computation on the further compressed domain. */
			isl_multi_val_free(mv);
			if (project_out_fixed(node, uncompressed, set, j) < 0)
				return isl_stat_error;
			return compute_sizes(node, uncompressed);
		}
	}
	node->sizes = mv;
	isl_set_free(set);
	if (!node->sizes)
		return isl_stat_error;
	return isl_stat_ok;
}
  1007. /* Compute the size of the instance set "set" of "node", after compression,
  1008. * as well as bounds on the corresponding coefficients, if needed.
  1009. *
  1010. * The sizes are needed when the schedule_treat_coalescing option is set.
  1011. * The bounds are needed when the schedule_treat_coalescing option or
  1012. * the schedule_max_coefficient option is set.
  1013. *
  1014. * If the schedule_treat_coalescing option is not set, then at most
  1015. * the bounds need to be set and this is done in set_max_coefficient.
  1016. * Otherwise, compute the size of the compressed domain
  1017. * in each direction and store the results in node->size.
  1018. * Finally, set the bounds on the coefficients based on the sizes
  1019. * and the schedule_max_coefficient option in compute_max_coefficient.
  1020. */
  1021. static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node,
  1022. __isl_take isl_set *set)
  1023. {
  1024. isl_stat r;
  1025. if (!isl_options_get_schedule_treat_coalescing(ctx)) {
  1026. isl_set_free(set);
  1027. return set_max_coefficient(ctx, node);
  1028. }
  1029. r = compute_sizes(node, set);
  1030. isl_set_free(set);
  1031. if (r < 0)
  1032. return isl_stat_error;
  1033. return compute_max_coefficient(ctx, node);
  1034. }
/* Add a new node to the graph representing the given instance set.
 * "nvar" is the (possibly compressed) number of variables and
 * may be smaller than then number of set variables in "set"
 * if "compressed" is set.
 * If "compressed" is set, then "hull" represents the constraints
 * that were used to derive the compression, while "compress" and
 * "decompress" map the original space to the compressed space and
 * vice versa.
 * If "compressed" is not set, then "hull", "compress" and "decompress"
 * should be NULL.
 *
 * Compute the size of the instance set and bounds on the coefficients,
 * if needed.
 */
static isl_stat add_node(struct isl_sched_graph *graph,
	__isl_take isl_set *set, int nvar, int compressed,
	__isl_take isl_set *hull, __isl_take isl_multi_aff *compress,
	__isl_take isl_pw_multi_aff *decompress)
{
	isl_size nparam;
	isl_ctx *ctx;
	isl_mat *sched;
	isl_space *space;
	int *coincident;
	struct isl_sched_node *node;

	nparam = isl_set_dim(set, isl_dim_param);
	if (nparam < 0)
		goto error;

	ctx = isl_set_get_ctx(set);
	/* Without parametric scheduling, no parameter coefficients are used. */
	if (!ctx->opt->schedule_parametric)
		nparam = 0;
	/* The schedule matrix starts out with zero rows; its columns cover
	 * the constant term, the parameters and the (compressed) variables. */
	sched = isl_mat_alloc(ctx, 0, 1 + nparam + nvar);
	node = &graph->node[graph->n];
	graph->n++;
	space = isl_set_get_space(set);
	node->space = space;
	node->nvar = nvar;
	node->nparam = nparam;
	node->sched = sched;
	node->sched_map = NULL;
	coincident = isl_calloc_array(ctx, int, graph->max_row);
	node->coincident = coincident;
	node->compressed = compressed;
	node->hull = hull;
	node->compress = compress;
	node->decompress = decompress;
	/* "set" is consumed here; the objects stored in the node above
	 * are now owned by the graph, so failures below simply report
	 * the error without freeing them here. */
	if (compute_sizes_and_max(ctx, node, set) < 0)
		return isl_stat_error;

	/* Check for allocation failures in the objects constructed above. */
	if (!space || !sched || (graph->max_row && !coincident))
		return isl_stat_error;
	if (compressed && (!hull || !compress || !decompress))
		return isl_stat_error;

	return isl_stat_ok;
error:
	isl_set_free(set);
	isl_set_free(hull);
	isl_multi_aff_free(compress);
	isl_pw_multi_aff_free(decompress);
	return isl_stat_error;
}
/* Add a new node to the graph representing the given set.
 *
 * If any of the set variables is defined by an equality, then
 * we perform variable compression such that we can perform
 * the scheduling on the compressed domain.
 * In this case, an identifier is used that references the new node
 * such that each compressed space is unique and
 * such that the node can be recovered from the compressed space.
 */
static isl_stat extract_node(__isl_take isl_set *set, void *user)
{
	isl_size nvar;
	isl_bool has_equality;
	isl_id *id;
	isl_basic_set *hull;
	isl_set *hull_set;
	isl_morph *morph;
	isl_multi_aff *compress, *decompress_ma;
	isl_pw_multi_aff *decompress;
	struct isl_sched_graph *graph = user;

	/* The affine hull exposes the equalities satisfied by "set";
	 * existentially quantified variables are removed first. */
	hull = isl_set_affine_hull(isl_set_copy(set));
	hull = isl_basic_set_remove_divs(hull);
	nvar = isl_set_dim(set, isl_dim_set);
	has_equality = has_any_defining_equality(hull);

	if (nvar < 0 || has_equality < 0)
		goto error;
	if (!has_equality) {
		/* No defining equality: add the node without compression. */
		isl_basic_set_free(hull);
		return add_node(graph, set, nvar, 0, NULL, NULL, NULL);
	}

	/* The identifier references the node about to be added
	 * (at position graph->n), making the compressed space unique. */
	id = construct_compressed_id(set, &graph->node[graph->n]);
	morph = isl_basic_set_variable_compression_with_id(hull, id);
	isl_id_free(id);
	nvar = isl_morph_ran_dim(morph, isl_dim_set);
	if (nvar < 0)
		set = isl_set_free(set);
	compress = isl_morph_get_var_multi_aff(morph);
	morph = isl_morph_inverse(morph);
	decompress_ma = isl_morph_get_var_multi_aff(morph);
	decompress = isl_pw_multi_aff_from_multi_aff(decompress_ma);
	isl_morph_free(morph);

	hull_set = isl_set_from_basic_set(hull);
	return add_node(graph, set, nvar, 1, hull_set, compress, decompress);
error:
	isl_basic_set_free(hull);
	isl_set_free(set);
	return isl_stat_error;
}
/* Internal data structure for extract_edge.
 *
 * "type" is the type of the edges currently being extracted.
 * "graph" is the scheduling graph to which the edges are added.
 */
struct isl_extract_edge_data {
	enum isl_edge_type type;
	struct isl_sched_graph *graph;
};
/* Merge edge2 into edge1, freeing the contents of edge2.
 * Return 0 on success and -1 on failure.
 *
 * edge1 and edge2 are assumed to have the same value for the map field.
 */
static int merge_edge(struct isl_sched_edge *edge1,
	struct isl_sched_edge *edge2)
{
	/* edge1 now represents all dependence types of both edges;
	 * edge2's copy of the (identical) map is no longer needed. */
	edge1->types |= edge2->types;
	isl_map_free(edge2->map);

	if (is_condition(edge2)) {
		if (!edge1->tagged_condition)
			/* Transfer ownership of edge2's tagged relation. */
			edge1->tagged_condition = edge2->tagged_condition;
		else
			/* Both maps are consumed by the union. */
			edge1->tagged_condition =
				isl_union_map_union(edge1->tagged_condition,
						    edge2->tagged_condition);
	}

	if (is_conditional_validity(edge2)) {
		if (!edge1->tagged_validity)
			/* Transfer ownership of edge2's tagged relation. */
			edge1->tagged_validity = edge2->tagged_validity;
		else
			/* Both maps are consumed by the union. */
			edge1->tagged_validity =
				isl_union_map_union(edge1->tagged_validity,
						    edge2->tagged_validity);
	}

	/* The unions above may have failed, leaving a NULL field. */
	if (is_condition(edge2) && !edge1->tagged_condition)
		return -1;
	if (is_conditional_validity(edge2) && !edge1->tagged_validity)
		return -1;

	return 0;
}
  1179. /* Insert dummy tags in domain and range of "map".
  1180. *
  1181. * In particular, if "map" is of the form
  1182. *
  1183. * A -> B
  1184. *
  1185. * then return
  1186. *
  1187. * [A -> dummy_tag] -> [B -> dummy_tag]
  1188. *
  1189. * where the dummy_tags are identical and equal to any dummy tags
  1190. * introduced by any other call to this function.
  1191. */
  1192. static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map)
  1193. {
  1194. static char dummy;
  1195. isl_ctx *ctx;
  1196. isl_id *id;
  1197. isl_space *space;
  1198. isl_set *domain, *range;
  1199. ctx = isl_map_get_ctx(map);
  1200. id = isl_id_alloc(ctx, NULL, &dummy);
  1201. space = isl_space_params(isl_map_get_space(map));
  1202. space = isl_space_set_from_params(space);
  1203. space = isl_space_set_tuple_id(space, isl_dim_set, id);
  1204. space = isl_space_map_from_set(space);
  1205. domain = isl_map_wrap(map);
  1206. range = isl_map_wrap(isl_map_universe(space));
  1207. map = isl_map_from_domain_and_range(domain, range);
  1208. map = isl_map_zip(map);
  1209. return map;
  1210. }
  1211. /* Given that at least one of "src" or "dst" is compressed, return
  1212. * a map between the spaces of these nodes restricted to the affine
  1213. * hull that was used in the compression.
  1214. */
  1215. static __isl_give isl_map *extract_hull(struct isl_sched_node *src,
  1216. struct isl_sched_node *dst)
  1217. {
  1218. isl_set *dom, *ran;
  1219. if (src->compressed)
  1220. dom = isl_set_copy(src->hull);
  1221. else
  1222. dom = isl_set_universe(isl_space_copy(src->space));
  1223. if (dst->compressed)
  1224. ran = isl_set_copy(dst->hull);
  1225. else
  1226. ran = isl_set_universe(isl_space_copy(dst->space));
  1227. return isl_map_from_domain_and_range(dom, ran);
  1228. }
  1229. /* Intersect the domains of the nested relations in domain and range
  1230. * of "tagged" with "map".
  1231. */
  1232. static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged,
  1233. __isl_keep isl_map *map)
  1234. {
  1235. isl_set *set;
  1236. tagged = isl_map_zip(tagged);
  1237. set = isl_map_wrap(isl_map_copy(map));
  1238. tagged = isl_map_intersect_domain(tagged, set);
  1239. tagged = isl_map_zip(tagged);
  1240. return tagged;
  1241. }
  1242. /* Return a pointer to the node that lives in the domain space of "map",
  1243. * an invalid node if there is no such node, or NULL in case of error.
  1244. */
  1245. static struct isl_sched_node *find_domain_node(isl_ctx *ctx,
  1246. struct isl_sched_graph *graph, __isl_keep isl_map *map)
  1247. {
  1248. struct isl_sched_node *node;
  1249. isl_space *space;
  1250. space = isl_space_domain(isl_map_get_space(map));
  1251. node = graph_find_node(ctx, graph, space);
  1252. isl_space_free(space);
  1253. return node;
  1254. }
  1255. /* Return a pointer to the node that lives in the range space of "map",
  1256. * an invalid node if there is no such node, or NULL in case of error.
  1257. */
  1258. static struct isl_sched_node *find_range_node(isl_ctx *ctx,
  1259. struct isl_sched_graph *graph, __isl_keep isl_map *map)
  1260. {
  1261. struct isl_sched_node *node;
  1262. isl_space *space;
  1263. space = isl_space_range(isl_map_get_space(map));
  1264. node = graph_find_node(ctx, graph, space);
  1265. isl_space_free(space);
  1266. return node;
  1267. }
  1268. /* Refrain from adding a new edge based on "map".
  1269. * Instead, just free the map.
  1270. * "tagged" is either a copy of "map" with additional tags or NULL.
  1271. */
  1272. static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged)
  1273. {
  1274. isl_map_free(map);
  1275. isl_map_free(tagged);
  1276. return isl_stat_ok;
  1277. }
/* Add a new edge to the graph based on the given map
 * and add it to data->graph->edge_table[data->type].
 * If a dependence relation of a given type happens to be identical
 * to one of the dependence relations of a type that was added before,
 * then we don't create a new edge, but instead mark the original edge
 * as also representing a dependence of the current type.
 *
 * Edges of type isl_edge_condition or isl_edge_conditional_validity
 * may be specified as "tagged" dependence relations.  That is, "map"
 * may contain elements (i -> a) -> (j -> b), where i -> j denotes
 * the dependence on iterations and a and b are tags.
 * edge->map is set to the relation containing the elements i -> j,
 * while edge->tagged_condition and edge->tagged_validity contain
 * the union of all the "map" relations
 * for which extract_edge is called that result in the same edge->map.
 *
 * If the source or the destination node is compressed, then
 * intersect both "map" and "tagged" with the constraints that
 * were used to construct the compression.
 * This ensures that there are no schedule constraints defined
 * outside of these domains, while the scheduler no longer has
 * any control over those outside parts.
 */
static isl_stat extract_edge(__isl_take isl_map *map, void *user)
{
	isl_bool empty;
	isl_ctx *ctx = isl_map_get_ctx(map);
	struct isl_extract_edge_data *data = user;
	struct isl_sched_graph *graph = data->graph;
	struct isl_sched_node *src, *dst;
	struct isl_sched_edge *edge;
	isl_map *tagged = NULL;

	if (data->type == isl_edge_condition ||
	    data->type == isl_edge_conditional_validity) {
		if (isl_map_can_zip(map)) {
			/* "map" is tagged: keep the tagged version and
			 * strip the tags from "map" itself. */
			tagged = isl_map_copy(map);
			map = isl_set_unwrap(isl_map_domain(isl_map_zip(map)));
		} else {
			/* Untagged input: insert dummy tags so that the
			 * tagged relation is always available. */
			tagged = insert_dummy_tags(isl_map_copy(map));
		}
	}

	src = find_domain_node(ctx, graph, map);
	dst = find_range_node(ctx, graph, map);

	if (!src || !dst)
		goto error;
	/* Dependences involving spaces without a node are ignored. */
	if (!is_node(graph, src) || !is_node(graph, dst))
		return skip_edge(map, tagged);

	if (src->compressed || dst->compressed) {
		isl_map *hull;
		hull = extract_hull(src, dst);
		if (tagged)
			tagged = map_intersect_domains(tagged, hull);
		map = isl_map_intersect(map, hull);
	}

	empty = isl_map_plain_is_empty(map);
	if (empty < 0)
		goto error;
	if (empty)
		return skip_edge(map, tagged);

	/* Fill in the next free slot of the edge array. */
	graph->edge[graph->n_edge].src = src;
	graph->edge[graph->n_edge].dst = dst;
	graph->edge[graph->n_edge].map = map;
	graph->edge[graph->n_edge].types = 0;
	graph->edge[graph->n_edge].tagged_condition = NULL;
	graph->edge[graph->n_edge].tagged_validity = NULL;
	set_type(&graph->edge[graph->n_edge], data->type);
	if (data->type == isl_edge_condition)
		graph->edge[graph->n_edge].tagged_condition =
					isl_union_map_from_map(tagged);
	if (data->type == isl_edge_conditional_validity)
		graph->edge[graph->n_edge].tagged_validity =
					isl_union_map_from_map(tagged);

	edge = graph_find_matching_edge(graph, &graph->edge[graph->n_edge]);
	if (!edge) {
		/* NOTE(review): "map" is already stored in the new edge;
		 * bumping n_edge here appears intended to let the graph
		 * cleanup code free it - confirm against graph_free. */
		graph->n_edge++;
		return isl_stat_error;
	}
	if (edge == &graph->edge[graph->n_edge])
		/* No identical earlier edge: keep the newly filled slot. */
		return graph_edge_table_add(ctx, graph, data->type,
				    &graph->edge[graph->n_edge++]);

	/* An edge with the same map exists: merge the new slot into it
	 * (the slot is not kept, so n_edge is not incremented). */
	if (merge_edge(edge, &graph->edge[graph->n_edge]) < 0)
		return isl_stat_error;

	return graph_edge_table_add(ctx, graph, data->type, edge);
error:
	isl_map_free(map);
	isl_map_free(tagged);
	return isl_stat_error;
}
/* Initialize the schedule graph "graph" from the schedule constraints "sc".
 *
 * The context is included in the domain before the nodes of
 * the graphs are extracted in order to be able to exploit
 * any possible additional equalities.
 * Note that this intersection is only performed locally here.
 */
static isl_stat graph_init(struct isl_sched_graph *graph,
	__isl_keep isl_schedule_constraints *sc)
{
	isl_ctx *ctx;
	isl_union_set *domain;
	isl_union_map *c;
	struct isl_extract_edge_data data;
	enum isl_edge_type i;
	isl_stat r;
	isl_size n;

	if (!sc)
		return isl_stat_error;

	ctx = isl_schedule_constraints_get_ctx(sc);

	/* Size the node and edge arrays from the number of domain sets
	 * and the total number of constraint maps. */
	domain = isl_schedule_constraints_get_domain(sc);
	n = isl_union_set_n_set(domain);
	graph->n = n;
	isl_union_set_free(domain);
	if (n < 0)
		return isl_stat_error;
	n = isl_schedule_constraints_n_map(sc);
	if (n < 0 || graph_alloc(ctx, graph, graph->n, n) < 0)
		return isl_stat_error;

	if (compute_max_row(graph, sc) < 0)
		return isl_stat_error;
	graph->root = graph;
	/* Reset the node count; extract_node increments it per node added. */
	graph->n = 0;
	domain = isl_schedule_constraints_get_domain(sc);
	domain = isl_union_set_intersect_params(domain,
				    isl_schedule_constraints_get_context(sc));
	r = isl_union_set_foreach_set(domain, &extract_node, graph);
	isl_union_set_free(domain);
	if (r < 0)
		return isl_stat_error;
	if (graph_init_table(ctx, graph) < 0)
		return isl_stat_error;
	/* Record an upper bound on the number of edges of each type. */
	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
		isl_size n;

		c = isl_schedule_constraints_get(sc, i);
		n = isl_union_map_n_map(c);
		graph->max_edge[i] = n;
		isl_union_map_free(c);
		if (n < 0)
			return isl_stat_error;
	}
	if (graph_init_edge_tables(ctx, graph) < 0)
		return isl_stat_error;
	/* Extract the edges, one constraint type at a time;
	 * extract_edge increments graph->n_edge per edge kept. */
	graph->n_edge = 0;
	data.graph = graph;
	for (i = isl_edge_first; i <= isl_edge_last; ++i) {
		isl_stat r;

		data.type = i;
		c = isl_schedule_constraints_get(sc, i);
		r = isl_union_map_foreach_map(c, &extract_edge, &data);
		isl_union_map_free(c);
		if (r < 0)
			return isl_stat_error;
	}

	return isl_stat_ok;
}
  1432. /* Check whether there is any dependence from node[j] to node[i]
  1433. * or from node[i] to node[j].
  1434. */
  1435. static isl_bool node_follows_weak(int i, int j, void *user)
  1436. {
  1437. isl_bool f;
  1438. struct isl_sched_graph *graph = user;
  1439. f = graph_has_any_edge(graph, &graph->node[j], &graph->node[i]);
  1440. if (f < 0 || f)
  1441. return f;
  1442. return graph_has_any_edge(graph, &graph->node[i], &graph->node[j]);
  1443. }
  1444. /* Check whether there is a (conditional) validity dependence from node[j]
  1445. * to node[i], forcing node[i] to follow node[j].
  1446. */
  1447. static isl_bool node_follows_strong(int i, int j, void *user)
  1448. {
  1449. struct isl_sched_graph *graph = user;
  1450. return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
  1451. }
/* Use Tarjan's algorithm for computing the strongly connected components
 * in the dependence graph only considering those edges defined by "follows".
 */
static isl_stat detect_ccs(isl_ctx *ctx, struct isl_sched_graph *graph,
	isl_bool (*follows)(int i, int j, void *user))
{
	int i, n;
	struct isl_tarjan_graph *g = NULL;

	g = isl_tarjan_graph_init(ctx, graph->n, follows, graph);
	if (!g)
		return isl_stat_error;

	/* Assign a component number to every node.
	 * g->order is read as a sequence of components, each a run of
	 * node indices terminated by a -1 marker; "n" counts down
	 * the nodes still to be assigned. */
	graph->scc = 0;
	i = 0;
	n = graph->n;
	while (n) {
		while (g->order[i] != -1) {
			graph->node[g->order[i]].scc = graph->scc;
			--n;
			++i;
		}
		/* Skip the -1 separator and move to the next component. */
		++i;

		graph->scc++;
	}

	isl_tarjan_graph_free(g);

	return isl_stat_ok;
}
  1478. /* Apply Tarjan's algorithm to detect the strongly connected components
  1479. * in the dependence graph.
  1480. * Only consider the (conditional) validity dependences and clear "weak".
  1481. */
  1482. static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph)
  1483. {
  1484. graph->weak = 0;
  1485. return detect_ccs(ctx, graph, &node_follows_strong);
  1486. }
  1487. /* Apply Tarjan's algorithm to detect the (weakly) connected components
  1488. * in the dependence graph.
  1489. * Consider all dependences and set "weak".
  1490. */
  1491. static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph)
  1492. {
  1493. graph->weak = 1;
  1494. return detect_ccs(ctx, graph, &node_follows_weak);
  1495. }
  1496. static int cmp_scc(const void *a, const void *b, void *data)
  1497. {
  1498. struct isl_sched_graph *graph = data;
  1499. const int *i1 = a;
  1500. const int *i2 = b;
  1501. return graph->node[*i1].scc - graph->node[*i2].scc;
  1502. }
  1503. /* Sort the elements of graph->sorted according to the corresponding SCCs.
  1504. */
  1505. static int sort_sccs(struct isl_sched_graph *graph)
  1506. {
  1507. return isl_sort(graph->sorted, graph->n, sizeof(int), &cmp_scc, graph);
  1508. }
/* Return a non-parametric set in the compressed space of "node" that is
 * bounded by the size in each direction
 *
 *	{ [x] : -S_i <= x_i <= S_i }
 *
 * If S_i is infinity in direction i, then there are no constraints
 * in that direction.
 *
 * Cache the result in node->bounds.
 */
static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node)
{
	isl_space *space;
	isl_basic_set *bounds;
	int i;

	/* Return a copy of the cached result, if available. */
	if (node->bounds)
		return isl_basic_set_copy(node->bounds);

	if (node->compressed)
		space = isl_pw_multi_aff_get_domain_space(node->decompress);
	else
		space = isl_space_copy(node->space);
	space = isl_space_drop_all_params(space);
	bounds = isl_basic_set_universe(space);

	for (i = 0; i < node->nvar; ++i) {
		isl_val *size;

		size = isl_multi_val_get_val(node->sizes, i);
		if (!size)
			return isl_basic_set_free(bounds);
		/* A non-integer (i.e., infinite) size imposes no bound
		 * in this direction. */
		if (!isl_val_is_int(size)) {
			isl_val_free(size);
			continue;
		}
		bounds = isl_basic_set_upper_bound_val(bounds, isl_dim_set, i,
							isl_val_copy(size));
		bounds = isl_basic_set_lower_bound_val(bounds, isl_dim_set, i,
							isl_val_neg(size));
	}

	node->bounds = isl_basic_set_copy(bounds);
	return bounds;
}
  1549. /* Compress the dependence relation "map", if needed, i.e.,
  1550. * when the source node "src" and/or the destination node "dst"
  1551. * has been compressed.
  1552. */
  1553. static __isl_give isl_map *compress(__isl_take isl_map *map,
  1554. struct isl_sched_node *src, struct isl_sched_node *dst)
  1555. {
  1556. if (src->compressed)
  1557. map = isl_map_preimage_domain_pw_multi_aff(map,
  1558. isl_pw_multi_aff_copy(src->decompress));
  1559. if (dst->compressed)
  1560. map = isl_map_preimage_range_pw_multi_aff(map,
  1561. isl_pw_multi_aff_copy(dst->decompress));
  1562. return map;
  1563. }
  1564. /* Drop some constraints from "delta" that could be exploited
  1565. * to construct loop coalescing schedules.
  1566. * In particular, drop those constraint that bound the difference
  1567. * to the size of the domain.
  1568. * First project out the parameters to improve the effectiveness.
  1569. */
  1570. static __isl_give isl_set *drop_coalescing_constraints(
  1571. __isl_take isl_set *delta, struct isl_sched_node *node)
  1572. {
  1573. isl_size nparam;
  1574. isl_basic_set *bounds;
  1575. nparam = isl_set_dim(delta, isl_dim_param);
  1576. if (nparam < 0)
  1577. return isl_set_free(delta);
  1578. bounds = get_size_bounds(node);
  1579. delta = isl_set_project_out(delta, isl_dim_param, 0, nparam);
  1580. delta = isl_set_remove_divs(delta);
  1581. delta = isl_set_plain_gist_basic_set(delta, bounds);
  1582. return delta;
  1583. }
/* Given a dependence relation R from "node" to itself,
 * construct the set of coefficients of valid constraints for elements
 * in that dependence relation.
 * In particular, the result contains tuples of coefficients
 * c_0, c_n, c_x such that
 *
 *	c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R
 *
 * or, equivalently,
 *
 *	c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
 *
 * We choose here to compute the dual of delta R.
 * Alternatively, we could have computed the dual of R, resulting
 * in a set of tuples c_0, c_n, c_x, c_y, and then
 * plugged in (c_0, c_n, c_x, -c_x).
 *
 * If "need_param" is set, then the resulting coefficients effectively
 * include coefficients for the parameters c_n.  Otherwise, they may
 * have been projected out already.
 * Since the constraints may be different for these two cases,
 * they are stored in separate caches.
 * In particular, if no parameter coefficients are required and
 * the schedule_treat_coalescing option is set, then the parameters
 * are projected out and some constraints that could be exploited
 * to construct coalescing schedules are removed before the dual
 * is computed.
 *
 * If "node" has been compressed, then the dependence relation
 * is also compressed before the set of coefficients is computed.
 */
static __isl_give isl_basic_set *intra_coefficients(
	struct isl_sched_graph *graph, struct isl_sched_node *node,
	__isl_take isl_map *map, int need_param)
{
	isl_ctx *ctx;
	isl_set *delta;
	isl_map *key;
	isl_basic_set *coef;
	isl_maybe_isl_basic_set m;
	isl_map_to_basic_set **hmap = &graph->intra_hmap;
	int treat;

	if (!map)
		return NULL;

	ctx = isl_map_get_ctx(map);
	treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx);
	/* Select the cache matching the variant being computed. */
	if (!treat)
		hmap = &graph->intra_hmap_param;
	m = isl_map_to_basic_set_try_get(*hmap, map);
	if (m.valid < 0 || m.valid) {
		/* Cache hit (or error): return the cached value. */
		isl_map_free(map);
		return m.value;
	}

	/* The cache key is the original (uncompressed) relation. */
	key = isl_map_copy(map);
	map = compress(map, node, node);
	delta = isl_map_deltas(map);
	if (treat)
		delta = drop_coalescing_constraints(delta, node);
	delta = isl_set_remove_divs(delta);
	coef = isl_set_coefficients(delta);
	*hmap = isl_map_to_basic_set_set(*hmap, key, isl_basic_set_copy(coef));

	return coef;
}
/* Given a dependence relation R, construct the set of coefficients
 * of valid constraints for elements in that dependence relation.
 * In particular, the result contains tuples of coefficients
 * c_0, c_n, c_x, c_y such that
 *
 *	c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
 *
 * If the source or destination nodes of "edge" have been compressed,
 * then the dependence relation is also compressed before
 * the set of coefficients is computed.
 */
static __isl_give isl_basic_set *inter_coefficients(
	struct isl_sched_graph *graph, struct isl_sched_edge *edge,
	__isl_take isl_map *map)
{
	isl_set *set;
	isl_map *key;
	isl_basic_set *coef;
	isl_maybe_isl_basic_set m;

	/* Return a previously computed result, if available. */
	m = isl_map_to_basic_set_try_get(graph->inter_hmap, map);
	if (m.valid < 0 || m.valid) {
		isl_map_free(map);
		return m.value;
	}

	/* The cache key is the original (uncompressed) relation. */
	key = isl_map_copy(map);
	map = compress(map, edge->src, edge->dst);
	set = isl_map_wrap(isl_map_remove_divs(map));
	coef = isl_set_coefficients(set);
	graph->inter_hmap = isl_map_to_basic_set_set(graph->inter_hmap, key,
					isl_basic_set_copy(coef));

	return coef;
}
  1679. /* Return the position of the coefficients of the variables in
  1680. * the coefficients constraints "coef".
  1681. *
  1682. * The space of "coef" is of the form
  1683. *
  1684. * { coefficients[[cst, params] -> S] }
  1685. *
  1686. * Return the position of S.
  1687. */
  1688. static isl_size coef_var_offset(__isl_keep isl_basic_set *coef)
  1689. {
  1690. isl_size offset;
  1691. isl_space *space;
  1692. space = isl_space_unwrap(isl_basic_set_get_space(coef));
  1693. offset = isl_space_dim(space, isl_dim_in);
  1694. isl_space_free(space);
  1695. return offset;
  1696. }
  1697. /* Return the offset of the coefficient of the constant term of "node"
  1698. * within the (I)LP.
  1699. *
  1700. * Within each node, the coefficients have the following order:
  1701. * - positive and negative parts of c_i_x
  1702. * - c_i_n (if parametric)
  1703. * - c_i_0
  1704. */
  1705. static int node_cst_coef_offset(struct isl_sched_node *node)
  1706. {
  1707. return node->start + 2 * node->nvar + node->nparam;
  1708. }
  1709. /* Return the offset of the coefficients of the parameters of "node"
  1710. * within the (I)LP.
  1711. *
  1712. * Within each node, the coefficients have the following order:
  1713. * - positive and negative parts of c_i_x
  1714. * - c_i_n (if parametric)
  1715. * - c_i_0
  1716. */
  1717. static int node_par_coef_offset(struct isl_sched_node *node)
  1718. {
  1719. return node->start + 2 * node->nvar;
  1720. }
/* Return the offset of the coefficients of the variables of "node"
 * within the (I)LP.
 *
 * Within each node, the coefficients have the following order:
 *	- positive and negative parts of c_i_x
 *	- c_i_n (if parametric)
 *	- c_i_0
 * The variable coefficient pairs therefore sit at the very start
 * of the node's range of LP positions.
 */
static int node_var_coef_offset(struct isl_sched_node *node)
{
	return node->start;
}
  1733. /* Return the position of the pair of variables encoding
  1734. * coefficient "i" of "node".
  1735. *
  1736. * The order of these variable pairs is the opposite of
  1737. * that of the coefficients, with 2 variables per coefficient.
  1738. */
  1739. static int node_var_coef_pos(struct isl_sched_node *node, int i)
  1740. {
  1741. return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i);
  1742. }
  1743. /* Construct an isl_dim_map for mapping constraints on coefficients
  1744. * for "node" to the corresponding positions in graph->lp.
  1745. * "offset" is the offset of the coefficients for the variables
  1746. * in the input constraints.
  1747. * "s" is the sign of the mapping.
  1748. *
  1749. * The input constraints are given in terms of the coefficients
  1750. * (c_0, c_x) or (c_0, c_n, c_x).
  1751. * The mapping produced by this function essentially plugs in
  1752. * (0, c_i_x^+ - c_i_x^-) if s = 1 and
  1753. * (0, -c_i_x^+ + c_i_x^-) if s = -1 or
  1754. * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and
  1755. * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1.
  1756. * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
  1757. * Furthermore, the order of these pairs is the opposite of that
  1758. * of the corresponding coefficients.
  1759. *
  1760. * The caller can extend the mapping to also map the other coefficients
  1761. * (and therefore not plug in 0).
  1762. */
  1763. static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx,
  1764. struct isl_sched_graph *graph, struct isl_sched_node *node,
  1765. int offset, int s)
  1766. {
  1767. int pos;
  1768. isl_size total;
  1769. isl_dim_map *dim_map;
  1770. total = isl_basic_set_dim(graph->lp, isl_dim_all);
  1771. if (!node || total < 0)
  1772. return NULL;
  1773. pos = node_var_coef_pos(node, 0);
  1774. dim_map = isl_dim_map_alloc(ctx, total);
  1775. isl_dim_map_range(dim_map, pos, -2, offset, 1, node->nvar, -s);
  1776. isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, node->nvar, s);
  1777. return dim_map;
  1778. }
/* Construct an isl_dim_map for mapping constraints on coefficients
 * for "src" (node i) and "dst" (node j) to the corresponding positions
 * in graph->lp.
 * "offset" is the offset of the coefficients for the variables of "src"
 * in the input constraints.
 * "s" is the sign of the mapping.
 *
 * The input constraints are given in terms of the coefficients
 * (c_0, c_n, c_x, c_y).
 * The mapping produced by this function essentially plugs in
 * (c_j_0 - c_i_0, c_j_n - c_i_n,
 *  -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and
 * (-c_j_0 + c_i_0, -c_j_n + c_i_n,
 *  c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1.
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
 * Furthermore, the order of these pairs is the opposite of that
 * of the corresponding coefficients.
 *
 * The caller can further extend the mapping.
 */
static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx,
	struct isl_sched_graph *graph, struct isl_sched_node *src,
	struct isl_sched_node *dst, int offset, int s)
{
	int pos;
	isl_size total;
	isl_dim_map *dim_map;

	total = isl_basic_set_dim(graph->lp, isl_dim_all);
	if (!src || !dst || total < 0)
		return NULL;

	dim_map = isl_dim_map_alloc(ctx, total);

	/* Coefficients of "dst" (node j) enter with sign "s":
	 * first the constant term (input position 0), ...
	 */
	pos = node_cst_coef_offset(dst);
	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s);
	/* ... then the parameter coefficients (input positions 1, ...), ... */
	pos = node_par_coef_offset(dst);
	isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s);
	/* ... then the c_y coefficients, which start "src->nvar" positions
	 * after "offset" and are encoded as (c^-, c^+) pairs stored in
	 * reverse order (stride -2).
	 */
	pos = node_var_coef_pos(dst, 0);
	isl_dim_map_range(dim_map, pos, -2, offset + src->nvar, 1,
			dst->nvar, -s);
	isl_dim_map_range(dim_map, pos + 1, -2, offset + src->nvar, 1,
			dst->nvar, s);

	/* Coefficients of "src" (node i) enter with the opposite sign. */
	pos = node_cst_coef_offset(src);
	isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, -s);
	pos = node_par_coef_offset(src);
	isl_dim_map_range(dim_map, pos, 1, 1, 1, src->nparam, -s);
	pos = node_var_coef_pos(src, 0);
	isl_dim_map_range(dim_map, pos, -2, offset, 1, src->nvar, s);
	isl_dim_map_range(dim_map, pos + 1, -2, offset, 1, src->nvar, -s);

	return dim_map;
}
  1828. /* Add the constraints from "src" to "dst" using "dim_map",
  1829. * after making sure there is enough room in "dst" for the extra constraints.
  1830. */
  1831. static __isl_give isl_basic_set *add_constraints_dim_map(
  1832. __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src,
  1833. __isl_take isl_dim_map *dim_map)
  1834. {
  1835. isl_size n_eq, n_ineq;
  1836. n_eq = isl_basic_set_n_equality(src);
  1837. n_ineq = isl_basic_set_n_inequality(src);
  1838. if (n_eq < 0 || n_ineq < 0)
  1839. dst = isl_basic_set_free(dst);
  1840. dst = isl_basic_set_extend_constraints(dst, n_eq, n_ineq);
  1841. dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map);
  1842. return dst;
  1843. }
  1844. /* Add constraints to graph->lp that force validity for the given
  1845. * dependence from a node i to itself.
  1846. * That is, add constraints that enforce
  1847. *
  1848. * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x)
  1849. * = c_i_x (y - x) >= 0
  1850. *
  1851. * for each (x,y) in R.
  1852. * We obtain general constraints on coefficients (c_0, c_x)
  1853. * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-),
  1854. * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative.
  1855. * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart.
  1856. * Note that the result of intra_coefficients may also contain
  1857. * parameter coefficients c_n, in which case 0 is plugged in for them as well.
  1858. */
  1859. static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph,
  1860. struct isl_sched_edge *edge)
  1861. {
  1862. isl_size offset;
  1863. isl_map *map = isl_map_copy(edge->map);
  1864. isl_ctx *ctx = isl_map_get_ctx(map);
  1865. isl_dim_map *dim_map;
  1866. isl_basic_set *coef;
  1867. struct isl_sched_node *node = edge->src;
  1868. coef = intra_coefficients(graph, node, map, 0);
  1869. offset = coef_var_offset(coef);
  1870. if (offset < 0)
  1871. coef = isl_basic_set_free(coef);
  1872. if (!coef)
  1873. return isl_stat_error;
  1874. dim_map = intra_dim_map(ctx, graph, node, offset, 1);
  1875. graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
  1876. return isl_stat_ok;
  1877. }
/* Add constraints to graph->lp that force validity for the given
 * dependence from node i to node j.
 * That is, add constraints that enforce
 *
 *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0
 *
 * for each (x,y) in R.
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
 * of valid constraints for R and then plug in
 * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-),
 * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative.
 * In graph->lp, the c_*^- appear before their c_*^+ counterpart.
 *
 * The range of inequality positions added for this edge is recorded
 * in edge->start and edge->end so that the constraints can be
 * identified later.
 */
static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph,
	struct isl_sched_edge *edge)
{
	isl_size offset;
	isl_map *map;
	isl_ctx *ctx;
	isl_dim_map *dim_map;
	isl_basic_set *coef;
	struct isl_sched_node *src = edge->src;
	struct isl_sched_node *dst = edge->dst;

	/* graph->lp is dereferenced below to record inequality positions,
	 * so bail out early if it is missing.
	 */
	if (!graph->lp)
		return isl_stat_error;

	map = isl_map_copy(edge->map);
	ctx = isl_map_get_ctx(map);
	coef = inter_coefficients(graph, edge, map);
	offset = coef_var_offset(coef);
	if (offset < 0)
		coef = isl_basic_set_free(coef);
	if (!coef)
		return isl_stat_error;
	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);

	/* Record the inequalities added for this edge. */
	edge->start = graph->lp->n_ineq;
	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);
	if (!graph->lp)
		return isl_stat_error;
	edge->end = graph->lp->n_ineq;

	return isl_stat_ok;
}
/* Add constraints to graph->lp that bound the dependence distance for the given
 * dependence from a node i to itself.
 * If s = 1, we add the constraint
 *
 *	c_i_x (y - x) <= m_0 + m_n n
 *
 * or
 *
 *	-c_i_x (y - x) + m_0 + m_n n >= 0
 *
 * for each (x,y) in R.
 * If s = -1, we add the constraint
 *
 *	-c_i_x (y - x) <= m_0 + m_n n
 *
 * or
 *
 *	c_i_x (y - x) + m_0 + m_n n >= 0
 *
 * for each (x,y) in R.
 * We obtain general constraints on coefficients (c_0, c_n, c_x)
 * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x),
 * with each coefficient (except m_0) represented as a pair of non-negative
 * coefficients.
 *
 *
 * If "local" is set, then we add constraints
 *
 *	c_i_x (y - x) <= 0
 *
 * or
 *
 *	-c_i_x (y - x) <= 0
 *
 * instead, forcing the dependence distance to be (less than or) equal to 0.
 * That is, we plug in (0, 0, -s * c_i_x),
 * intra_coefficients is not required to have c_n in its result when
 * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in.
 * Note that dependences marked local are treated as validity constraints
 * by add_all_validity_constraints and therefore also have
 * their distances bounded by 0 from below.
 */
static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph,
	struct isl_sched_edge *edge, int s, int local)
{
	isl_size offset;
	isl_size nparam;
	isl_map *map = isl_map_copy(edge->map);
	isl_ctx *ctx = isl_map_get_ctx(map);
	isl_dim_map *dim_map;
	isl_basic_set *coef;
	struct isl_sched_node *node = edge->src;

	/* Parameter coefficients are only required when not "local". */
	coef = intra_coefficients(graph, node, map, !local);
	nparam = isl_space_dim(node->space, isl_dim_param);
	offset = coef_var_offset(coef);
	if (nparam < 0 || offset < 0)
		coef = isl_basic_set_free(coef);
	if (!coef)
		return isl_stat_error;
	/* Note the sign flip: the c_i_x coefficients enter with -s. */
	dim_map = intra_dim_map(ctx, graph, node, offset, -s);
	if (!local) {
		/* Additionally map the constant term to LP position 1 (m_0)
		 * and the parameter coefficients to the pairs starting at
		 * LP positions 4 and 5 (the encoding of m_n).
		 */
		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
	}
	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);

	return isl_stat_ok;
}
/* Add constraints to graph->lp that bound the dependence distance for the given
 * dependence from node i to node j.
 * If s = 1, we add the constraint
 *
 *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)
 *		<= m_0 + m_n n
 *
 * or
 *
 *	-(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) +
 *		m_0 + m_n n >= 0
 *
 * for each (x,y) in R.
 * If s = -1, we add the constraint
 *
 *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x))
 *		<= m_0 + m_n n
 *
 * or
 *
 *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) +
 *		m_0 + m_n n >= 0
 *
 * for each (x,y) in R.
 * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y)
 * of valid constraints for R and then plug in
 * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n,
 *  s*c_i_x, -s*c_j_x)
 * with each coefficient (except m_0, c_*_0 and c_*_n)
 * represented as a pair of non-negative coefficients.
 *
 *
 * If "local" is set (and s = 1), then we add constraints
 *
 *	(c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0
 *
 * or
 *
 *	-((c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x)) >= 0
 *
 * instead, forcing the dependence distance to be (less than or) equal to 0.
 * That is, we plug in
 * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x).
 * Note that dependences marked local are treated as validity constraints
 * by add_all_validity_constraints and therefore also have
 * their distances bounded by 0 from below.
 */
static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph,
	struct isl_sched_edge *edge, int s, int local)
{
	isl_size offset;
	isl_size nparam;
	isl_map *map = isl_map_copy(edge->map);
	isl_ctx *ctx = isl_map_get_ctx(map);
	isl_dim_map *dim_map;
	isl_basic_set *coef;
	struct isl_sched_node *src = edge->src;
	struct isl_sched_node *dst = edge->dst;

	coef = inter_coefficients(graph, edge, map);
	nparam = isl_space_dim(src->space, isl_dim_param);
	offset = coef_var_offset(coef);
	if (nparam < 0 || offset < 0)
		coef = isl_basic_set_free(coef);
	if (!coef)
		return isl_stat_error;
	/* Note the sign flip: the node coefficients enter with -s. */
	dim_map = inter_dim_map(ctx, graph, src, dst, offset, -s);
	if (!local) {
		/* Additionally map the constant term to LP position 1 (m_0)
		 * and the parameter coefficients to the pairs starting at
		 * LP positions 4 and 5 (the encoding of m_n).
		 */
		isl_dim_map_range(dim_map, 1, 0, 0, 0, 1, 1);
		isl_dim_map_range(dim_map, 4, 2, 1, 1, nparam, -1);
		isl_dim_map_range(dim_map, 5, 2, 1, 1, nparam, 1);
	}
	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);

	return isl_stat_ok;
}
  2061. /* Should the distance over "edge" be forced to zero?
  2062. * That is, is it marked as a local edge?
  2063. * If "use_coincidence" is set, then coincidence edges are treated
  2064. * as local edges.
  2065. */
  2066. static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
  2067. {
  2068. return is_local(edge) || (use_coincidence && is_coincidence(edge));
  2069. }
  2070. /* Add all validity constraints to graph->lp.
  2071. *
  2072. * An edge that is forced to be local needs to have its dependence
  2073. * distances equal to zero. We take care of bounding them by 0 from below
  2074. * here. add_all_proximity_constraints takes care of bounding them by 0
  2075. * from above.
  2076. *
  2077. * If "use_coincidence" is set, then we treat coincidence edges as local edges.
  2078. * Otherwise, we ignore them.
  2079. */
  2080. static int add_all_validity_constraints(struct isl_sched_graph *graph,
  2081. int use_coincidence)
  2082. {
  2083. int i;
  2084. for (i = 0; i < graph->n_edge; ++i) {
  2085. struct isl_sched_edge *edge = &graph->edge[i];
  2086. int zero;
  2087. zero = force_zero(edge, use_coincidence);
  2088. if (!is_validity(edge) && !zero)
  2089. continue;
  2090. if (edge->src != edge->dst)
  2091. continue;
  2092. if (add_intra_validity_constraints(graph, edge) < 0)
  2093. return -1;
  2094. }
  2095. for (i = 0; i < graph->n_edge; ++i) {
  2096. struct isl_sched_edge *edge = &graph->edge[i];
  2097. int zero;
  2098. zero = force_zero(edge, use_coincidence);
  2099. if (!is_validity(edge) && !zero)
  2100. continue;
  2101. if (edge->src == edge->dst)
  2102. continue;
  2103. if (add_inter_validity_constraints(graph, edge) < 0)
  2104. return -1;
  2105. }
  2106. return 0;
  2107. }
  2108. /* Add constraints to graph->lp that bound the dependence distance
  2109. * for all dependence relations.
  2110. * If a given proximity dependence is identical to a validity
  2111. * dependence, then the dependence distance is already bounded
  2112. * from below (by zero), so we only need to bound the distance
  2113. * from above. (This includes the case of "local" dependences
  2114. * which are treated as validity dependence by add_all_validity_constraints.)
  2115. * Otherwise, we need to bound the distance both from above and from below.
  2116. *
  2117. * If "use_coincidence" is set, then we treat coincidence edges as local edges.
  2118. * Otherwise, we ignore them.
  2119. */
  2120. static int add_all_proximity_constraints(struct isl_sched_graph *graph,
  2121. int use_coincidence)
  2122. {
  2123. int i;
  2124. for (i = 0; i < graph->n_edge; ++i) {
  2125. struct isl_sched_edge *edge = &graph->edge[i];
  2126. int zero;
  2127. zero = force_zero(edge, use_coincidence);
  2128. if (!is_proximity(edge) && !zero)
  2129. continue;
  2130. if (edge->src == edge->dst &&
  2131. add_intra_proximity_constraints(graph, edge, 1, zero) < 0)
  2132. return -1;
  2133. if (edge->src != edge->dst &&
  2134. add_inter_proximity_constraints(graph, edge, 1, zero) < 0)
  2135. return -1;
  2136. if (is_validity(edge) || zero)
  2137. continue;
  2138. if (edge->src == edge->dst &&
  2139. add_intra_proximity_constraints(graph, edge, -1, 0) < 0)
  2140. return -1;
  2141. if (edge->src != edge->dst &&
  2142. add_inter_proximity_constraints(graph, edge, -1, 0) < 0)
  2143. return -1;
  2144. }
  2145. return 0;
  2146. }
  2147. /* Normalize the rows of "indep" such that all rows are lexicographically
  2148. * positive and such that each row contains as many final zeros as possible,
  2149. * given the choice for the previous rows.
  2150. * Do this by performing elementary row operations.
  2151. */
  2152. static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep)
  2153. {
  2154. indep = isl_mat_reverse_gauss(indep);
  2155. indep = isl_mat_lexnonneg_rows(indep);
  2156. return indep;
  2157. }
  2158. /* Extract the linear part of the current schedule for node "node".
  2159. */
  2160. static __isl_give isl_mat *extract_linear_schedule(struct isl_sched_node *node)
  2161. {
  2162. isl_size n_row = isl_mat_rows(node->sched);
  2163. if (n_row < 0)
  2164. return NULL;
  2165. return isl_mat_sub_alloc(node->sched, 0, n_row,
  2166. 1 + node->nparam, node->nvar);
  2167. }
/* Compute a basis for the rows in the linear part of the schedule
 * and extend this basis to a full basis. The remaining rows
 * can then be used to force linear independence from the rows
 * in the schedule.
 *
 * In particular, given the schedule rows S, we compute
 *
 *	S = H Q
 *	S U = H
 *
 * with H the Hermite normal form of S. That is, all but the
 * first rank columns of H are zero and so each row in S is
 * a linear combination of the first rank rows of Q.
 * The matrix Q can be used as a variable transformation
 * that isolates the directions of S in the first rank rows.
 * Transposing S U = H yields
 *
 *	U^T S^T = H^T
 *
 * with all but the first rank rows of H^T zero.
 * The last rows of U^T are therefore linear combinations
 * of schedule coefficients that are all zero on schedule
 * coefficients that are linearly dependent on the rows of S.
 * At least one of these combinations is non-zero on
 * linearly independent schedule coefficients.
 * The rows are normalized to involve as few of the last
 * coefficients as possible and to have a positive initial value.
 *
 * Updates node->vmap, node->indep and node->rank; returns 0 on success
 * and -1 on failure.
 */
static int node_update_vmap(struct isl_sched_node *node)
{
	isl_mat *H, *U, *Q;

	H = extract_linear_schedule(node);
	H = isl_mat_left_hermite(H, 0, &U, &Q);

	/* Release the previous results before storing the new ones. */
	isl_mat_free(node->indep);
	isl_mat_free(node->vmap);
	node->vmap = Q;
	node->indep = isl_mat_transpose(U);
	/* The rank is needed below to drop the first "rank" rows of U^T. */
	node->rank = isl_mat_initial_non_zero_cols(H);
	node->indep = isl_mat_drop_rows(node->indep, 0, node->rank);
	node->indep = normalize_independent(node->indep);
	isl_mat_free(H);

	if (!node->indep || !node->vmap || node->rank < 0)
		return -1;

	return 0;
}
  2213. /* Is "edge" marked as a validity or a conditional validity edge?
  2214. */
  2215. static int is_any_validity(struct isl_sched_edge *edge)
  2216. {
  2217. return is_validity(edge) || is_conditional_validity(edge);
  2218. }
  2219. /* How many times should we count the constraints in "edge"?
  2220. *
  2221. * We count as follows
  2222. * validity -> 1 (>= 0)
  2223. * validity+proximity -> 2 (>= 0 and upper bound)
  2224. * proximity -> 2 (lower and upper bound)
  2225. * local(+any) -> 2 (>= 0 and <= 0)
  2226. *
  2227. * If an edge is only marked conditional_validity then it counts
  2228. * as zero since it is only checked afterwards.
  2229. *
  2230. * If "use_coincidence" is set, then we treat coincidence edges as local edges.
  2231. * Otherwise, we ignore them.
  2232. */
  2233. static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
  2234. {
  2235. if (is_proximity(edge) || force_zero(edge, use_coincidence))
  2236. return 2;
  2237. if (is_validity(edge))
  2238. return 1;
  2239. return 0;
  2240. }
  2241. /* How many times should the constraints in "edge" be counted
  2242. * as a parametric intra-node constraint?
  2243. *
  2244. * Only proximity edges that are not forced zero need
  2245. * coefficient constraints that include coefficients for parameters.
  2246. * If the edge is also a validity edge, then only
  2247. * an upper bound is introduced. Otherwise, both lower and upper bounds
  2248. * are introduced.
  2249. */
  2250. static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge,
  2251. int use_coincidence)
  2252. {
  2253. if (edge->src != edge->dst)
  2254. return 0;
  2255. if (!is_proximity(edge))
  2256. return 0;
  2257. if (force_zero(edge, use_coincidence))
  2258. return 0;
  2259. if (is_validity(edge))
  2260. return 1;
  2261. else
  2262. return 2;
  2263. }
  2264. /* Add "f" times the number of equality and inequality constraints of "bset"
  2265. * to "n_eq" and "n_ineq" and free "bset".
  2266. */
  2267. static isl_stat update_count(__isl_take isl_basic_set *bset,
  2268. int f, int *n_eq, int *n_ineq)
  2269. {
  2270. isl_size eq, ineq;
  2271. eq = isl_basic_set_n_equality(bset);
  2272. ineq = isl_basic_set_n_inequality(bset);
  2273. isl_basic_set_free(bset);
  2274. if (eq < 0 || ineq < 0)
  2275. return isl_stat_error;
  2276. *n_eq += eq;
  2277. *n_ineq += ineq;
  2278. return isl_stat_ok;
  2279. }
/* Count the number of equality and inequality constraints
 * that will be added for the given map.
 *
 * The edges that require parameter coefficients are counted separately
 * (with multiplicity "fp") from those that do not (multiplicity "f - fp").
 *
 * "use_coincidence" is set if we should take into account coincidence edges.
 *
 * The coefficient sets are obtained through inter_coefficients and
 * intra_coefficients, which cache their results, so the counts here
 * agree with the constraints that get added later on.
 * "map" is consumed on all paths.
 */
static isl_stat count_map_constraints(struct isl_sched_graph *graph,
	struct isl_sched_edge *edge, __isl_take isl_map *map,
	int *n_eq, int *n_ineq, int use_coincidence)
{
	isl_map *copy;
	isl_basic_set *coef;
	int f = edge_multiplicity(edge, use_coincidence);
	int fp = parametric_intra_edge_multiplicity(edge, use_coincidence);

	/* The edge contributes no constraints at all. */
	if (f == 0) {
		isl_map_free(map);
		return isl_stat_ok;
	}

	/* Inter-node edge: a single coefficient set, counted via
	 * update_count; inter_coefficients takes "map" and
	 * update_count frees "coef".
	 */
	if (edge->src != edge->dst) {
		coef = inter_coefficients(graph, edge, map);
		return update_count(coef, f, n_eq, n_ineq);
	}

	/* Intra-node edge: count the parametric variant ... */
	if (fp > 0) {
		copy = isl_map_copy(map);
		coef = intra_coefficients(graph, edge->src, copy, 1);
		if (update_count(coef, fp, n_eq, n_ineq) < 0)
			goto error;
	}

	/* ... and the non-parametric variant for the remaining copies. */
	if (f > fp) {
		copy = isl_map_copy(map);
		coef = intra_coefficients(graph, edge->src, copy, 0);
		if (update_count(coef, f - fp, n_eq, n_ineq) < 0)
			goto error;
	}

	isl_map_free(map);
	return isl_stat_ok;
error:
	isl_map_free(map);
	return isl_stat_error;
}
  2321. /* Count the number of equality and inequality constraints
  2322. * that will be added to the main lp problem.
  2323. * We count as follows
  2324. * validity -> 1 (>= 0)
  2325. * validity+proximity -> 2 (>= 0 and upper bound)
  2326. * proximity -> 2 (lower and upper bound)
  2327. * local(+any) -> 2 (>= 0 and <= 0)
  2328. *
  2329. * If "use_coincidence" is set, then we treat coincidence edges as local edges.
  2330. * Otherwise, we ignore them.
  2331. */
  2332. static int count_constraints(struct isl_sched_graph *graph,
  2333. int *n_eq, int *n_ineq, int use_coincidence)
  2334. {
  2335. int i;
  2336. *n_eq = *n_ineq = 0;
  2337. for (i = 0; i < graph->n_edge; ++i) {
  2338. struct isl_sched_edge *edge = &graph->edge[i];
  2339. isl_map *map = isl_map_copy(edge->map);
  2340. if (count_map_constraints(graph, edge, map, n_eq, n_ineq,
  2341. use_coincidence) < 0)
  2342. return -1;
  2343. }
  2344. return 0;
  2345. }
  2346. /* Count the number of constraints that will be added by
  2347. * add_bound_constant_constraints to bound the values of the constant terms
  2348. * and increment *n_eq and *n_ineq accordingly.
  2349. *
  2350. * In practice, add_bound_constant_constraints only adds inequalities.
  2351. */
  2352. static isl_stat count_bound_constant_constraints(isl_ctx *ctx,
  2353. struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
  2354. {
  2355. if (isl_options_get_schedule_max_constant_term(ctx) == -1)
  2356. return isl_stat_ok;
  2357. *n_ineq += graph->n;
  2358. return isl_stat_ok;
  2359. }
/* Add constraints to bound the values of the constant terms in the schedule,
 * if requested by the user.
 *
 * The maximal value of the constant terms is defined by the option
 * "schedule_max_constant_term".
 * For each node, an inequality
 *
 *	-c_i_0 + max >= 0
 *
 * i.e., c_i_0 <= max, is added.
 */
static isl_stat add_bound_constant_constraints(isl_ctx *ctx,
	struct isl_sched_graph *graph)
{
	int i, k;
	int max;
	isl_size total;

	max = isl_options_get_schedule_max_constant_term(ctx);
	/* -1 means no bound was requested. */
	if (max == -1)
		return isl_stat_ok;

	total = isl_basic_set_dim(graph->lp, isl_dim_set);
	if (total < 0)
		return isl_stat_error;

	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];
		int pos;

		k = isl_basic_set_alloc_inequality(graph->lp);
		if (k < 0)
			return isl_stat_error;
		isl_seq_clr(graph->lp->ineq[k], 1 + total);
		/* -1 on the node's constant term coefficient ... */
		pos = node_cst_coef_offset(node);
		isl_int_set_si(graph->lp->ineq[k][1 + pos], -1);
		/* ... and "max" as the constant of the inequality. */
		isl_int_set_si(graph->lp->ineq[k][0], max);
	}

	return isl_stat_ok;
}
  2391. /* Count the number of constraints that will be added by
  2392. * add_bound_coefficient_constraints and increment *n_eq and *n_ineq
  2393. * accordingly.
  2394. *
  2395. * In practice, add_bound_coefficient_constraints only adds inequalities.
  2396. */
  2397. static int count_bound_coefficient_constraints(isl_ctx *ctx,
  2398. struct isl_sched_graph *graph, int *n_eq, int *n_ineq)
  2399. {
  2400. int i;
  2401. if (isl_options_get_schedule_max_coefficient(ctx) == -1 &&
  2402. !isl_options_get_schedule_treat_coalescing(ctx))
  2403. return 0;
  2404. for (i = 0; i < graph->n; ++i)
  2405. *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar;
  2406. return 0;
  2407. }
/* Add constraints to graph->lp that bound the values of
 * the parameter schedule coefficients of "node" to "max" and
 * the variable schedule coefficients to the corresponding entry
 * in node->max.
 * In either case, a negative value means that no bound needs to be imposed.
 *
 * For parameter coefficients, this amounts to adding a constraint
 *
 *	c_n <= max
 *
 * i.e.,
 *
 *	-c_n + max >= 0
 *
 * The variables coefficients are, however, not represented directly.
 * Instead, the variable coefficients c_x are written as differences
 * c_x = c_x^+ - c_x^-.
 * That is,
 *
 *	-max_i <= c_x_i <= max_i
 *
 * is encoded as
 *
 *	-max_i <= c_x_i^+ - c_x_i^- <= max_i
 *
 * or
 *
 *	-(c_x_i^+ - c_x_i^-) + max_i >= 0
 *	c_x_i^+ - c_x_i^- + max_i >= 0
 */
static isl_stat node_add_coefficient_constraints(isl_ctx *ctx,
	struct isl_sched_graph *graph, struct isl_sched_node *node, int max)
{
	int i, j, k;
	isl_size total;
	isl_vec *ineq;

	total = isl_basic_set_dim(graph->lp, isl_dim_set);
	if (total < 0)
		return isl_stat_error;

	/* Bound each parameter coefficient: -c_n + max >= 0. */
	for (j = 0; j < node->nparam; ++j) {
		int dim;

		/* A negative "max" means no bound on the parameters. */
		if (max < 0)
			continue;

		k = isl_basic_set_alloc_inequality(graph->lp);
		if (k < 0)
			return isl_stat_error;
		dim = 1 + node_par_coef_offset(node) + j;
		isl_seq_clr(graph->lp->ineq[k], 1 + total);
		isl_int_set_si(graph->lp->ineq[k][dim], -1);
		isl_int_set_si(graph->lp->ineq[k][0], max);
	}

	/* Reusable (initially zero) template for the variable bounds. */
	ineq = isl_vec_alloc(ctx, 1 + total);
	ineq = isl_vec_clr(ineq);
	if (!ineq)
		return isl_stat_error;
	for (i = 0; i < node->nvar; ++i) {
		/* Position of the (c_x_i^-, c_x_i^+) pair; the negative
		 * part comes first in graph->lp.
		 */
		int pos = 1 + node_var_coef_pos(node, i);

		/* A negative entry in node->max means no bound. */
		if (isl_int_is_neg(node->max->el[i]))
			continue;

		/* -(c_x_i^+ - c_x_i^-) + max_i >= 0 (upper bound) */
		isl_int_set_si(ineq->el[pos], 1);
		isl_int_set_si(ineq->el[pos + 1], -1);
		isl_int_set(ineq->el[0], node->max->el[i]);
		k = isl_basic_set_alloc_inequality(graph->lp);
		if (k < 0)
			goto error;
		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);

		/* c_x_i^+ - c_x_i^- + max_i >= 0 (lower bound),
		 * obtained by negating the two pair entries.
		 */
		isl_seq_neg(ineq->el + pos, ineq->el + pos, 2);
		k = isl_basic_set_alloc_inequality(graph->lp);
		if (k < 0)
			goto error;
		isl_seq_cpy(graph->lp->ineq[k], ineq->el, 1 + total);

		/* Reset the two touched entries for the next iteration. */
		isl_seq_clr(ineq->el + pos, 2);
	}
	isl_vec_free(ineq);

	return isl_stat_ok;
error:
	isl_vec_free(ineq);
	return isl_stat_error;
}
  2487. /* Add constraints that bound the values of the variable and parameter
  2488. * coefficients of the schedule.
  2489. *
  2490. * The maximal value of the coefficients is defined by the option
  2491. * 'schedule_max_coefficient' and the entries in node->max.
  2492. * These latter entries are only set if either the schedule_max_coefficient
  2493. * option or the schedule_treat_coalescing option is set.
  2494. */
  2495. static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx,
  2496. struct isl_sched_graph *graph)
  2497. {
  2498. int i;
  2499. int max;
  2500. max = isl_options_get_schedule_max_coefficient(ctx);
  2501. if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx))
  2502. return isl_stat_ok;
  2503. for (i = 0; i < graph->n; ++i) {
  2504. struct isl_sched_node *node = &graph->node[i];
  2505. if (node_add_coefficient_constraints(ctx, graph, node, max) < 0)
  2506. return isl_stat_error;
  2507. }
  2508. return isl_stat_ok;
  2509. }
  2510. /* Add a constraint to graph->lp that equates the value at position
  2511. * "sum_pos" to the sum of the "n" values starting at "first".
  2512. */
  2513. static isl_stat add_sum_constraint(struct isl_sched_graph *graph,
  2514. int sum_pos, int first, int n)
  2515. {
  2516. int i, k;
  2517. isl_size total;
  2518. total = isl_basic_set_dim(graph->lp, isl_dim_set);
  2519. if (total < 0)
  2520. return isl_stat_error;
  2521. k = isl_basic_set_alloc_equality(graph->lp);
  2522. if (k < 0)
  2523. return isl_stat_error;
  2524. isl_seq_clr(graph->lp->eq[k], 1 + total);
  2525. isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
  2526. for (i = 0; i < n; ++i)
  2527. isl_int_set_si(graph->lp->eq[k][1 + first + i], 1);
  2528. return isl_stat_ok;
  2529. }
  2530. /* Add a constraint to graph->lp that equates the value at position
  2531. * "sum_pos" to the sum of the parameter coefficients of all nodes.
  2532. */
  2533. static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph,
  2534. int sum_pos)
  2535. {
  2536. int i, j, k;
  2537. isl_size total;
  2538. total = isl_basic_set_dim(graph->lp, isl_dim_set);
  2539. if (total < 0)
  2540. return isl_stat_error;
  2541. k = isl_basic_set_alloc_equality(graph->lp);
  2542. if (k < 0)
  2543. return isl_stat_error;
  2544. isl_seq_clr(graph->lp->eq[k], 1 + total);
  2545. isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
  2546. for (i = 0; i < graph->n; ++i) {
  2547. int pos = 1 + node_par_coef_offset(&graph->node[i]);
  2548. for (j = 0; j < graph->node[i].nparam; ++j)
  2549. isl_int_set_si(graph->lp->eq[k][pos + j], 1);
  2550. }
  2551. return isl_stat_ok;
  2552. }
  2553. /* Add a constraint to graph->lp that equates the value at position
  2554. * "sum_pos" to the sum of the variable coefficients of all nodes.
  2555. */
  2556. static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph,
  2557. int sum_pos)
  2558. {
  2559. int i, j, k;
  2560. isl_size total;
  2561. total = isl_basic_set_dim(graph->lp, isl_dim_set);
  2562. if (total < 0)
  2563. return isl_stat_error;
  2564. k = isl_basic_set_alloc_equality(graph->lp);
  2565. if (k < 0)
  2566. return isl_stat_error;
  2567. isl_seq_clr(graph->lp->eq[k], 1 + total);
  2568. isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1);
  2569. for (i = 0; i < graph->n; ++i) {
  2570. struct isl_sched_node *node = &graph->node[i];
  2571. int pos = 1 + node_var_coef_offset(node);
  2572. for (j = 0; j < 2 * node->nvar; ++j)
  2573. isl_int_set_si(graph->lp->eq[k][pos + j], 1);
  2574. }
  2575. return isl_stat_ok;
  2576. }
/* Construct an ILP problem for finding schedule coefficients
 * that result in non-negative, but small dependence distances
 * over all dependences.
 * In particular, the dependence distances over proximity edges
 * are bounded by m_0 + m_n n and we compute schedule coefficients
 * with small values (preferably zero) of m_n and m_0.
 *
 * All variables of the ILP are non-negative. The actual coefficients
 * may be negative, so each coefficient is represented as the difference
 * of two non-negative variables. The negative part always appears
 * immediately before the positive part.
 * Other than that, the variables have the following order
 *
 * - sum of positive and negative parts of m_n coefficients
 * - m_0
 * - sum of all c_n coefficients
 * (unconstrained when computing non-parametric schedules)
 * - sum of positive and negative parts of all c_x coefficients
 * - positive and negative parts of m_n coefficients
 * - for each node
 * - positive and negative parts of c_i_x, in opposite order
 * - c_i_n (if parametric)
 * - c_i_0
 *
 * The constraints are those from the edges plus two or three equalities
 * to express the sums.
 *
 * If "use_coincidence" is set, then we treat coincidence edges as local edges.
 * Otherwise, we ignore them.
 */
static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
	int use_coincidence)
{
	int i;
	isl_size nparam;
	unsigned total;
	isl_space *space;
	int parametric;
	int param_pos;
	int n_eq, n_ineq;

	parametric = ctx->opt->schedule_parametric;
	nparam = isl_space_dim(graph->node[0].space, isl_dim_param);
	if (nparam < 0)
		return isl_stat_error;
	/* The first four LP variables hold the sums and m_0
	 * described above; the m_n pairs follow directly after them.
	 */
	param_pos = 4;
	total = param_pos + 2 * nparam;
	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
		if (node_update_vmap(node) < 0)
			return isl_stat_error;
		/* Record where this node's block of coefficients starts. */
		node->start = total;
		total += 1 + node->nparam + 2 * node->nvar;
	}

	/* Count the constraints before allocating the LP tableau. */
	if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0)
		return isl_stat_error;
	if (count_bound_constant_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
		return isl_stat_error;
	if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0)
		return isl_stat_error;

	space = isl_space_set_alloc(ctx, 0, total);
	isl_basic_set_free(graph->lp);
	/* Two sum equalities always; a third one only for
	 * parametric schedules.
	 */
	n_eq += 2 + parametric;

	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);

	if (add_sum_constraint(graph, 0, param_pos, 2 * nparam) < 0)
		return isl_stat_error;
	if (parametric && add_param_sum_constraint(graph, 2) < 0)
		return isl_stat_error;
	if (add_var_sum_constraint(graph, 3) < 0)
		return isl_stat_error;
	if (add_bound_constant_constraints(ctx, graph) < 0)
		return isl_stat_error;
	if (add_bound_coefficient_constraints(ctx, graph) < 0)
		return isl_stat_error;
	if (add_all_validity_constraints(graph, use_coincidence) < 0)
		return isl_stat_error;
	if (add_all_proximity_constraints(graph, use_coincidence) < 0)
		return isl_stat_error;

	return isl_stat_ok;
}
  2656. /* Analyze the conflicting constraint found by
  2657. * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity
  2658. * constraint of one of the edges between distinct nodes, living, moreover
  2659. * in distinct SCCs, then record the source and sink SCC as this may
  2660. * be a good place to cut between SCCs.
  2661. */
  2662. static int check_conflict(int con, void *user)
  2663. {
  2664. int i;
  2665. struct isl_sched_graph *graph = user;
  2666. if (graph->src_scc >= 0)
  2667. return 0;
  2668. con -= graph->lp->n_eq;
  2669. if (con >= graph->lp->n_ineq)
  2670. return 0;
  2671. for (i = 0; i < graph->n_edge; ++i) {
  2672. if (!is_validity(&graph->edge[i]))
  2673. continue;
  2674. if (graph->edge[i].src == graph->edge[i].dst)
  2675. continue;
  2676. if (graph->edge[i].src->scc == graph->edge[i].dst->scc)
  2677. continue;
  2678. if (graph->edge[i].start > con)
  2679. continue;
  2680. if (graph->edge[i].end <= con)
  2681. continue;
  2682. graph->src_scc = graph->edge[i].src->scc;
  2683. graph->dst_scc = graph->edge[i].dst->scc;
  2684. }
  2685. return 0;
  2686. }
  2687. /* Check whether the next schedule row of the given node needs to be
  2688. * non-trivial. Lower-dimensional domains may have some trivial rows,
  2689. * but as soon as the number of remaining required non-trivial rows
  2690. * is as large as the number or remaining rows to be computed,
  2691. * all remaining rows need to be non-trivial.
  2692. */
  2693. static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node)
  2694. {
  2695. return node->nvar - node->rank >= graph->maxvar - graph->n_row;
  2696. }
/* Construct a non-triviality region with triviality directions
 * corresponding to the rows of "indep".
 * The rows of "indep" are expressed in terms of the schedule coefficients c_i,
 * while the triviality directions are expressed in terms of
 * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing
 * before c^+_i. Furthermore,
 * the pairs of non-negative variables representing the coefficients
 * are stored in the opposite order.
 */
static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep)
{
	isl_ctx *ctx;
	isl_mat *mat;
	int i, j;
	isl_size n, n_var;

	n = isl_mat_rows(indep);
	n_var = isl_mat_cols(indep);
	if (n < 0 || n_var < 0)
		return NULL;

	ctx = isl_mat_get_ctx(indep);
	/* Each coefficient expands into a (c^-, c^+) pair. */
	mat = isl_mat_alloc(ctx, n, 2 * n_var);
	if (!mat)
		return NULL;
	for (i = 0; i < n; ++i) {
		for (j = 0; j < n_var; ++j) {
			/* The pairs are stored in the opposite order
			 * of the coefficients they represent.
			 */
			int nj = n_var - 1 - j;
			/* c_j = c^+_j - c^-_j, so c^-_j gets the
			 * negated coefficient and c^+_j the original one.
			 */
			isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]);
			isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]);
		}
	}

	return mat;
}
/* Solve the ILP problem constructed in setup_lp.
 * For each node such that all the remaining rows of its schedule
 * need to be non-trivial, we construct a non-triviality region.
 * This region imposes that the next row is independent of previous rows.
 * In particular, the non-triviality region enforces that at least
 * one of the linear combinations in the rows of node->indep is non-zero.
 */
static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;
	isl_vec *sol;
	isl_basic_set *lp;

	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];
		isl_mat *trivial;

		/* The region starts at this node's variable coefficients. */
		graph->region[i].pos = node_var_coef_offset(node);
		if (needs_row(graph, node))
			trivial = construct_trivial(node->indep);
		else
			/* Empty region: no non-triviality requirement. */
			trivial = isl_mat_zero(ctx, 0, 0);
		graph->region[i].trivial = trivial;
	}
	lp = isl_basic_set_copy(graph->lp);
	sol = isl_tab_basic_set_non_trivial_lexmin(lp, 2, graph->n,
				graph->region, &check_conflict, graph);
	/* The triviality matrices are no longer needed once
	 * the solver has run.
	 */
	for (i = 0; i < graph->n; ++i)
		isl_mat_free(graph->region[i].trivial);
	return sol;
}
/* Extract the coefficients for the variables of "node" from "sol".
 *
 * Each schedule coefficient c_i_x is represented as the difference
 * between two non-negative variables c_i_x^+ - c_i_x^-.
 * The c_i_x^- appear before their c_i_x^+ counterpart.
 * Furthermore, the order of these pairs is the opposite of that
 * of the corresponding coefficients.
 *
 * Return c_i_x = c_i_x^+ - c_i_x^-
 */
static __isl_give isl_vec *extract_var_coef(struct isl_sched_node *node,
	__isl_keep isl_vec *sol)
{
	int i;
	int pos;
	isl_vec *csol;

	if (!sol)
		return NULL;
	csol = isl_vec_alloc(isl_vec_get_ctx(sol), node->nvar);
	if (!csol)
		return NULL;

	/* Position of the first (c^-, c^+) pair of this node in "sol". */
	pos = 1 + node_var_coef_offset(node);
	for (i = 0; i < node->nvar; ++i)
		/* Pair i corresponds to coefficient nvar - 1 - i,
		 * since the pairs are stored in reverse order.
		 */
		isl_int_sub(csol->el[node->nvar - 1 - i],
			    sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]);

	return csol;
}
/* Update the schedules of all nodes based on the given solution
 * of the LP problem.
 * The new row is added to the current band.
 * All possibly negative coefficients are encoded as a difference
 * of two non-negative variables, so we need to perform the subtraction
 * here.
 *
 * If coincident is set, then the caller guarantees that the new
 * row satisfies the coincidence constraints.
 */
static int update_schedule(struct isl_sched_graph *graph,
	__isl_take isl_vec *sol, int coincident)
{
	int i, j;
	isl_vec *csol = NULL;

	if (!sol)
		goto error;
	if (sol->size == 0)
		isl_die(sol->ctx, isl_error_internal,
			"no solution found", goto error);
	if (graph->n_total_row >= graph->max_row)
		isl_die(sol->ctx, isl_error_internal,
			"too many schedule rows", goto error);

	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];
		int pos;
		isl_size row = isl_mat_rows(node->sched);

		/* Free the vector of the previous iteration (if any)
		 * before extracting the one for this node.
		 */
		isl_vec_free(csol);
		csol = extract_var_coef(node, sol);
		if (row < 0 || !csol)
			goto error;

		/* The cached schedule map is invalidated by the new row. */
		isl_map_free(node->sched_map);
		node->sched_map = NULL;
		node->sched = isl_mat_add_rows(node->sched, 1);
		if (!node->sched)
			goto error;
		/* Fill in the new row: constant term, then parameter
		 * coefficients, then variable coefficients.
		 */
		pos = node_cst_coef_offset(node);
		node->sched = isl_mat_set_element(node->sched,
					row, 0, sol->el[1 + pos]);
		pos = node_par_coef_offset(node);
		for (j = 0; j < node->nparam; ++j)
			node->sched = isl_mat_set_element(node->sched,
					row, 1 + j, sol->el[1 + pos + j]);
		for (j = 0; j < node->nvar; ++j)
			node->sched = isl_mat_set_element(node->sched,
					row, 1 + node->nparam + j, csol->el[j]);
		node->coincident[graph->n_total_row] = coincident;
	}
	isl_vec_free(sol);
	isl_vec_free(csol);

	graph->n_row++;
	graph->n_total_row++;

	return 0;
error:
	isl_vec_free(sol);
	isl_vec_free(csol);
	return -1;
}
/* Convert row "row" of node->sched into an isl_aff living in "ls"
 * and return this isl_aff.
 */
static __isl_give isl_aff *extract_schedule_row(__isl_take isl_local_space *ls,
	struct isl_sched_node *node, int row)
{
	int j;
	isl_int v;
	isl_aff *aff;

	isl_int_init(v);

	aff = isl_aff_zero_on_domain(ls);
	/* Column 0 of the schedule matrix holds the constant term. */
	if (isl_mat_get_element(node->sched, row, 0, &v) < 0)
		goto error;
	aff = isl_aff_set_constant(aff, v);
	/* The parameter coefficients follow the constant term. */
	for (j = 0; j < node->nparam; ++j) {
		if (isl_mat_get_element(node->sched, row, 1 + j, &v) < 0)
			goto error;
		aff = isl_aff_set_coefficient(aff, isl_dim_param, j, v);
	}
	/* The variable coefficients come after the parameter
	 * coefficients.
	 */
	for (j = 0; j < node->nvar; ++j) {
		if (isl_mat_get_element(node->sched, row,
					1 + node->nparam + j, &v) < 0)
			goto error;
		aff = isl_aff_set_coefficient(aff, isl_dim_in, j, v);
	}

	isl_int_clear(v);

	return aff;
error:
	isl_int_clear(v);
	isl_aff_free(aff);
	return NULL;
}
/* Convert the "n" rows starting at "first" of node->sched into a multi_aff
 * and return this multi_aff.
 *
 * The result is defined over the uncompressed node domain.
 */
static __isl_give isl_multi_aff *node_extract_partial_schedule_multi_aff(
	struct isl_sched_node *node, int first, int n)
{
	int i;
	isl_space *space;
	isl_local_space *ls;
	isl_aff *aff;
	isl_multi_aff *ma;
	isl_size nrow;

	if (!node)
		return NULL;
	nrow = isl_mat_rows(node->sched);
	if (nrow < 0)
		return NULL;
	/* For a compressed node, the schedule is expressed in terms of
	 * the compressed domain, but the result should be defined over
	 * the original (uncompressed) domain.
	 */
	if (node->compressed)
		space = isl_pw_multi_aff_get_domain_space(node->decompress);
	else
		space = isl_space_copy(node->space);
	ls = isl_local_space_from_space(isl_space_copy(space));
	space = isl_space_from_domain(space);
	space = isl_space_add_dims(space, isl_dim_out, n);
	ma = isl_multi_aff_zero(space);

	/* Convert each requested row into an affine expression. */
	for (i = first; i < first + n; ++i) {
		aff = extract_schedule_row(isl_local_space_copy(ls), node, i);
		ma = isl_multi_aff_set_aff(ma, i - first, aff);
	}

	isl_local_space_free(ls);

	/* Map the uncompressed domain to the compressed one before
	 * applying the schedule expressions.
	 */
	if (node->compressed)
		ma = isl_multi_aff_pullback_multi_aff(ma,
				isl_multi_aff_copy(node->compress));

	return ma;
}
  2912. /* Convert node->sched into a multi_aff and return this multi_aff.
  2913. *
  2914. * The result is defined over the uncompressed node domain.
  2915. */
  2916. static __isl_give isl_multi_aff *node_extract_schedule_multi_aff(
  2917. struct isl_sched_node *node)
  2918. {
  2919. isl_size nrow;
  2920. nrow = isl_mat_rows(node->sched);
  2921. if (nrow < 0)
  2922. return NULL;
  2923. return node_extract_partial_schedule_multi_aff(node, 0, nrow);
  2924. }
  2925. /* Convert node->sched into a map and return this map.
  2926. *
  2927. * The result is cached in node->sched_map, which needs to be released
  2928. * whenever node->sched is updated.
  2929. * It is defined over the uncompressed node domain.
  2930. */
  2931. static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node)
  2932. {
  2933. if (!node->sched_map) {
  2934. isl_multi_aff *ma;
  2935. ma = node_extract_schedule_multi_aff(node);
  2936. node->sched_map = isl_map_from_multi_aff(ma);
  2937. }
  2938. return isl_map_copy(node->sched_map);
  2939. }
  2940. /* Construct a map that can be used to update a dependence relation
  2941. * based on the current schedule.
  2942. * That is, construct a map expressing that source and sink
  2943. * are executed within the same iteration of the current schedule.
  2944. * This map can then be intersected with the dependence relation.
  2945. * This is not the most efficient way, but this shouldn't be a critical
  2946. * operation.
  2947. */
  2948. static __isl_give isl_map *specializer(struct isl_sched_node *src,
  2949. struct isl_sched_node *dst)
  2950. {
  2951. isl_map *src_sched, *dst_sched;
  2952. src_sched = node_extract_schedule(src);
  2953. dst_sched = node_extract_schedule(dst);
  2954. return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched));
  2955. }
  2956. /* Intersect the domains of the nested relations in domain and range
  2957. * of "umap" with "map".
  2958. */
  2959. static __isl_give isl_union_map *intersect_domains(
  2960. __isl_take isl_union_map *umap, __isl_keep isl_map *map)
  2961. {
  2962. isl_union_set *uset;
  2963. umap = isl_union_map_zip(umap);
  2964. uset = isl_union_set_from_set(isl_map_wrap(isl_map_copy(map)));
  2965. umap = isl_union_map_intersect_domain(umap, uset);
  2966. umap = isl_union_map_zip(umap);
  2967. return umap;
  2968. }
/* Update the dependence relation of the given edge based
 * on the current schedule.
 * If the dependence is carried completely by the current schedule, then
 * it is removed from the edge_tables. It is kept in the list of edges
 * as otherwise all edge_tables would have to be recomputed.
 *
 * If the edge is of a type that can appear multiple times
 * between the same pair of nodes, then it is added to
 * the edge table (again). This prevents the situation
 * where none of these edges is referenced from the edge table
 * because the one that was referenced turned out to be empty and
 * was therefore removed from the table.
 */
static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph,
	struct isl_sched_edge *edge)
{
	int empty;
	isl_map *id;

	/* Map relating source and sink iterations that are scheduled
	 * to the same point by the current schedule.
	 */
	id = specializer(edge->src, edge->dst);
	edge->map = isl_map_intersect(edge->map, isl_map_copy(id));
	if (!edge->map)
		goto error;

	/* Restrict the tagged relations (if any) in the same way. */
	if (edge->tagged_condition) {
		edge->tagged_condition =
			intersect_domains(edge->tagged_condition, id);
		if (!edge->tagged_condition)
			goto error;
	}
	if (edge->tagged_validity) {
		edge->tagged_validity =
			intersect_domains(edge->tagged_validity, id);
		if (!edge->tagged_validity)
			goto error;
	}

	empty = isl_map_plain_is_empty(edge->map);
	if (empty < 0)
		goto error;
	if (empty) {
		/* Dependence fully carried: drop it from the tables. */
		if (graph_remove_edge(graph, edge) < 0)
			goto error;
	} else if (is_multi_edge_type(edge)) {
		/* Re-register the edge so that at least one non-empty
		 * edge of this type remains referenced from the table.
		 */
		if (graph_edge_tables_add(ctx, graph, edge) < 0)
			goto error;
	}

	isl_map_free(id);
	return isl_stat_ok;
error:
	isl_map_free(id);
	return isl_stat_error;
}
  3019. /* Does the domain of "umap" intersect "uset"?
  3020. */
  3021. static int domain_intersects(__isl_keep isl_union_map *umap,
  3022. __isl_keep isl_union_set *uset)
  3023. {
  3024. int empty;
  3025. umap = isl_union_map_copy(umap);
  3026. umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(uset));
  3027. empty = isl_union_map_is_empty(umap);
  3028. isl_union_map_free(umap);
  3029. return empty < 0 ? -1 : !empty;
  3030. }
  3031. /* Does the range of "umap" intersect "uset"?
  3032. */
  3033. static int range_intersects(__isl_keep isl_union_map *umap,
  3034. __isl_keep isl_union_set *uset)
  3035. {
  3036. int empty;
  3037. umap = isl_union_map_copy(umap);
  3038. umap = isl_union_map_intersect_range(umap, isl_union_set_copy(uset));
  3039. empty = isl_union_map_is_empty(umap);
  3040. isl_union_map_free(umap);
  3041. return empty < 0 ? -1 : !empty;
  3042. }
/* Are the condition dependences of "edge" local with respect to
 * the current schedule?
 *
 * That is, are domain and range of the condition dependences mapped
 * to the same point?
 *
 * In other words, is the condition false?
 */
static int is_condition_false(struct isl_sched_edge *edge)
{
	isl_union_map *umap;
	isl_map *map, *sched, *test;
	int empty, local;

	/* An empty condition is trivially local. */
	empty = isl_union_map_is_empty(edge->tagged_condition);
	if (empty < 0 || empty)
		return empty;

	/* Strip the tags: keep only the relation between the
	 * untagged source and sink iterations.
	 */
	umap = isl_union_map_copy(edge->tagged_condition);
	umap = isl_union_map_zip(umap);
	umap = isl_union_set_unwrap(isl_union_map_domain(umap));
	map = isl_map_from_union_map(umap);

	/* Apply the current schedule to both sides. */
	sched = node_extract_schedule(edge->src);
	map = isl_map_apply_domain(map, sched);
	sched = node_extract_schedule(edge->dst);
	map = isl_map_apply_range(map, sched);

	/* Local means every pair is mapped to the same schedule point. */
	test = isl_map_identity(isl_map_get_space(map));
	local = isl_map_is_subset(map, test);
	isl_map_free(map);
	isl_map_free(test);

	return local;
}
/* For each conditional validity constraint that is adjacent
 * to a condition with domain in condition_source or range in condition_sink,
 * turn it into an unconditional validity constraint.
 */
static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph,
	__isl_take isl_union_set *condition_source,
	__isl_take isl_union_set *condition_sink)
{
	int i;

	/* Coalesce to simplify the repeated intersection tests below. */
	condition_source = isl_union_set_coalesce(condition_source);
	condition_sink = isl_union_set_coalesce(condition_sink);

	for (i = 0; i < graph->n_edge; ++i) {
		int adjacent;
		isl_union_map *validity;

		if (!is_conditional_validity(&graph->edge[i]))
			continue;
		/* Already an unconditional validity edge. */
		if (is_validity(&graph->edge[i]))
			continue;

		/* Adjacent means the validity's domain meets a condition
		 * sink or its range meets a condition source.
		 */
		validity = graph->edge[i].tagged_validity;
		adjacent = domain_intersects(validity, condition_sink);
		if (adjacent >= 0 && !adjacent)
			adjacent = range_intersects(validity, condition_source);
		if (adjacent < 0)
			goto error;
		if (!adjacent)
			continue;

		set_validity(&graph->edge[i]);
	}

	isl_union_set_free(condition_source);
	isl_union_set_free(condition_sink);
	return 0;
error:
	isl_union_set_free(condition_source);
	isl_union_set_free(condition_sink);
	return -1;
}
/* Update the dependence relations of all edges based on the current schedule
 * and enforce conditional validity constraints that are adjacent
 * to satisfied condition constraints.
 *
 * First check if any of the condition constraints are satisfied
 * (i.e., not local to the outer schedule) and keep track of
 * their domain and range.
 * Then update all dependence relations (which removes the non-local
 * constraints).
 * Finally, if any condition constraints turned out to be satisfied,
 * then turn all adjacent conditional validity constraints into
 * unconditional validity constraints.
 */
static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph)
{
	int i;
	int any = 0;
	isl_union_set *source, *sink;

	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));

	/* Phase 1: collect domains and ranges of satisfied conditions. */
	for (i = 0; i < graph->n_edge; ++i) {
		int local;
		isl_union_set *uset;
		isl_union_map *umap;

		if (!is_condition(&graph->edge[i]))
			continue;
		if (is_local(&graph->edge[i]))
			continue;
		local = is_condition_false(&graph->edge[i]);
		if (local < 0)
			goto error;
		if (local)
			continue;

		/* This condition is satisfied; record its endpoints. */
		any = 1;

		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
		uset = isl_union_map_domain(umap);
		source = isl_union_set_union(source, uset);

		umap = isl_union_map_copy(graph->edge[i].tagged_condition);
		uset = isl_union_map_range(umap);
		sink = isl_union_set_union(sink, uset);
	}

	/* Phase 2: specialize all edges to the current schedule. */
	for (i = 0; i < graph->n_edge; ++i) {
		if (update_edge(ctx, graph, &graph->edge[i]) < 0)
			goto error;
	}

	/* Phase 3: promote conditional validity edges adjacent to
	 * the satisfied conditions (takes ownership of source/sink).
	 */
	if (any)
		return unconditionalize_adjacent_validity(graph, source, sink);

	isl_union_set_free(source);
	isl_union_set_free(sink);
	return 0;
error:
	isl_union_set_free(source);
	isl_union_set_free(sink);
	return -1;
}
/* Mark the start of a new band: any rows computed from now on
 * belong to the next band.
 */
static void next_band(struct isl_sched_graph *graph)
{
	graph->band_start = graph->n_total_row;
}
  3168. /* Return the union of the universe domains of the nodes in "graph"
  3169. * that satisfy "pred".
  3170. */
  3171. static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx,
  3172. struct isl_sched_graph *graph,
  3173. int (*pred)(struct isl_sched_node *node, int data), int data)
  3174. {
  3175. int i;
  3176. isl_set *set;
  3177. isl_union_set *dom;
  3178. for (i = 0; i < graph->n; ++i)
  3179. if (pred(&graph->node[i], data))
  3180. break;
  3181. if (i >= graph->n)
  3182. isl_die(ctx, isl_error_internal,
  3183. "empty component", return NULL);
  3184. set = isl_set_universe(isl_space_copy(graph->node[i].space));
  3185. dom = isl_union_set_from_set(set);
  3186. for (i = i + 1; i < graph->n; ++i) {
  3187. if (!pred(&graph->node[i], data))
  3188. continue;
  3189. set = isl_set_universe(isl_space_copy(graph->node[i].space));
  3190. dom = isl_union_set_union(dom, isl_union_set_from_set(set));
  3191. }
  3192. return dom;
  3193. }
  3194. /* Return a list of unions of universe domains, where each element
  3195. * in the list corresponds to an SCC (or WCC) indexed by node->scc.
  3196. */
  3197. static __isl_give isl_union_set_list *extract_sccs(isl_ctx *ctx,
  3198. struct isl_sched_graph *graph)
  3199. {
  3200. int i;
  3201. isl_union_set_list *filters;
  3202. filters = isl_union_set_list_alloc(ctx, graph->scc);
  3203. for (i = 0; i < graph->scc; ++i) {
  3204. isl_union_set *dom;
  3205. dom = isl_sched_graph_domain(ctx, graph, &node_scc_exactly, i);
  3206. filters = isl_union_set_list_add(filters, dom);
  3207. }
  3208. return filters;
  3209. }
  3210. /* Return a list of two unions of universe domains, one for the SCCs up
  3211. * to and including graph->src_scc and another for the other SCCs.
  3212. */
  3213. static __isl_give isl_union_set_list *extract_split(isl_ctx *ctx,
  3214. struct isl_sched_graph *graph)
  3215. {
  3216. isl_union_set *dom;
  3217. isl_union_set_list *filters;
  3218. filters = isl_union_set_list_alloc(ctx, 2);
  3219. dom = isl_sched_graph_domain(ctx, graph,
  3220. &node_scc_at_most, graph->src_scc);
  3221. filters = isl_union_set_list_add(filters, dom);
  3222. dom = isl_sched_graph_domain(ctx, graph,
  3223. &node_scc_at_least, graph->src_scc + 1);
  3224. filters = isl_union_set_list_add(filters, dom);
  3225. return filters;
  3226. }
/* Copy nodes that satisfy node_pred from the src dependence graph
 * to the dst dependence graph.
 */
static isl_stat copy_nodes(struct isl_sched_graph *dst,
	struct isl_sched_graph *src,
	int (*node_pred)(struct isl_sched_node *node, int data), int data)
{
	int i;

	dst->n = 0;
	for (i = 0; i < src->n; ++i) {
		int j;

		if (!node_pred(&src->node[i], data))
			continue;

		j = dst->n;
		/* Copy all per-node fields; isl objects are copied
		 * (reference counted), plain fields are assigned.
		 */
		dst->node[j].space = isl_space_copy(src->node[i].space);
		dst->node[j].compressed = src->node[i].compressed;
		dst->node[j].hull = isl_set_copy(src->node[i].hull);
		dst->node[j].compress =
			isl_multi_aff_copy(src->node[i].compress);
		dst->node[j].decompress =
			isl_pw_multi_aff_copy(src->node[i].decompress);
		dst->node[j].nvar = src->node[i].nvar;
		dst->node[j].nparam = src->node[i].nparam;
		dst->node[j].sched = isl_mat_copy(src->node[i].sched);
		dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map);
		dst->node[j].coincident = src->node[i].coincident;
		dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes);
		dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds);
		dst->node[j].max = isl_vec_copy(src->node[i].max);
		dst->n++;

		/* Fail if any of the required copies did not succeed. */
		if (!dst->node[j].space || !dst->node[j].sched)
			return isl_stat_error;
		/* The compression fields are only meaningful (and hence
		 * only checked) for compressed nodes.
		 */
		if (dst->node[j].compressed &&
		    (!dst->node[j].hull || !dst->node[j].compress ||
		     !dst->node[j].decompress))
			return isl_stat_error;
	}

	return isl_stat_ok;
}
/* Copy non-empty edges that satisfy edge_pred from the src dependence graph
 * to the dst dependence graph.
 * If the source or destination node of the edge is not in the destination
 * graph, then it must be a backward proximity edge and it should simply
 * be ignored.
 */
static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst,
	struct isl_sched_graph *src,
	int (*edge_pred)(struct isl_sched_edge *edge, int data), int data)
{
	int i;

	dst->n_edge = 0;
	for (i = 0; i < src->n_edge; ++i) {
		struct isl_sched_edge *edge = &src->edge[i];
		isl_map *map;
		isl_union_map *tagged_condition;
		isl_union_map *tagged_validity;
		struct isl_sched_node *dst_src, *dst_dst;

		if (!edge_pred(edge, data))
			continue;

		/* Skip edges whose dependence relation is already empty. */
		if (isl_map_plain_is_empty(edge->map))
			continue;

		/* Look up the endpoints in the destination graph. */
		dst_src = graph_find_node(ctx, dst, edge->src->space);
		dst_dst = graph_find_node(ctx, dst, edge->dst->space);
		if (!dst_src || !dst_dst)
			return isl_stat_error;
		if (!is_node(dst, dst_src) || !is_node(dst, dst_dst)) {
			/* An endpoint is missing from the destination graph;
			 * this is only acceptable for non-validity edges.
			 */
			if (is_validity(edge) || is_conditional_validity(edge))
				isl_die(ctx, isl_error_internal,
					"backward (conditional) validity edge",
					return isl_stat_error);
			continue;
		}

		map = isl_map_copy(edge->map);
		tagged_condition = isl_union_map_copy(edge->tagged_condition);
		tagged_validity = isl_union_map_copy(edge->tagged_validity);

		dst->edge[dst->n_edge].src = dst_src;
		dst->edge[dst->n_edge].dst = dst_dst;
		dst->edge[dst->n_edge].map = map;
		dst->edge[dst->n_edge].tagged_condition = tagged_condition;
		dst->edge[dst->n_edge].tagged_validity = tagged_validity;
		dst->edge[dst->n_edge].types = edge->types;
		dst->n_edge++;

		/* A copy may only be NULL if the original was NULL too;
		 * otherwise the copy failed.
		 */
		if (edge->tagged_condition && !tagged_condition)
			return isl_stat_error;
		if (edge->tagged_validity && !tagged_validity)
			return isl_stat_error;

		if (graph_edge_tables_add(ctx, dst,
					&dst->edge[dst->n_edge - 1]) < 0)
			return isl_stat_error;
	}

	return isl_stat_ok;
}
  3319. /* Compute the maximal number of variables over all nodes.
  3320. * This is the maximal number of linearly independent schedule
  3321. * rows that we need to compute.
  3322. * Just in case we end up in a part of the dependence graph
  3323. * with only lower-dimensional domains, we make sure we will
  3324. * compute the required amount of extra linearly independent rows.
  3325. */
  3326. static int compute_maxvar(struct isl_sched_graph *graph)
  3327. {
  3328. int i;
  3329. graph->maxvar = 0;
  3330. for (i = 0; i < graph->n; ++i) {
  3331. struct isl_sched_node *node = &graph->node[i];
  3332. int nvar;
  3333. if (node_update_vmap(node) < 0)
  3334. return -1;
  3335. nvar = node->nvar + graph->n_row - node->rank;
  3336. if (nvar > graph->maxvar)
  3337. graph->maxvar = nvar;
  3338. }
  3339. return 0;
  3340. }
/* Extract the subgraph of "graph" that consists of the nodes satisfying
 * "node_pred" and the edges satisfying "edge_pred" and store
 * the result in "sub".
 *
 * The qualifying nodes and edges are counted first so that "sub"
 * can be allocated with the right capacity.
 * The node table and the edge tables of "sub" are set up before
 * the edges are copied, since copy_edges looks up nodes in "sub"
 * and registers each copied edge in the edge tables.
 * The schedule row bookkeeping of "graph" is inherited by "sub".
 */
static isl_stat extract_sub_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
	int (*node_pred)(struct isl_sched_node *node, int data),
	int (*edge_pred)(struct isl_sched_edge *edge, int data),
	int data, struct isl_sched_graph *sub)
{
	int i, n = 0, n_edge = 0;
	int t;

	/* Count the nodes and edges that will end up in the subgraph. */
	for (i = 0; i < graph->n; ++i)
		if (node_pred(&graph->node[i], data))
			++n;
	for (i = 0; i < graph->n_edge; ++i)
		if (edge_pred(&graph->edge[i], data))
			++n_edge;
	if (graph_alloc(ctx, sub, n, n_edge) < 0)
		return isl_stat_error;
	sub->root = graph->root;
	if (copy_nodes(sub, graph, node_pred, data) < 0)
		return isl_stat_error;
	if (graph_init_table(ctx, sub) < 0)
		return isl_stat_error;
	for (t = 0; t <= isl_edge_last; ++t)
		sub->max_edge[t] = graph->max_edge[t];
	if (graph_init_edge_tables(ctx, sub) < 0)
		return isl_stat_error;
	if (copy_edges(ctx, sub, graph, edge_pred, data) < 0)
		return isl_stat_error;
	/* Inherit the row bookkeeping from the parent graph. */
	sub->n_row = graph->n_row;
	sub->max_row = graph->max_row;
	sub->n_total_row = graph->n_total_row;
	sub->band_start = graph->band_start;
	return isl_stat_ok;
}
  3377. static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
  3378. struct isl_sched_graph *graph);
  3379. static __isl_give isl_schedule_node *compute_schedule_wcc(
  3380. isl_schedule_node *node, struct isl_sched_graph *graph);
/* Compute a schedule for a subgraph of "graph". In particular, for
 * the graph composed of nodes that satisfy node_pred and edges
 * that satisfy edge_pred.
 * If the subgraph is known to consist of a single component, then wcc should
 * be set and then we call compute_schedule_wcc on the constructed subgraph.
 * Otherwise, we call compute_schedule, which will check whether the subgraph
 * is connected.
 *
 * The schedule is inserted at "node" and the updated schedule node
 * is returned.
 */
static __isl_give isl_schedule_node *compute_sub_schedule(
	__isl_take isl_schedule_node *node, isl_ctx *ctx,
	struct isl_sched_graph *graph,
	int (*node_pred)(struct isl_sched_node *node, int data),
	int (*edge_pred)(struct isl_sched_edge *edge, int data),
	int data, int wcc)
{
	/* Zero-initialized so that graph_free on the error path only
	 * sees fields that extract_sub_graph actually set. */
	struct isl_sched_graph split = { 0 };

	if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data,
				&split) < 0)
		goto error;

	if (wcc)
		node = compute_schedule_wcc(node, &split);
	else
		node = compute_schedule(node, &split);
	graph_free(ctx, &split);
	return node;
error:
	graph_free(ctx, &split);
	return isl_schedule_node_free(node);
}
  3413. static int edge_scc_exactly(struct isl_sched_edge *edge, int scc)
  3414. {
  3415. return edge->src->scc == scc && edge->dst->scc == scc;
  3416. }
  3417. static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc)
  3418. {
  3419. return edge->dst->scc <= scc;
  3420. }
  3421. static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc)
  3422. {
  3423. return edge->src->scc >= scc;
  3424. }
  3425. /* Reset the current band by dropping all its schedule rows.
  3426. */
  3427. static isl_stat reset_band(struct isl_sched_graph *graph)
  3428. {
  3429. int i;
  3430. int drop;
  3431. drop = graph->n_total_row - graph->band_start;
  3432. graph->n_total_row -= drop;
  3433. graph->n_row -= drop;
  3434. for (i = 0; i < graph->n; ++i) {
  3435. struct isl_sched_node *node = &graph->node[i];
  3436. isl_map_free(node->sched_map);
  3437. node->sched_map = NULL;
  3438. node->sched = isl_mat_drop_rows(node->sched,
  3439. graph->band_start, drop);
  3440. if (!node->sched)
  3441. return isl_stat_error;
  3442. }
  3443. return isl_stat_ok;
  3444. }
/* Split the current graph into two parts and compute a schedule for each
 * part individually. In particular, one part consists of all SCCs up
 * to and including graph->src_scc, while the other part contains the other
 * SCCs. The split is enforced by a sequence node inserted at position "node"
 * in the schedule tree. Return the updated schedule node.
 * If either of these two parts consists of a sequence, then it is spliced
 * into the sequence containing the two parts.
 *
 * The current band is reset. It would be possible to reuse
 * the previously computed rows as the first rows in the next
 * band, but recomputing them may result in better rows as we are looking
 * at a smaller part of the dependence graph.
 */
static __isl_give isl_schedule_node *compute_split_schedule(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
{
	int is_seq;
	isl_ctx *ctx;
	isl_union_set_list *filters;

	if (!node)
		return NULL;

	if (reset_band(graph) < 0)
		return isl_schedule_node_free(node);

	next_band(graph);

	ctx = isl_schedule_node_get_ctx(node);
	filters = extract_split(ctx, graph);
	node = isl_schedule_node_insert_sequence(node, filters);
	/* Descend to the leaf below the second filter and schedule
	 * the part with SCC numbers greater than src_scc. */
	node = isl_schedule_node_child(node, 1);
	node = isl_schedule_node_child(node, 0);
	node = compute_sub_schedule(node, ctx, graph,
				&node_scc_at_least, &edge_src_scc_at_least,
				graph->src_scc + 1, 0);
	is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
	/* Move back up to the sequence node inserted above. */
	node = isl_schedule_node_parent(node);
	node = isl_schedule_node_parent(node);
	if (is_seq)
		node = isl_schedule_node_sequence_splice_child(node, 1);
	/* Descend below the first filter and schedule the part with
	 * SCC numbers up to and including src_scc. */
	node = isl_schedule_node_child(node, 0);
	node = isl_schedule_node_child(node, 0);
	node = compute_sub_schedule(node, ctx, graph,
				&node_scc_at_most, &edge_dst_scc_at_most,
				graph->src_scc, 0);
	is_seq = isl_schedule_node_get_type(node) == isl_schedule_node_sequence;
	node = isl_schedule_node_parent(node);
	node = isl_schedule_node_parent(node);
	if (is_seq)
		node = isl_schedule_node_sequence_splice_child(node, 0);

	return node;
}
/* Insert a band node at position "node" in the schedule tree corresponding
 * to the current band in "graph". Mark the band node permutable
 * if "permutable" is set.
 * The partial schedules and the coincidence property are extracted
 * from the graph nodes.
 * Return the updated schedule node.
 */
static __isl_give isl_schedule_node *insert_current_band(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int permutable)
{
	int i;
	int start, end, n;
	isl_multi_aff *ma;
	isl_multi_pw_aff *mpa;
	isl_multi_union_pw_aff *mupa;

	if (!node)
		return NULL;

	if (graph->n < 1)
		isl_die(isl_schedule_node_get_ctx(node), isl_error_internal,
			"graph should have at least one node",
			return isl_schedule_node_free(node));

	/* The current band consists of the rows added since band_start. */
	start = graph->band_start;
	end = graph->n_total_row;
	n = end - start;

	/* Combine the per-node partial schedules into a single
	 * isl_multi_union_pw_aff, one piece per node. */
	ma = node_extract_partial_schedule_multi_aff(&graph->node[0], start, n);
	mpa = isl_multi_pw_aff_from_multi_aff(ma);
	mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);

	for (i = 1; i < graph->n; ++i) {
		isl_multi_union_pw_aff *mupa_i;

		ma = node_extract_partial_schedule_multi_aff(&graph->node[i],
								start, n);
		mpa = isl_multi_pw_aff_from_multi_aff(ma);
		mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa);
		mupa = isl_multi_union_pw_aff_union_add(mupa, mupa_i);
	}
	node = isl_schedule_node_insert_partial_schedule(node, mupa);

	/* The coincidence flags are read from the first node only;
	 * they are assumed to agree across all nodes of the band. */
	for (i = 0; i < n; ++i)
		node = isl_schedule_node_band_member_set_coincident(node, i,
					graph->node[0].coincident[start + i]);
	node = isl_schedule_node_band_set_permutable(node, permutable);

	return node;
}
  3537. /* Update the dependence relations based on the current schedule,
  3538. * add the current band to "node" and then continue with the computation
  3539. * of the next band.
  3540. * Return the updated schedule node.
  3541. */
  3542. static __isl_give isl_schedule_node *compute_next_band(
  3543. __isl_take isl_schedule_node *node,
  3544. struct isl_sched_graph *graph, int permutable)
  3545. {
  3546. isl_ctx *ctx;
  3547. if (!node)
  3548. return NULL;
  3549. ctx = isl_schedule_node_get_ctx(node);
  3550. if (update_edges(ctx, graph) < 0)
  3551. return isl_schedule_node_free(node);
  3552. node = insert_current_band(node, graph, permutable);
  3553. next_band(graph);
  3554. node = isl_schedule_node_child(node, 0);
  3555. node = compute_schedule(node, graph);
  3556. node = isl_schedule_node_parent(node);
  3557. return node;
  3558. }
/* Add the constraints "coef" derived from an edge from "node" to itself
 * to graph->lp in order to respect the dependences and to try and carry them.
 * "pos" is the sequence number of the edge that needs to be carried.
 * "coef" represents general constraints on coefficients (c_0, c_x)
 * of valid constraints for (y - x) with x and y instances of the node.
 *
 * The constraints added to graph->lp need to enforce
 *
 *	(c_j_0 + c_j_x y) - (c_j_0 + c_j_x x)
 *	= c_j_x (y - x) >= e_i
 *
 * for each (x,y) in the dependence relation of the edge.
 * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x),
 * taking into account that each coefficient in c_j_x is represented
 * as a pair of non-negative coefficients.
 */
static isl_stat add_intra_constraints(struct isl_sched_graph *graph,
	struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos)
{
	isl_size offset;
	isl_ctx *ctx;
	isl_dim_map *dim_map;

	offset = coef_var_offset(coef);
	if (offset < 0)
		coef = isl_basic_set_free(coef);
	if (!coef)
		return isl_stat_error;

	ctx = isl_basic_set_get_ctx(coef);
	dim_map = intra_dim_map(ctx, graph, node, offset, 1);
	/* LP variable 3 + pos is the e_i of this edge (the e_i variables
	 * follow the three sum variables; see setup_carry_lp).
	 * Map the constant term of "coef" onto -e_i. */
	isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);

	return isl_stat_ok;
}
/* Add the constraints "coef" derived from an edge from "src" to "dst"
 * to graph->lp in order to respect the dependences and to try and carry them.
 * "pos" is the sequence number of the edge that needs to be carried or
 * -1 if no attempt should be made to carry the dependences.
 * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y)
 * of valid constraints for (x, y) with x and y instances of "src" and "dst".
 *
 * The constraints added to graph->lp need to enforce
 *
 *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i
 *
 * for each (x,y) in the dependence relation of the edge or
 *
 *	(c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0
 *
 * if pos is -1.
 * That is,
 *	(-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
 * or
 *	(c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x)
 * needs to be plugged in for (c_0, c_n, c_x, c_y),
 * taking into account that each coefficient in c_j_x and c_k_x is represented
 * as a pair of non-negative coefficients.
 */
static isl_stat add_inter_constraints(struct isl_sched_graph *graph,
	struct isl_sched_node *src, struct isl_sched_node *dst,
	__isl_take isl_basic_set *coef, int pos)
{
	isl_size offset;
	isl_ctx *ctx;
	isl_dim_map *dim_map;

	offset = coef_var_offset(coef);
	if (offset < 0)
		coef = isl_basic_set_free(coef);
	if (!coef)
		return isl_stat_error;

	ctx = isl_basic_set_get_ctx(coef);
	dim_map = inter_dim_map(ctx, graph, src, dst, offset, 1);
	/* When carrying, map the constant term of "coef" onto -e_i,
	 * where LP variable 3 + pos is the e_i of this edge
	 * (see setup_carry_lp for the variable layout). */
	if (pos >= 0)
		isl_dim_map_range(dim_map, 3 + pos, 0, 0, 0, 1, -1);
	graph->lp = add_constraints_dim_map(graph->lp, coef, dim_map);

	return isl_stat_ok;
}
/* Data structure for keeping track of the data needed
 * to exploit non-trivial lineality spaces.
 *
 * "any_non_trivial" is true if there are any non-trivial lineality spaces.
 * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL.
 * "equivalent" connects instances to other instances on the same line(s).
 * "mask" contains the domain spaces of "equivalent".
 * Any instance set not in "mask" does not have a non-trivial lineality space.
 */
struct isl_exploit_lineality_data {
	isl_bool any_non_trivial;	/* any non-trivial lineality space? */
	isl_union_map *equivalent;	/* instances on the same line(s) */
	isl_union_set *mask;		/* domain spaces of "equivalent" */
};
/* Data structure collecting information used during the construction
 * of an LP for carrying dependences.
 *
 * "intra" is a sequence of coefficient constraints for intra-node edges.
 * "inter" is a sequence of coefficient constraints for inter-node edges.
 * "lineality" contains data used to exploit non-trivial lineality spaces.
 */
struct isl_carry {
	isl_basic_set_list *intra;	/* intra-node coefficient constraints */
	isl_basic_set_list *inter;	/* inter-node coefficient constraints */
	struct isl_exploit_lineality_data lineality;
};
  3661. /* Free all the data stored in "carry".
  3662. */
  3663. static void isl_carry_clear(struct isl_carry *carry)
  3664. {
  3665. isl_basic_set_list_free(carry->intra);
  3666. isl_basic_set_list_free(carry->inter);
  3667. isl_union_map_free(carry->lineality.equivalent);
  3668. isl_union_set_free(carry->lineality.mask);
  3669. }
/* Return a pointer to the node in "graph" that lives in "space".
 * If the requested node has been compressed, then "space"
 * corresponds to the compressed space.
 * The graph is assumed to have such a node.
 * Return NULL in case of error.
 *
 * First try and see if "space" is the space of an uncompressed node.
 * If so, return that node.
 * Otherwise, "space" was constructed by construct_compressed_id and
 * contains a user pointer pointing to the node in the tuple id.
 * However, this node belongs to the original dependence graph.
 * If "graph" is a subgraph of this original dependence graph,
 * then the node with the same space still needs to be looked up
 * in the current graph.
 */
static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx,
	struct isl_sched_graph *graph, __isl_keep isl_space *space)
{
	isl_id *id;
	struct isl_sched_node *node;

	if (!space)
		return NULL;

	/* Direct lookup first: "space" may be an uncompressed node space. */
	node = graph_find_node(ctx, graph, space);
	if (!node)
		return NULL;
	if (is_node(graph, node))
		return node;

	/* Otherwise, the tuple id carries a user pointer to the node
	 * in the root graph (set by construct_compressed_id). */
	id = isl_space_get_tuple_id(space, isl_dim_set);
	node = isl_id_get_user(id);
	isl_id_free(id);

	if (!node)
		return NULL;

	if (!is_node(graph->root, node))
		isl_die(ctx, isl_error_internal,
			"space points to invalid node", return NULL);
	/* In a subgraph, translate the root-graph node into the
	 * corresponding node of "graph" through its space. */
	if (graph != graph->root)
		node = graph_find_node(ctx, graph, node->space);
	if (!is_node(graph, node))
		isl_die(ctx, isl_error_internal,
			"unable to find node", return NULL);

	return node;
}
/* Internal data structure for add_all_constraints.
 *
 * "graph" is the schedule constraint graph for which an LP problem
 * is being constructed.
 * "carry_inter" indicates whether inter-node edges should be carried.
 * "pos" is the position of the next edge that needs to be carried.
 */
struct isl_add_all_constraints_data {
	isl_ctx *ctx;
	struct isl_sched_graph *graph;
	int carry_inter;	/* carry (1) or merely respect (0) inter edges */
	int pos;		/* sequence number of the next carried edge */
};
  3725. /* Add the constraints "coef" derived from an edge from a node to itself
  3726. * to data->graph->lp in order to respect the dependences and
  3727. * to try and carry them.
  3728. *
  3729. * The space of "coef" is of the form
  3730. *
  3731. * coefficients[[c_cst] -> S[c_x]]
  3732. *
  3733. * with S[c_x] the (compressed) space of the node.
  3734. * Extract the node from the space and call add_intra_constraints.
  3735. */
  3736. static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user)
  3737. {
  3738. struct isl_add_all_constraints_data *data = user;
  3739. isl_space *space;
  3740. struct isl_sched_node *node;
  3741. space = isl_basic_set_get_space(coef);
  3742. space = isl_space_range(isl_space_unwrap(space));
  3743. node = graph_find_compressed_node(data->ctx, data->graph, space);
  3744. isl_space_free(space);
  3745. return add_intra_constraints(data->graph, node, coef, data->pos++);
  3746. }
  3747. /* Add the constraints "coef" derived from an edge from a node j
  3748. * to a node k to data->graph->lp in order to respect the dependences and
  3749. * to try and carry them (provided data->carry_inter is set).
  3750. *
  3751. * The space of "coef" is of the form
  3752. *
  3753. * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]]
  3754. *
  3755. * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes.
  3756. * Extract the nodes from the space and call add_inter_constraints.
  3757. */
  3758. static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user)
  3759. {
  3760. struct isl_add_all_constraints_data *data = user;
  3761. isl_space *space, *dom;
  3762. struct isl_sched_node *src, *dst;
  3763. int pos;
  3764. space = isl_basic_set_get_space(coef);
  3765. space = isl_space_unwrap(isl_space_range(isl_space_unwrap(space)));
  3766. dom = isl_space_domain(isl_space_copy(space));
  3767. src = graph_find_compressed_node(data->ctx, data->graph, dom);
  3768. isl_space_free(dom);
  3769. space = isl_space_range(space);
  3770. dst = graph_find_compressed_node(data->ctx, data->graph, space);
  3771. isl_space_free(space);
  3772. pos = data->carry_inter ? data->pos++ : -1;
  3773. return add_inter_constraints(data->graph, src, dst, coef, pos);
  3774. }
  3775. /* Add constraints to graph->lp that force all (conditional) validity
  3776. * dependences to be respected and attempt to carry them.
  3777. * "intra" is the sequence of coefficient constraints for intra-node edges.
  3778. * "inter" is the sequence of coefficient constraints for inter-node edges.
  3779. * "carry_inter" indicates whether inter-node edges should be carried or
  3780. * only respected.
  3781. */
  3782. static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph,
  3783. __isl_keep isl_basic_set_list *intra,
  3784. __isl_keep isl_basic_set_list *inter, int carry_inter)
  3785. {
  3786. struct isl_add_all_constraints_data data = { ctx, graph, carry_inter };
  3787. data.pos = 0;
  3788. if (isl_basic_set_list_foreach(intra, &lp_add_intra, &data) < 0)
  3789. return isl_stat_error;
  3790. if (isl_basic_set_list_foreach(inter, &lp_add_inter, &data) < 0)
  3791. return isl_stat_error;
  3792. return isl_stat_ok;
  3793. }
/* Internal data structure for count_all_constraints
 * for keeping track of the number of equality and inequality constraints.
 */
struct isl_sched_count {
	int n_eq;	/* running total of equality constraints */
	int n_ineq;	/* running total of inequality constraints */
};
  3801. /* Add the number of equality and inequality constraints of "bset"
  3802. * to data->n_eq and data->n_ineq.
  3803. */
  3804. static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user)
  3805. {
  3806. struct isl_sched_count *data = user;
  3807. return update_count(bset, 1, &data->n_eq, &data->n_ineq);
  3808. }
  3809. /* Count the number of equality and inequality constraints
  3810. * that will be added to the carry_lp problem.
  3811. * We count each edge exactly once.
  3812. * "intra" is the sequence of coefficient constraints for intra-node edges.
  3813. * "inter" is the sequence of coefficient constraints for inter-node edges.
  3814. */
  3815. static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra,
  3816. __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq)
  3817. {
  3818. struct isl_sched_count data;
  3819. data.n_eq = data.n_ineq = 0;
  3820. if (isl_basic_set_list_foreach(inter, &bset_update_count, &data) < 0)
  3821. return isl_stat_error;
  3822. if (isl_basic_set_list_foreach(intra, &bset_update_count, &data) < 0)
  3823. return isl_stat_error;
  3824. *n_eq = data.n_eq;
  3825. *n_ineq = data.n_ineq;
  3826. return isl_stat_ok;
  3827. }
/* Construct an LP problem for finding schedule coefficients
 * such that the schedule carries as many validity dependences as possible.
 * In particular, for each dependence i, we bound the dependence distance
 * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum
 * of all e_i's. Dependences with e_i = 0 in the solution are simply
 * respected, while those with e_i > 0 (in practice e_i = 1) are carried.
 * "intra" is the sequence of coefficient constraints for intra-node edges.
 * "inter" is the sequence of coefficient constraints for inter-node edges.
 * "n_edge" is the total number of edges.
 * "carry_inter" indicates whether inter-node edges should be carried or
 * only respected. That is, if "carry_inter" is not set, then
 * no e_i variables are introduced for the inter-node edges.
 *
 * All variables of the LP are non-negative. The actual coefficients
 * may be negative, so each coefficient is represented as the difference
 * of two non-negative variables. The negative part always appears
 * immediately before the positive part.
 * Other than that, the variables have the following order
 *
 *	- sum of (1 - e_i) over all edges
 *	- sum of all c_n coefficients
 *		(unconstrained when computing non-parametric schedules)
 *	- sum of positive and negative parts of all c_x coefficients
 *	- for each edge
 *		- e_i
 *	- for each node
 *		- positive and negative parts of c_i_x, in opposite order
 *		- c_i_n (if parametric)
 *		- c_i_0
 *
 * The constraints are those from the (validity) edges plus three equalities
 * to express the sums and n_edge inequalities to express e_i <= 1.
 */
static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph,
	int n_edge, __isl_keep isl_basic_set_list *intra,
	__isl_keep isl_basic_set_list *inter, int carry_inter)
{
	int i;
	int k;
	isl_space *space;
	unsigned total;
	int n_eq, n_ineq;

	/* Three sum variables followed by one e_i per edge,
	 * then the per-node coefficient variables; record each node's
	 * starting offset in node->start. */
	total = 3 + n_edge;
	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[graph->sorted[i]];
		node->start = total;
		total += 1 + node->nparam + 2 * node->nvar;
	}

	if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0)
		return isl_stat_error;

	space = isl_space_set_alloc(ctx, 0, total);
	isl_basic_set_free(graph->lp);
	n_eq += 3;
	n_ineq += n_edge;
	graph->lp = isl_basic_set_alloc_space(space, 0, n_eq, n_ineq);
	graph->lp = isl_basic_set_set_rational(graph->lp);

	/* Equality defining variable 0 as the sum of (1 - e_i):
	 * -n_edge + x_0 + sum_i e_i = 0.
	 * The e_i at position 3 + i corresponds to column 4 + i
	 * since column 0 holds the constant term. */
	k = isl_basic_set_alloc_equality(graph->lp);
	if (k < 0)
		return isl_stat_error;
	isl_seq_clr(graph->lp->eq[k], 1 + total);
	isl_int_set_si(graph->lp->eq[k][0], -n_edge);
	isl_int_set_si(graph->lp->eq[k][1], 1);
	for (i = 0; i < n_edge; ++i)
		isl_int_set_si(graph->lp->eq[k][4 + i], 1);

	if (add_param_sum_constraint(graph, 1) < 0)
		return isl_stat_error;
	if (add_var_sum_constraint(graph, 2) < 0)
		return isl_stat_error;

	/* e_i <= 1 for each edge, expressed as 1 - e_i >= 0. */
	for (i = 0; i < n_edge; ++i) {
		k = isl_basic_set_alloc_inequality(graph->lp);
		if (k < 0)
			return isl_stat_error;
		isl_seq_clr(graph->lp->ineq[k], 1 + total);
		isl_int_set_si(graph->lp->ineq[k][4 + i], -1);
		isl_int_set_si(graph->lp->ineq[k][0], 1);
	}

	if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0)
		return isl_stat_error;

	return isl_stat_ok;
}
  3908. static __isl_give isl_schedule_node *compute_component_schedule(
  3909. __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
  3910. int wcc);
/* If the schedule_split_scaled option is set and if the linear
 * parts of the scheduling rows for all nodes in the graphs have
 * a non-trivial common divisor, then remove this
 * common divisor from the linear part.
 * Otherwise, insert a band node directly and continue with
 * the construction of the schedule.
 *
 * If a non-trivial common divisor is found, then
 * the linear part is reduced and the remainder is ignored.
 * The pieces of the graph that are assigned different remainders
 * form (groups of) strongly connected components within
 * the scaled down band. If needed, they can therefore
 * be ordered along this remainder in a sequence node.
 * However, this ordering is not enforced here in order to allow
 * the scheduler to combine some of the strongly connected components.
 */
static __isl_give isl_schedule_node *split_scaled(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
{
	int i;
	int row;
	isl_ctx *ctx;
	isl_int gcd, gcd_i;
	isl_size n_row;

	if (!node)
		return NULL;

	ctx = isl_schedule_node_get_ctx(node);
	if (!ctx->opt->schedule_split_scaled)
		return compute_next_band(node, graph, 0);
	if (graph->n <= 1)
		return compute_next_band(node, graph, 0);

	n_row = isl_mat_rows(graph->node[0].sched);
	if (n_row < 0)
		return isl_schedule_node_free(node);

	isl_int_init(gcd);
	isl_int_init(gcd_i);

	/* Compute the gcd over all nodes of the coefficients of
	 * the last schedule row, skipping the constant term
	 * in column 0 of each row. */
	isl_int_set_si(gcd, 0);

	row = n_row - 1;

	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];
		isl_size cols = isl_mat_cols(node->sched);

		if (cols < 0)
			break;
		isl_seq_gcd(node->sched->row[row] + 1, cols - 1, &gcd_i);
		isl_int_gcd(gcd, gcd, gcd_i);
	}
	isl_int_clear(gcd_i);
	/* An early break above indicates an error in isl_mat_cols. */
	if (i < graph->n)
		goto error;

	/* A gcd of zero or one means there is nothing to scale down. */
	if (isl_int_cmp_si(gcd, 1) <= 0) {
		isl_int_clear(gcd);
		return compute_next_band(node, graph, 0);
	}

	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];

		/* Round the constant term down to a multiple of gcd
		 * (ignoring the remainder, as documented above) before
		 * dividing the whole row by gcd. */
		isl_int_fdiv_q(node->sched->row[row][0],
			       node->sched->row[row][0], gcd);
		isl_int_mul(node->sched->row[row][0],
			    node->sched->row[row][0], gcd);
		node->sched = isl_mat_scale_down_row(node->sched, row, gcd);
		if (!node->sched)
			goto error;
	}

	isl_int_clear(gcd);

	return compute_next_band(node, graph, 0);
error:
	isl_int_clear(gcd);
	return isl_schedule_node_free(node);
}
/* Is the schedule row "sol" trivial on node "node"?
 * That is, is the solution zero on the dimensions linearly independent of
 * the previously found solutions?
 * Return 1 if the solution is trivial, 0 if it is not and -1 on error.
 *
 * Each coefficient is represented as the difference between
 * two non-negative values in "sol".
 * We construct the schedule row s and check if it is linearly
 * independent of previously computed schedule rows
 * by computing T s, with T the linear combinations that are zero
 * on linearly dependent schedule rows.
 * If the result consists of all zeros, then the solution is trivial.
 */
static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol)
{
	int trivial;
	isl_vec *node_sol;

	if (!sol)
		return -1;
	/* Full rank: every non-zero row is independent, so never trivial. */
	if (node->nvar == node->rank)
		return 0;

	node_sol = extract_var_coef(node, sol);
	/* node->indep is the matrix T described above. */
	node_sol = isl_mat_vec_product(isl_mat_copy(node->indep), node_sol);
	if (!node_sol)
		return -1;

	/* Only the first nvar - rank entries of T s are relevant. */
	trivial = isl_seq_first_non_zero(node_sol->el,
					node->nvar - node->rank) == -1;

	isl_vec_free(node_sol);

	return trivial;
}
  4010. /* Is the schedule row "sol" trivial on any node where it should
  4011. * not be trivial?
  4012. * Return 1 if any solution is trivial, 0 if they are not and -1 on error.
  4013. */
  4014. static int is_any_trivial(struct isl_sched_graph *graph,
  4015. __isl_keep isl_vec *sol)
  4016. {
  4017. int i;
  4018. for (i = 0; i < graph->n; ++i) {
  4019. struct isl_sched_node *node = &graph->node[i];
  4020. int trivial;
  4021. if (!needs_row(graph, node))
  4022. continue;
  4023. trivial = is_trivial(node, sol);
  4024. if (trivial < 0 || trivial)
  4025. return trivial;
  4026. }
  4027. return 0;
  4028. }
/* Does the schedule represented by "sol" perform loop coalescing on "node"?
 * If so, return the position of the coalesced dimension.
 * Otherwise, return node->nvar or -1 on error.
 *
 * In particular, look for pairs of coefficients c_i and c_j such that
 * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|.
 * If any such pair is found, then return i.
 * If size_i is infinity, then no check on c_i needs to be performed.
 */
static int find_node_coalescing(struct isl_sched_node *node,
	__isl_keep isl_vec *sol)
{
	int i, j;
	isl_int max;
	isl_vec *csol;

	/* With at most one variable, no pair (i, j) can exist. */
	if (node->nvar <= 1)
		return node->nvar;

	csol = extract_var_coef(node, sol);
	if (!csol)
		return -1;
	isl_int_init(max);
	for (i = 0; i < node->nvar; ++i) {
		isl_val *v;

		/* A zero c_i cannot cause coalescing in dimension i. */
		if (isl_int_is_zero(csol->el[i]))
			continue;
		v = isl_multi_val_get_val(node->sizes, i);
		if (!v)
			goto error;
		/* A non-integer size (infinity) imposes no bound on i. */
		if (!isl_val_is_int(v)) {
			isl_val_free(v);
			continue;
		}
		/* max = c_i * ceil(size_i/2). */
		v = isl_val_div_ui(v, 2);
		v = isl_val_ceil(v);
		if (!v)
			goto error;
		isl_int_mul(max, v->n, csol->el[i]);
		isl_val_free(v);

		/* Look for a j != i with |c_j| > |max|. */
		for (j = 0; j < node->nvar; ++j) {
			if (j == i)
				continue;
			if (isl_int_abs_gt(csol->el[j], max))
				break;
		}
		/* The inner loop broke early iff such a j was found,
		 * in which case dimension i is being coalesced.
		 */
		if (j < node->nvar)
			break;
	}

	isl_int_clear(max);
	isl_vec_free(csol);
	/* i == node->nvar here when no coalescing was detected. */
	return i;
error:
	isl_int_clear(max);
	isl_vec_free(csol);
	return -1;
}
  4084. /* Force the schedule coefficient at position "pos" of "node" to be zero
  4085. * in "tl".
  4086. * The coefficient is encoded as the difference between two non-negative
  4087. * variables. Force these two variables to have the same value.
  4088. */
  4089. static __isl_give isl_tab_lexmin *zero_out_node_coef(
  4090. __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos)
  4091. {
  4092. int dim;
  4093. isl_ctx *ctx;
  4094. isl_vec *eq;
  4095. ctx = isl_space_get_ctx(node->space);
  4096. dim = isl_tab_lexmin_dim(tl);
  4097. if (dim < 0)
  4098. return isl_tab_lexmin_free(tl);
  4099. eq = isl_vec_alloc(ctx, 1 + dim);
  4100. eq = isl_vec_clr(eq);
  4101. if (!eq)
  4102. return isl_tab_lexmin_free(tl);
  4103. pos = 1 + node_var_coef_pos(node, pos);
  4104. isl_int_set_si(eq->el[pos], 1);
  4105. isl_int_set_si(eq->el[pos + 1], -1);
  4106. tl = isl_tab_lexmin_add_eq(tl, eq->el);
  4107. isl_vec_free(eq);
  4108. return tl;
  4109. }
  4110. /* Return the lexicographically smallest rational point in the basic set
  4111. * from which "tl" was constructed, double checking that this input set
  4112. * was not empty.
  4113. */
  4114. static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl)
  4115. {
  4116. isl_vec *sol;
  4117. sol = isl_tab_lexmin_get_solution(tl);
  4118. if (!sol)
  4119. return NULL;
  4120. if (sol->size == 0)
  4121. isl_die(isl_vec_get_ctx(sol), isl_error_internal,
  4122. "error in schedule construction",
  4123. return isl_vec_free(sol));
  4124. return sol;
  4125. }
/* Does the solution "sol" of the LP problem constructed by setup_carry_lp
 * carry any of the "n_edge" groups of dependences?
 * The value in the first position is the sum of (1 - e_i) over all "n_edge"
 * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented
 * by the edge are carried by the solution.
 * If the sum of the (1 - e_i) is smaller than "n_edge" then at least
 * one of those is carried.
 *
 * Note that despite the fact that the problem is solved using a rational
 * solver, the solution is guaranteed to be integral.
 * Specifically, the dependence distance lower bounds e_i (and therefore
 * also their sum) are integers. See Lemma 5 of [1].
 *
 * Any potential denominator of the sum is cleared by this function.
 * The denominator is not relevant for any of the other elements
 * in the solution.
 *
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
 * Problem, Part II: Multi-Dimensional Time.
 * In Intl. Journal of Parallel Programming, 1992.
 */
static int carries_dependences(__isl_keep isl_vec *sol, int n_edge)
{
	/* sol->el[0] is the denominator; the division is exact since
	 * the sum in sol->el[1] is known to be integral (see above).
	 */
	isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]);
	isl_int_set_si(sol->el[0], 1);
	/* At least one group is carried iff sum (1 - e_i) < n_edge. */
	return isl_int_cmp_si(sol->el[1], n_edge) < 0;
}
/* Return the lexicographically smallest rational point in "lp",
 * assuming that all variables are non-negative and performing some
 * additional sanity checks.
 * If "want_integral" is set, then compute the lexicographically smallest
 * integer point instead.
 * In particular, "lp" should not be empty by construction.
 * Double check that this is the case.
 * If dependences are not carried for any of the "n_edge" edges,
 * then return an empty vector.
 *
 * If the schedule_treat_coalescing option is set and
 * if the computed schedule performs loop coalescing on a given node,
 * i.e., if it is of the form
 *
 *	c_i i + c_j j + ...
 *
 * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero
 * to cut out this solution. Repeat this process until no more loop
 * coalescing occurs or until no more dependences can be carried.
 * In the latter case, revert to the previously computed solution.
 *
 * If the caller requests an integral solution and if coalescing should
 * be treated, then perform the coalescing treatment first as
 * an integral solution computed before coalescing treatment
 * would carry the same number of edges and would therefore probably
 * also be coalescing.
 *
 * To allow the coalescing treatment to be performed first,
 * the initial solution is allowed to be rational and it is only
 * cut out (if needed) in the next iteration, if no coalescing measures
 * were taken.
 */
static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph,
	__isl_take isl_basic_set *lp, int n_edge, int want_integral)
{
	int i, pos, cut;
	isl_ctx *ctx;
	isl_tab_lexmin *tl;
	isl_vec *sol = NULL, *prev;
	int treat_coalescing;
	int try_again;

	if (!lp)
		return NULL;
	ctx = isl_basic_set_get_ctx(lp);
	treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx);
	tl = isl_tab_lexmin_from_basic_set(lp);
	cut = 0;
	do {
		int integral;

		try_again = 0;
		/* "cut" was decided in the previous iteration:
		 * force the next solution to be integral.
		 */
		if (cut)
			tl = isl_tab_lexmin_cut_to_integer(tl);
		/* Keep the previous solution around in case the new
		 * one no longer carries any dependences.
		 */
		prev = sol;
		sol = non_empty_solution(tl);
		if (!sol)
			goto error;
		/* A denominator of one means the solution is integral. */
		integral = isl_int_is_one(sol->el[0]);
		if (!carries_dependences(sol, n_edge)) {
			/* Revert to the previous solution, or to an
			 * empty vector if this was the first attempt.
			 */
			if (!prev)
				prev = isl_vec_alloc(ctx, 0);
			isl_vec_free(sol);
			sol = prev;
			break;
		}
		prev = isl_vec_free(prev);
		/* Postpone the integrality cut to the next iteration
		 * (see the function comment above).
		 */
		cut = want_integral && !integral;
		if (cut)
			try_again = 1;
		if (!treat_coalescing)
			continue;
		/* Look for a node on which the solution coalesces. */
		for (i = 0; i < graph->n; ++i) {
			struct isl_sched_node *node = &graph->node[i];

			pos = find_node_coalescing(node, sol);
			if (pos < 0)
				goto error;
			if (pos < node->nvar)
				break;
		}
		if (i < graph->n) {
			/* Coalescing detected: zero out the offending
			 * coefficient and retry, postponing any cut.
			 */
			try_again = 1;
			tl = zero_out_node_coef(tl, &graph->node[i], pos);
			cut = 0;
		}
	} while (try_again);
	isl_tab_lexmin_free(tl);
	return sol;
error:
	isl_tab_lexmin_free(tl);
	isl_vec_free(prev);
	isl_vec_free(sol);
	return NULL;
}
  4245. /* If "edge" is an edge from a node to itself, then add the corresponding
  4246. * dependence relation to "umap".
  4247. * If "node" has been compressed, then the dependence relation
  4248. * is also compressed first.
  4249. */
  4250. static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap,
  4251. struct isl_sched_edge *edge)
  4252. {
  4253. isl_map *map;
  4254. struct isl_sched_node *node = edge->src;
  4255. if (edge->src != edge->dst)
  4256. return umap;
  4257. map = isl_map_copy(edge->map);
  4258. map = compress(map, node, node);
  4259. umap = isl_union_map_add_map(umap, map);
  4260. return umap;
  4261. }
  4262. /* If "edge" is an edge from a node to another node, then add the corresponding
  4263. * dependence relation to "umap".
  4264. * If the source or destination nodes of "edge" have been compressed,
  4265. * then the dependence relation is also compressed first.
  4266. */
  4267. static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap,
  4268. struct isl_sched_edge *edge)
  4269. {
  4270. isl_map *map;
  4271. if (edge->src == edge->dst)
  4272. return umap;
  4273. map = isl_map_copy(edge->map);
  4274. map = compress(map, edge->src, edge->dst);
  4275. umap = isl_union_map_add_map(umap, map);
  4276. return umap;
  4277. }
/* Internal data structure used by union_drop_coalescing_constraints
 * to collect bounds on all relevant statements.
 *
 * "ctx" is the isl context in which the bounds are collected.
 * "graph" is the schedule constraint graph for which an LP problem
 * is being constructed.
 * "bounds" collects the bounds.
 */
struct isl_collect_bounds_data {
	isl_ctx *ctx;
	struct isl_sched_graph *graph;
	isl_union_set *bounds;
};
  4290. /* Add the size bounds for the node with instance deltas in "set"
  4291. * to data->bounds.
  4292. */
  4293. static isl_stat collect_bounds(__isl_take isl_set *set, void *user)
  4294. {
  4295. struct isl_collect_bounds_data *data = user;
  4296. struct isl_sched_node *node;
  4297. isl_space *space;
  4298. isl_set *bounds;
  4299. space = isl_set_get_space(set);
  4300. isl_set_free(set);
  4301. node = graph_find_compressed_node(data->ctx, data->graph, space);
  4302. isl_space_free(space);
  4303. bounds = isl_set_from_basic_set(get_size_bounds(node));
  4304. data->bounds = isl_union_set_add_set(data->bounds, bounds);
  4305. return isl_stat_ok;
  4306. }
  4307. /* Drop some constraints from "delta" that could be exploited
  4308. * to construct loop coalescing schedules.
  4309. * In particular, drop those constraint that bound the difference
  4310. * to the size of the domain.
  4311. * Do this for each set/node in "delta" separately.
  4312. * The parameters are assumed to have been projected out by the caller.
  4313. */
  4314. static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx,
  4315. struct isl_sched_graph *graph, __isl_take isl_union_set *delta)
  4316. {
  4317. struct isl_collect_bounds_data data = { ctx, graph };
  4318. data.bounds = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
  4319. if (isl_union_set_foreach_set(delta, &collect_bounds, &data) < 0)
  4320. data.bounds = isl_union_set_free(data.bounds);
  4321. delta = isl_union_set_plain_gist(delta, data.bounds);
  4322. return delta;
  4323. }
/* Given a non-trivial lineality space "lineality", add the corresponding
 * universe set to data->mask and add a map from elements to
 * other elements along the lines in "lineality" to data->equivalent.
 * If this is the first time this function gets called
 * (data->any_non_trivial is still false), then set data->any_non_trivial and
 * initialize data->mask and data->equivalent.
 *
 * In particular, if the lineality space is defined by equality constraints
 *
 *	E x = 0
 *
 * then construct an affine mapping
 *
 *	f : x -> E x
 *
 * and compute the equivalence relation of having the same image under f:
 *
 *	{ x -> x' : E x = E x' }
 */
static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality,
	struct isl_exploit_lineality_data *data)
{
	isl_mat *eq;
	isl_space *space;
	isl_set *univ;
	isl_multi_aff *ma;
	isl_multi_pw_aff *mpa;
	isl_map *map;
	isl_size n;

	/* The equality matrix extracted below cannot represent
	 * local variables.
	 */
	if (isl_basic_set_check_no_locals(lineality) < 0)
		goto error;

	space = isl_basic_set_get_space(lineality);
	/* Lazily initialize the accumulators on the first call. */
	if (!data->any_non_trivial) {
		data->equivalent = isl_union_map_empty(isl_space_copy(space));
		data->mask = isl_union_set_empty(isl_space_copy(space));
	}
	data->any_non_trivial = isl_bool_true;

	univ = isl_set_universe(isl_space_copy(space));
	data->mask = isl_union_set_add_set(data->mask, univ);

	/* Build f : x -> E x from the equality constraints E x = 0. */
	eq = isl_basic_set_extract_equalities(lineality);
	n = isl_mat_rows(eq);
	/* On error, free "space" so the failure propagates through
	 * the subsequent space operations.
	 */
	if (n < 0)
		space = isl_space_free(space);
	/* Prepend the row (1, 0, ..., 0); presumably the leading row
	 * encodes a unit denominator for isl_multi_aff_from_aff_mat
	 * -- confirm against that function's documentation.
	 */
	eq = isl_mat_insert_zero_rows(eq, 0, 1);
	eq = isl_mat_set_element_si(eq, 0, 0, 1);
	space = isl_space_from_domain(space);
	space = isl_space_add_dims(space, isl_dim_out, n);
	ma = isl_multi_aff_from_aff_mat(space, eq);
	mpa = isl_multi_pw_aff_from_multi_aff(ma);
	/* { x -> x' : E x = E x' }. */
	map = isl_multi_pw_aff_eq_map(mpa, isl_multi_pw_aff_copy(mpa));
	data->equivalent = isl_union_map_add_map(data->equivalent, map);
	isl_basic_set_free(lineality);

	return isl_stat_ok;
error:
	isl_basic_set_free(lineality);
	return isl_stat_error;
}
  4381. /* Check if the lineality space "set" is non-trivial (i.e., is not just
  4382. * the origin or, in other words, satisfies a number of equality constraints
  4383. * that is smaller than the dimension of the set).
  4384. * If so, extend data->mask and data->equivalent accordingly.
  4385. *
  4386. * The input should not have any local variables already, but
  4387. * isl_set_remove_divs is called to make sure it does not.
  4388. */
  4389. static isl_stat add_lineality(__isl_take isl_set *set, void *user)
  4390. {
  4391. struct isl_exploit_lineality_data *data = user;
  4392. isl_basic_set *hull;
  4393. isl_size dim;
  4394. isl_size n_eq;
  4395. set = isl_set_remove_divs(set);
  4396. hull = isl_set_unshifted_simple_hull(set);
  4397. dim = isl_basic_set_dim(hull, isl_dim_set);
  4398. n_eq = isl_basic_set_n_equality(hull);
  4399. if (dim < 0 || n_eq < 0)
  4400. goto error;
  4401. if (dim != n_eq)
  4402. return add_non_trivial_lineality(hull, data);
  4403. isl_basic_set_free(hull);
  4404. return isl_stat_ok;
  4405. error:
  4406. isl_basic_set_free(hull);
  4407. return isl_stat_error;
  4408. }
/* Check if the difference set on intra-node schedule constraints "intra"
 * has any non-trivial lineality space.
 * If so, then extend the difference set to a difference set
 * on equivalent elements. That is, if "intra" is
 *
 *	{ y - x : (x,y) \in V }
 *
 * and elements are equivalent if they have the same image under f,
 * then return
 *
 *	{ y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
 *
 * or, since f is linear,
 *
 *	{ y' - x' : (x,y) \in V and f(y - x) = f(y' - x') }
 *
 * The results of the search for non-trivial lineality spaces is stored
 * in "data".
 */
static __isl_give isl_union_set *exploit_intra_lineality(
	__isl_take isl_union_set *intra,
	struct isl_exploit_lineality_data *data)
{
	isl_union_set *lineality;
	isl_union_set *uset;

	data->any_non_trivial = isl_bool_false;
	/* Detect non-trivial lineality spaces; add_lineality fills in
	 * data->mask and data->equivalent as a side effect.
	 */
	lineality = isl_union_set_copy(intra);
	lineality = isl_union_set_combined_lineality_space(lineality);
	if (isl_union_set_foreach_set(lineality, &add_lineality, data) < 0)
		data->any_non_trivial = isl_bool_error;
	isl_union_set_free(lineality);

	if (data->any_non_trivial < 0)
		return isl_union_set_free(intra);
	/* Nothing to exploit: return "intra" unchanged. */
	if (!data->any_non_trivial)
		return intra;

	/* Replace the masked part of "intra" by the image of "intra"
	 * under the equivalence relation.
	 */
	uset = isl_union_set_copy(intra);
	intra = isl_union_set_subtract(intra, isl_union_set_copy(data->mask));
	uset = isl_union_set_apply(uset, isl_union_map_copy(data->equivalent));
	intra = isl_union_set_union(intra, uset);
	intra = isl_union_set_remove_divs(intra);

	return intra;
}
/* If the difference set on intra-node schedule constraints was found to have
 * any non-trivial lineality space by exploit_intra_lineality,
 * as recorded in "data", then extend the inter-node
 * schedule constraints "inter" to schedule constraints on equivalent elements.
 * That is, if "inter" is V and
 * elements are equivalent if they have the same image under f, then return
 *
 *	{ (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') }
 */
static __isl_give isl_union_map *exploit_inter_lineality(
	__isl_take isl_union_map *inter,
	struct isl_exploit_lineality_data *data)
{
	isl_union_map *umap;

	if (data->any_non_trivial < 0)
		return isl_union_map_free(inter);
	/* No non-trivial lineality space: nothing to exploit. */
	if (!data->any_non_trivial)
		return inter;

	/* First extend the range: replace the masked part of the range
	 * by all equivalent elements.
	 */
	umap = isl_union_map_copy(inter);
	inter = isl_union_map_subtract_range(inter,
		isl_union_set_copy(data->mask));
	umap = isl_union_map_apply_range(umap,
		isl_union_map_copy(data->equivalent));
	inter = isl_union_map_union(inter, umap);
	/* Then do the same for the domain. */
	umap = isl_union_map_copy(inter);
	inter = isl_union_map_subtract_domain(inter,
		isl_union_set_copy(data->mask));
	umap = isl_union_map_apply_range(isl_union_map_copy(data->equivalent),
		umap);
	inter = isl_union_map_union(inter, umap);
	inter = isl_union_map_remove_divs(inter);

	return inter;
}
  4484. /* For each (conditional) validity edge in "graph",
  4485. * add the corresponding dependence relation using "add"
  4486. * to a collection of dependence relations and return the result.
  4487. * If "coincidence" is set, then coincidence edges are considered as well.
  4488. */
  4489. static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph,
  4490. __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap,
  4491. struct isl_sched_edge *edge), int coincidence)
  4492. {
  4493. int i;
  4494. isl_space *space;
  4495. isl_union_map *umap;
  4496. space = isl_space_copy(graph->node[0].space);
  4497. umap = isl_union_map_empty(space);
  4498. for (i = 0; i < graph->n_edge; ++i) {
  4499. struct isl_sched_edge *edge = &graph->edge[i];
  4500. if (!is_any_validity(edge) &&
  4501. (!coincidence || !is_coincidence(edge)))
  4502. continue;
  4503. umap = add(umap, edge);
  4504. }
  4505. return umap;
  4506. }
  4507. /* For each dependence relation on a (conditional) validity edge
  4508. * from a node to itself,
  4509. * construct the set of coefficients of valid constraints for elements
  4510. * in that dependence relation and collect the results.
  4511. * If "coincidence" is set, then coincidence edges are considered as well.
  4512. *
  4513. * In particular, for each dependence relation R, constraints
  4514. * on coefficients (c_0, c_x) are constructed such that
  4515. *
  4516. * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R }
  4517. *
  4518. * If the schedule_treat_coalescing option is set, then some constraints
  4519. * that could be exploited to construct coalescing schedules
  4520. * are removed before the dual is computed, but after the parameters
  4521. * have been projected out.
  4522. * The entire computation is essentially the same as that performed
  4523. * by intra_coefficients, except that it operates on multiple
  4524. * edges together and that the parameters are always projected out.
  4525. *
  4526. * Additionally, exploit any non-trivial lineality space
  4527. * in the difference set after removing coalescing constraints and
  4528. * store the results of the non-trivial lineality space detection in "data".
  4529. * The procedure is currently run unconditionally, but it is unlikely
  4530. * to find any non-trivial lineality spaces if no coalescing constraints
  4531. * have been removed.
  4532. *
  4533. * Note that if a dependence relation is a union of basic maps,
  4534. * then each basic map needs to be treated individually as it may only
  4535. * be possible to carry the dependences expressed by some of those
  4536. * basic maps and not all of them.
  4537. * The collected validity constraints are therefore not coalesced and
  4538. * it is assumed that they are not coalesced automatically.
  4539. * Duplicate basic maps can be removed, however.
  4540. * In particular, if the same basic map appears as a disjunct
  4541. * in multiple edges, then it only needs to be carried once.
  4542. */
  4543. static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx,
  4544. struct isl_sched_graph *graph, int coincidence,
  4545. struct isl_exploit_lineality_data *data)
  4546. {
  4547. isl_union_map *intra;
  4548. isl_union_set *delta;
  4549. isl_basic_set_list *list;
  4550. intra = collect_validity(graph, &add_intra, coincidence);
  4551. delta = isl_union_map_deltas(intra);
  4552. delta = isl_union_set_project_out_all_params(delta);
  4553. delta = isl_union_set_remove_divs(delta);
  4554. if (isl_options_get_schedule_treat_coalescing(ctx))
  4555. delta = union_drop_coalescing_constraints(ctx, graph, delta);
  4556. delta = exploit_intra_lineality(delta, data);
  4557. list = isl_union_set_get_basic_set_list(delta);
  4558. isl_union_set_free(delta);
  4559. return isl_basic_set_list_coefficients(list);
  4560. }
  4561. /* For each dependence relation on a (conditional) validity edge
  4562. * from a node to some other node,
  4563. * construct the set of coefficients of valid constraints for elements
  4564. * in that dependence relation and collect the results.
  4565. * If "coincidence" is set, then coincidence edges are considered as well.
  4566. *
  4567. * In particular, for each dependence relation R, constraints
  4568. * on coefficients (c_0, c_n, c_x, c_y) are constructed such that
  4569. *
  4570. * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R
  4571. *
  4572. * This computation is essentially the same as that performed
  4573. * by inter_coefficients, except that it operates on multiple
  4574. * edges together.
  4575. *
  4576. * Additionally, exploit any non-trivial lineality space
  4577. * that may have been discovered by collect_intra_validity
  4578. * (as stored in "data").
  4579. *
  4580. * Note that if a dependence relation is a union of basic maps,
  4581. * then each basic map needs to be treated individually as it may only
  4582. * be possible to carry the dependences expressed by some of those
  4583. * basic maps and not all of them.
  4584. * The collected validity constraints are therefore not coalesced and
  4585. * it is assumed that they are not coalesced automatically.
  4586. * Duplicate basic maps can be removed, however.
  4587. * In particular, if the same basic map appears as a disjunct
  4588. * in multiple edges, then it only needs to be carried once.
  4589. */
  4590. static __isl_give isl_basic_set_list *collect_inter_validity(
  4591. struct isl_sched_graph *graph, int coincidence,
  4592. struct isl_exploit_lineality_data *data)
  4593. {
  4594. isl_union_map *inter;
  4595. isl_union_set *wrap;
  4596. isl_basic_set_list *list;
  4597. inter = collect_validity(graph, &add_inter, coincidence);
  4598. inter = exploit_inter_lineality(inter, data);
  4599. inter = isl_union_map_remove_divs(inter);
  4600. wrap = isl_union_map_wrap(inter);
  4601. list = isl_union_set_get_basic_set_list(wrap);
  4602. isl_union_set_free(wrap);
  4603. return isl_basic_set_list_coefficients(list);
  4604. }
  4605. /* Construct an LP problem for finding schedule coefficients
  4606. * such that the schedule carries as many of the "n_edge" groups of
  4607. * dependences as possible based on the corresponding coefficient
  4608. * constraints and return the lexicographically smallest non-trivial solution.
  4609. * "intra" is the sequence of coefficient constraints for intra-node edges.
  4610. * "inter" is the sequence of coefficient constraints for inter-node edges.
  4611. * If "want_integral" is set, then compute an integral solution
  4612. * for the coefficients rather than using the numerators
  4613. * of a rational solution.
  4614. * "carry_inter" indicates whether inter-node edges should be carried or
  4615. * only respected.
  4616. *
  4617. * If none of the "n_edge" groups can be carried
  4618. * then return an empty vector.
  4619. */
  4620. static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx,
  4621. struct isl_sched_graph *graph, int n_edge,
  4622. __isl_keep isl_basic_set_list *intra,
  4623. __isl_keep isl_basic_set_list *inter, int want_integral,
  4624. int carry_inter)
  4625. {
  4626. isl_basic_set *lp;
  4627. if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0)
  4628. return NULL;
  4629. lp = isl_basic_set_copy(graph->lp);
  4630. return non_neg_lexmin(graph, lp, n_edge, want_integral);
  4631. }
/* Construct an LP problem for finding schedule coefficients
 * such that the schedule carries as many of the validity dependences
 * as possible and
 * return the lexicographically smallest non-trivial solution.
 * If "fallback" is set, then the carrying is performed as a fallback
 * for the Pluto-like scheduler.
 * If "coincidence" is set, then try and carry coincidence edges as well.
 *
 * The variable "n_edge" stores the number of groups that should be carried.
 * If none of the "n_edge" groups can be carried
 * then return an empty vector.
 * If, moreover, "n_edge" is zero, then the LP problem does not even
 * need to be constructed.
 *
 * If a fallback solution is being computed, then compute an integral solution
 * for the coefficients rather than using the numerators
 * of a rational solution.
 *
 * If a fallback solution is being computed, if there are any intra-node
 * dependences, and if requested by the user, then first try
 * to only carry those intra-node dependences.
 * If this fails to carry any dependences, then try again
 * with the inter-node dependences included.
 */
static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx,
	struct isl_sched_graph *graph, int fallback, int coincidence)
{
	isl_size n_intra, n_inter;
	int n_edge;
	struct isl_carry carry = { 0 };
	isl_vec *sol;

	/* collect_intra_validity also performs the lineality-space
	 * detection whose results are reused by collect_inter_validity.
	 */
	carry.intra = collect_intra_validity(ctx, graph, coincidence,
						&carry.lineality);
	carry.inter = collect_inter_validity(graph, coincidence,
						&carry.lineality);
	n_intra = isl_basic_set_list_n_basic_set(carry.intra);
	n_inter = isl_basic_set_list_n_basic_set(carry.inter);
	if (n_intra < 0 || n_inter < 0)
		goto error;

	/* Optionally try carrying only the intra-node dependences first;
	 * "fallback" doubles as the want_integral flag and carry_inter
	 * is set to zero here.
	 */
	if (fallback && n_intra > 0 &&
	    isl_options_get_schedule_carry_self_first(ctx)) {
		sol = compute_carrying_sol_coef(ctx, graph, n_intra,
				carry.intra, carry.inter, fallback, 0);
		/* Accept the result unless nothing was carried (size 0)
		 * and there are inter-node dependences left to try.
		 */
		if (!sol || sol->size != 0 || n_inter == 0) {
			isl_carry_clear(&carry);
			return sol;
		}
		isl_vec_free(sol);
	}

	n_edge = n_intra + n_inter;
	/* Nothing to carry: return an empty vector without building an LP. */
	if (n_edge == 0) {
		isl_carry_clear(&carry);
		return isl_vec_alloc(ctx, 0);
	}

	sol = compute_carrying_sol_coef(ctx, graph, n_edge,
				carry.intra, carry.inter, fallback, 1);
	isl_carry_clear(&carry);
	return sol;
error:
	isl_carry_clear(&carry);
	return NULL;
}
/* Construct a schedule row for each node such that as many validity dependences
 * as possible are carried and then continue with the next band.
 * If "fallback" is set, then the carrying is performed as a fallback
 * for the Pluto-like scheduler.
 * If "coincidence" is set, then try and carry coincidence edges as well.
 *
 * If there are no validity dependences, then no dependence can be carried and
 * the procedure is guaranteed to fail. If there is more than one component,
 * then try computing a schedule on each component separately
 * to prevent or at least postpone this failure.
 *
 * If a schedule row is computed, then check that dependences are carried
 * for at least one of the edges.
 *
 * If the computed schedule row turns out to be trivial on one or
 * more nodes where it should not be trivial, then we throw it away
 * and try again on each component separately.
 *
 * If there is only one component, then we accept the schedule row anyway,
 * but we do not consider it as a complete row and therefore do not
 * increment graph->n_row. Note that the ranks of the nodes that
 * do get a non-trivial schedule part will get updated regardless and
 * graph->maxvar is computed based on these ranks. The test for
 * whether more schedule rows are required in compute_schedule_wcc
 * is therefore not affected.
 *
 * Insert a band corresponding to the schedule row at position "node"
 * of the schedule tree and continue with the construction of the schedule.
 * This insertion and the continued construction is performed by split_scaled
 * after optionally checking for non-trivial common divisors.
 */
static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node,
	struct isl_sched_graph *graph, int fallback, int coincidence)
{
	int trivial;
	isl_ctx *ctx;
	isl_vec *sol;

	if (!node)
		return NULL;

	ctx = isl_schedule_node_get_ctx(node);
	sol = compute_carrying_sol(ctx, graph, fallback, coincidence);
	if (!sol)
		return isl_schedule_node_free(node);
	/* An empty solution vector means no dependence could be carried. */
	if (sol->size == 0) {
		isl_vec_free(sol);
		if (graph->scc > 1)
			return compute_component_schedule(node, graph, 1);
		isl_die(ctx, isl_error_unknown, "unable to carry dependences",
			return isl_schedule_node_free(node));
	}

	trivial = is_any_trivial(graph, sol);
	if (trivial < 0) {
		/* On error, pass NULL on so update_schedule fails below. */
		sol = isl_vec_free(sol);
	} else if (trivial && graph->scc > 1) {
		isl_vec_free(sol);
		return compute_component_schedule(node, graph, 1);
	}

	if (update_schedule(graph, sol, 0) < 0)
		return isl_schedule_node_free(node);
	/* A trivial row in a single component is kept but not counted
	 * as a complete row (see the function comment above).
	 */
	if (trivial)
		graph->n_row--;

	return split_scaled(node, graph);
}
/* Construct a schedule row for each node such that as many validity dependences
 * as possible are carried and then continue with the next band.
 * Do so as a fallback for the Pluto-like scheduler.
 * If "coincidence" is set, then try and carry coincidence edges as well.
 */
static __isl_give isl_schedule_node *carry_fallback(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int coincidence)
{
	/* fallback = 1 also requests an integral solution in carry. */
	return carry(node, graph, 1, coincidence);
}
/* Construct a schedule row for each node such that as many validity dependences
 * as possible are carried and then continue with the next band.
 * Do so for the case where the Feautrier scheduler was selected
 * by the user.
 */
static __isl_give isl_schedule_node *carry_feautrier(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
{
	/* Neither fallback mode nor coincidence edges apply here. */
	return carry(node, graph, 0, 0);
}
  4778. /* Construct a schedule row for each node such that as many validity dependences
  4779. * as possible are carried and then continue with the next band.
  4780. * Do so as a fallback for the Pluto-like scheduler.
  4781. */
  4782. static __isl_give isl_schedule_node *carry_dependences(
  4783. __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
  4784. {
  4785. return carry_fallback(node, graph, 0);
  4786. }
  4787. /* Construct a schedule row for each node such that as many validity or
  4788. * coincidence dependences as possible are carried and
  4789. * then continue with the next band.
  4790. * Do so as a fallback for the Pluto-like scheduler.
  4791. */
  4792. static __isl_give isl_schedule_node *carry_coincidence(
  4793. __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
  4794. {
  4795. return carry_fallback(node, graph, 1);
  4796. }
/* Topologically sort statements mapped to the same schedule iteration
 * and insert a sequence node in front of "node"
 * corresponding to this order.
 * If "initialized" is set, then it may be assumed that compute_maxvar
 * has been called on the current band.  Otherwise, call
 * compute_maxvar if and before carry_dependences gets called.
 *
 * If it turns out to be impossible to sort the statements apart,
 * because different dependences impose different orderings
 * on the statements, then we extend the schedule such that
 * it carries at least one more dependence.
 */
static __isl_give isl_schedule_node *sort_statements(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int initialized)
{
	isl_ctx *ctx;
	isl_union_set_list *filters;

	if (!node)
		return NULL;

	ctx = isl_schedule_node_get_ctx(node);
	if (graph->n < 1)
		isl_die(ctx, isl_error_internal,
			"graph should have at least one node",
			return isl_schedule_node_free(node));

	/* A single statement requires no ordering. */
	if (graph->n == 1)
		return node;

	if (update_edges(ctx, graph) < 0)
		return isl_schedule_node_free(node);

	/* Without any remaining dependences, any order will do. */
	if (graph->n_edge == 0)
		return node;

	if (detect_sccs(ctx, graph) < 0)
		return isl_schedule_node_free(node);

	next_band(graph);
	if (graph->scc < graph->n) {
		/* Some SCC contains more than one statement, so the
		 * statements cannot be sorted apart; carry more
		 * dependences instead.
		 */
		if (!initialized && compute_maxvar(graph) < 0)
			return isl_schedule_node_free(node);
		return carry_dependences(node, graph);
	}

	/* Each statement forms its own SCC; the SCC order is
	 * a valid topological order.
	 */
	filters = extract_sccs(ctx, graph);
	node = isl_schedule_node_insert_sequence(node, filters);

	return node;
}
  4840. /* Are there any (non-empty) (conditional) validity edges in the graph?
  4841. */
  4842. static int has_validity_edges(struct isl_sched_graph *graph)
  4843. {
  4844. int i;
  4845. for (i = 0; i < graph->n_edge; ++i) {
  4846. int empty;
  4847. empty = isl_map_plain_is_empty(graph->edge[i].map);
  4848. if (empty < 0)
  4849. return -1;
  4850. if (empty)
  4851. continue;
  4852. if (is_any_validity(&graph->edge[i]))
  4853. return 1;
  4854. }
  4855. return 0;
  4856. }
  4857. /* Should we apply a Feautrier step?
  4858. * That is, did the user request the Feautrier algorithm and are
  4859. * there any validity dependences (left)?
  4860. */
  4861. static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph)
  4862. {
  4863. if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER)
  4864. return 0;
  4865. return has_validity_edges(graph);
  4866. }
/* Compute a schedule for a connected dependence graph using Feautrier's
 * multi-dimensional scheduling algorithm and return the updated schedule node.
 *
 * The original algorithm is described in [1].
 * The main idea is to minimize the number of scheduling dimensions, by
 * trying to satisfy as many dependences as possible per scheduling dimension.
 *
 * The actual work is performed by carry_feautrier, which carries
 * as many validity dependences as possible in each band.
 *
 * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling
 *     Problem, Part II: Multi-Dimensional Time.
 *     In Intl. Journal of Parallel Programming, 1992.
 */
static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier(
	isl_schedule_node *node, struct isl_sched_graph *graph)
{
	return carry_feautrier(node, graph);
}
  4883. /* Turn off the "local" bit on all (condition) edges.
  4884. */
  4885. static void clear_local_edges(struct isl_sched_graph *graph)
  4886. {
  4887. int i;
  4888. for (i = 0; i < graph->n_edge; ++i)
  4889. if (is_condition(&graph->edge[i]))
  4890. clear_local(&graph->edge[i]);
  4891. }
  4892. /* Does "graph" have both condition and conditional validity edges?
  4893. */
  4894. static int need_condition_check(struct isl_sched_graph *graph)
  4895. {
  4896. int i;
  4897. int any_condition = 0;
  4898. int any_conditional_validity = 0;
  4899. for (i = 0; i < graph->n_edge; ++i) {
  4900. if (is_condition(&graph->edge[i]))
  4901. any_condition = 1;
  4902. if (is_conditional_validity(&graph->edge[i]))
  4903. any_conditional_validity = 1;
  4904. }
  4905. return any_condition && any_conditional_validity;
  4906. }
  4907. /* Does "graph" contain any coincidence edge?
  4908. */
  4909. static int has_any_coincidence(struct isl_sched_graph *graph)
  4910. {
  4911. int i;
  4912. for (i = 0; i < graph->n_edge; ++i)
  4913. if (is_coincidence(&graph->edge[i]))
  4914. return 1;
  4915. return 0;
  4916. }
/* Extract the final schedule row as a map with the iteration domain
 * of "node" as domain.
 * Return NULL on error.
 */
static __isl_give isl_map *final_row(struct isl_sched_node *node)
{
	isl_multi_aff *ma;
	isl_size n_row;

	n_row = isl_mat_rows(node->sched);
	if (n_row < 0)
		return NULL;
	/* Extract a partial schedule consisting of only the last row. */
	ma = node_extract_partial_schedule_multi_aff(node, n_row - 1, 1);
	return isl_map_from_multi_aff(ma);
}
/* Is the conditional validity dependence in the edge with index "edge_index"
 * violated by the latest (i.e., final) row of the schedule?
 * That is, is i scheduled after j
 * for any conditional validity dependence i -> j?
 * Return 1 if so, 0 if not and -1 on error.
 */
static int is_violated(struct isl_sched_graph *graph, int edge_index)
{
	isl_map *src_sched, *dst_sched, *map;
	struct isl_sched_edge *edge = &graph->edge[edge_index];
	int empty;

	/* Map both sides of the dependence to their final schedule row. */
	src_sched = final_row(edge->src);
	dst_sched = final_row(edge->dst);
	map = isl_map_copy(edge->map);
	map = isl_map_apply_domain(map, src_sched);
	map = isl_map_apply_range(map, dst_sched);
	/* Keep only the pairs where the source is scheduled after
	 * the destination, i.e., the violations.
	 */
	map = isl_map_order_gt(map, isl_dim_in, 0, isl_dim_out, 0);
	empty = isl_map_is_empty(map);
	isl_map_free(map);

	if (empty < 0)
		return -1;

	return !empty;
}
/* Does "graph" have any satisfied condition edges that
 * are adjacent to the conditional validity constraint with
 * domain "conditional_source" and range "conditional_sink"?
 * Return 1 if so, 0 if not and -1 on error.
 *
 * A satisfied condition is one that is not local.
 * If a condition was forced to be local already (i.e., marked as local)
 * then there is no need to check if it is in fact local.
 *
 * Additionally, mark all adjacent condition edges found as local.
 */
static int has_adjacent_true_conditions(struct isl_sched_graph *graph,
	__isl_keep isl_union_set *conditional_source,
	__isl_keep isl_union_set *conditional_sink)
{
	int i;
	int any = 0;

	for (i = 0; i < graph->n_edge; ++i) {
		int adjacent, local;
		isl_union_map *condition;

		if (!is_condition(&graph->edge[i]))
			continue;
		/* Already forced to be local; nothing to check. */
		if (is_local(&graph->edge[i]))
			continue;

		/* The condition is adjacent if its domain intersects
		 * the sink or its range intersects the source of the
		 * conditional validity constraint.
		 */
		condition = graph->edge[i].tagged_condition;
		adjacent = domain_intersects(condition, conditional_sink);
		if (adjacent >= 0 && !adjacent)
			adjacent = range_intersects(condition,
					conditional_source);
		if (adjacent < 0)
			return -1;
		if (!adjacent)
			continue;

		/* Mark the adjacent condition as local, even if it
		 * happens to be local already, so that it stays local
		 * when the schedule is recomputed.
		 */
		set_local(&graph->edge[i]);

		local = is_condition_false(&graph->edge[i]);
		if (local < 0)
			return -1;
		if (!local)
			any = 1;
	}

	return any;
}
/* Are there any violated conditional validity dependences with
 * adjacent condition dependences that are not local with respect
 * to the current schedule?
 * That is, is the conditional validity constraint violated?
 * Return 1 if so, 0 if not and -1 on error.
 *
 * Additionally, mark all those adjacent condition dependences as local.
 * We also mark those adjacent condition dependences that were not marked
 * as local before, but just happened to be local already.  This ensures
 * that they remain local if the schedule is recomputed.
 *
 * We first collect domain and range of all violated conditional validity
 * dependences and then check if there are any adjacent non-local
 * condition dependences.
 */
static int has_violated_conditional_constraint(isl_ctx *ctx,
	struct isl_sched_graph *graph)
{
	int i;
	int any = 0;
	isl_union_set *source, *sink;

	source = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
	sink = isl_union_set_empty(isl_space_params_alloc(ctx, 0));
	for (i = 0; i < graph->n_edge; ++i) {
		isl_union_set *uset;
		isl_union_map *umap;
		int violated;

		if (!is_conditional_validity(&graph->edge[i]))
			continue;

		violated = is_violated(graph, i);
		if (violated < 0)
			goto error;
		if (!violated)
			continue;

		any = 1;

		/* Accumulate the (tagged) domain and range of the
		 * violated dependence, coalescing as we go to keep
		 * the accumulated sets small.
		 */
		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
		uset = isl_union_map_domain(umap);
		source = isl_union_set_union(source, uset);
		source = isl_union_set_coalesce(source);

		umap = isl_union_map_copy(graph->edge[i].tagged_validity);
		uset = isl_union_map_range(umap);
		sink = isl_union_set_union(sink, uset);
		sink = isl_union_set_coalesce(sink);
	}

	if (any)
		any = has_adjacent_true_conditions(graph, source, sink);

	isl_union_set_free(source);
	isl_union_set_free(sink);
	return any;
error:
	isl_union_set_free(source);
	isl_union_set_free(sink);
	return -1;
}
/* Examine the current band (the rows between graph->band_start and
 * graph->n_total_row), deciding whether to drop it or add it to "node"
 * and then continue with the computation of the next band, if any.
 * If "initialized" is set, then it may be assumed that compute_maxvar
 * has been called on the current band.  Otherwise, call
 * compute_maxvar if and before carry_dependences gets called.
 *
 * The caller keeps looking for a new row as long as
 * graph->n_row < graph->maxvar.  If the latest attempt to find
 * such a row failed (i.e., we still have graph->n_row < graph->maxvar),
 * then we either
 * - split between SCCs and start over (assuming we found an interesting
 *	pair of SCCs between which to split)
 * - continue with the next band (assuming the current band has at least
 *	one row)
 * - if there is more than one SCC left, then split along all SCCs
 * - if outer coincidence needs to be enforced, then try to carry as many
 *	validity or coincidence dependences as possible and
 *	continue with the next band
 * - try to carry as many validity dependences as possible and
 *	continue with the next band
 * In each case, we first insert a band node in the schedule tree
 * if any rows have been computed.
 *
 * If the caller managed to complete the schedule and the current band
 * is empty, then finish off by topologically
 * sorting the statements based on the remaining dependences.
 * If, on the other hand, the current band has at least one row,
 * then continue with the next band.  Note that this next band
 * will necessarily be empty, but the graph may still be split up
 * into weakly connected components before arriving back here.
 */
static __isl_give isl_schedule_node *compute_schedule_finish_band(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int initialized)
{
	int empty;

	if (!node)
		return NULL;

	empty = graph->n_total_row == graph->band_start;
	if (graph->n_row < graph->maxvar) {
		isl_ctx *ctx;

		/* The schedule is incomplete; pick one of the recovery
		 * strategies listed above, in order of preference.
		 */
		ctx = isl_schedule_node_get_ctx(node);
		if (!ctx->opt->schedule_maximize_band_depth && !empty)
			return compute_next_band(node, graph, 1);
		if (graph->src_scc >= 0)
			return compute_split_schedule(node, graph);
		if (!empty)
			return compute_next_band(node, graph, 1);
		if (graph->scc > 1)
			return compute_component_schedule(node, graph, 1);
		if (!initialized && compute_maxvar(graph) < 0)
			return isl_schedule_node_free(node);
		if (isl_options_get_schedule_outer_coincidence(ctx))
			return carry_coincidence(node, graph);
		return carry_dependences(node, graph);
	}

	if (!empty)
		return compute_next_band(node, graph, 1);
	return sort_statements(node, graph, initialized);
}
/* Construct a band of schedule rows for a connected dependence graph.
 * The caller is responsible for determining the strongly connected
 * components and calling compute_maxvar first.
 *
 * We try to find a sequence of as many schedule rows as possible that result
 * in non-negative dependence distances (independent of the previous rows
 * in the sequence, i.e., such that the sequence is tilable), with as
 * many of the initial rows as possible satisfying the coincidence constraints.
 * The computation stops if we can't find any more rows or if we have found
 * all the rows we wanted to find.
 *
 * If ctx->opt->schedule_outer_coincidence is set, then we force the
 * outermost dimension to satisfy the coincidence constraints.  If this
 * turns out to be impossible, we fall back on the general scheme above
 * and try to carry as many dependences as possible.
 *
 * If "graph" contains both condition and conditional validity dependences,
 * then we need to check that the conditional schedule constraint
 * is satisfied, i.e., there are no violated conditional validity dependences
 * that are adjacent to any non-local condition dependences.
 * If there are, then we mark all those adjacent condition dependences
 * as local and recompute the current band.  Those dependences that
 * are marked local will then be forced to be local.
 * The initial computation is performed with no dependences marked as local.
 * If we are lucky, then there will be no violated conditional validity
 * dependences adjacent to any non-local condition dependences.
 * Otherwise, we mark some additional condition dependences as local and
 * recompute.  We continue this process until there are no violations left or
 * until we are no longer able to compute a schedule.
 * Since there are only a finite number of dependences,
 * there will only be a finite number of iterations.
 */
static isl_stat compute_schedule_wcc_band(isl_ctx *ctx,
	struct isl_sched_graph *graph)
{
	int has_coincidence;
	int use_coincidence;
	int force_coincidence = 0;
	int check_conditional;

	if (sort_sccs(graph) < 0)
		return isl_stat_error;

	clear_local_edges(graph);
	check_conditional = need_condition_check(graph);
	has_coincidence = has_any_coincidence(graph);

	if (ctx->opt->schedule_outer_coincidence)
		force_coincidence = 1;

	/* Start out trying to satisfy the coincidence constraints. */
	use_coincidence = has_coincidence;
	while (graph->n_row < graph->maxvar) {
		isl_vec *sol;
		int violated;
		int coincident;

		graph->src_scc = -1;
		graph->dst_scc = -1;

		if (setup_lp(ctx, graph, use_coincidence) < 0)
			return isl_stat_error;
		sol = solve_lp(ctx, graph);
		if (!sol)
			return isl_stat_error;
		if (sol->size == 0) {
			int empty = graph->n_total_row == graph->band_start;

			isl_vec_free(sol);
			/* No row found.  If coincidence was required
			 * but not forced (or some rows were already
			 * found), retry without it.
			 */
			if (use_coincidence && (!force_coincidence || !empty)) {
				use_coincidence = 0;
				continue;
			}
			return isl_stat_ok;
		}
		coincident = !has_coincidence || use_coincidence;
		if (update_schedule(graph, sol, coincident) < 0)
			return isl_stat_error;

		if (!check_conditional)
			continue;
		violated = has_violated_conditional_constraint(ctx, graph);
		if (violated < 0)
			return isl_stat_error;
		if (!violated)
			continue;
		/* The conditional validity constraints are violated;
		 * discard the current band and recompute it with the
		 * newly marked local condition dependences enforced.
		 */
		if (reset_band(graph) < 0)
			return isl_stat_error;
		use_coincidence = has_coincidence;
	}

	return isl_stat_ok;
}
  5190. /* Compute a schedule for a connected dependence graph by considering
  5191. * the graph as a whole and return the updated schedule node.
  5192. *
  5193. * The actual schedule rows of the current band are computed by
  5194. * compute_schedule_wcc_band. compute_schedule_finish_band takes
  5195. * care of integrating the band into "node" and continuing
  5196. * the computation.
  5197. */
  5198. static __isl_give isl_schedule_node *compute_schedule_wcc_whole(
  5199. __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
  5200. {
  5201. isl_ctx *ctx;
  5202. if (!node)
  5203. return NULL;
  5204. ctx = isl_schedule_node_get_ctx(node);
  5205. if (compute_schedule_wcc_band(ctx, graph) < 0)
  5206. return isl_schedule_node_free(node);
  5207. return compute_schedule_finish_band(node, graph, 1);
  5208. }
/* Clustering information used by compute_schedule_wcc_clustering.
 *
 * "n" is the number of SCCs in the original dependence graph
 * "scc" is an array of "n" elements, each representing an SCC
 * of the original dependence graph.  All entries in the same cluster
 * have the same number of schedule rows.
 * "scc_cluster" maps each SCC index to the cluster to which it belongs,
 * where each cluster is represented by the index of the first SCC
 * in the cluster.  Initially, each SCC belongs to a cluster containing
 * only that SCC.
 *
 * "scc_in_merge" is used by merge_clusters_along_edge to keep
 * track of which SCCs need to be merged.
 *
 * "cluster" contains the merged clusters of SCCs after the clustering
 * has completed.
 *
 * "scc_node" is a temporary data structure used inside copy_partial.
 * For each SCC, it keeps track of the number of nodes in the SCC
 * that have already been copied.
 */
struct isl_clustering {
	int n;				/* number of SCCs */
	struct isl_sched_graph *scc;	/* per-SCC subgraphs */
	struct isl_sched_graph *cluster;	/* merged clusters */
	int *scc_cluster;		/* SCC index -> cluster index */
	int *scc_node;			/* per-SCC copied-node counter */
	int *scc_in_merge;		/* per-SCC merge marker */
};
/* Initialize the clustering data structure "c" from "graph".
 *
 * In particular, allocate memory, extract the SCCs from "graph"
 * into c->scc, initialize scc_cluster and construct
 * a band of schedule rows for each SCC.
 * Within each SCC, there is only one SCC by definition.
 * Each SCC initially belongs to a cluster containing only that SCC.
 *
 * On allocation failure, the already allocated members are left
 * for clustering_free to clean up.
 */
static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
	struct isl_sched_graph *graph)
{
	int i;

	c->n = graph->scc;
	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
	c->scc_node = isl_calloc_array(ctx, int, c->n);
	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
	if (!c->scc || !c->cluster ||
	    !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
		return isl_stat_error;

	for (i = 0; i < c->n; ++i) {
		/* Extract the subgraph consisting of SCC "i" only. */
		if (extract_sub_graph(ctx, graph, &node_scc_exactly,
					&edge_scc_exactly, i, &c->scc[i]) < 0)
			return isl_stat_error;
		c->scc[i].scc = 1;
		if (compute_maxvar(&c->scc[i]) < 0)
			return isl_stat_error;
		if (compute_schedule_wcc_band(ctx, &c->scc[i]) < 0)
			return isl_stat_error;
		/* Each SCC starts out as its own cluster. */
		c->scc_cluster[i] = i;
	}

	return isl_stat_ok;
}
/* Free all memory allocated for "c".
 *
 * The "scc" and "cluster" arrays may be NULL if clustering_init
 * failed part-way through its allocations.
 */
static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
{
	int i;

	if (c->scc)
		for (i = 0; i < c->n; ++i)
			graph_free(ctx, &c->scc[i]);
	free(c->scc);
	if (c->cluster)
		for (i = 0; i < c->n; ++i)
			graph_free(ctx, &c->cluster[i]);
	free(c->cluster);
	free(c->scc_cluster);
	free(c->scc_node);
	free(c->scc_in_merge);
}
  5289. /* Should we refrain from merging the cluster in "graph" with
  5290. * any other cluster?
  5291. * In particular, is its current schedule band empty and incomplete.
  5292. */
  5293. static int bad_cluster(struct isl_sched_graph *graph)
  5294. {
  5295. return graph->n_row < graph->maxvar &&
  5296. graph->n_total_row == graph->band_start;
  5297. }
  5298. /* Is "edge" a proximity edge with a non-empty dependence relation?
  5299. */
  5300. static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
  5301. {
  5302. if (!is_proximity(edge))
  5303. return isl_bool_false;
  5304. return isl_bool_not(isl_map_plain_is_empty(edge->map));
  5305. }
/* Return the index of an edge in "graph" that can be used to merge
 * two clusters in "c".
 * Return graph->n_edge if no such edge can be found.
 * Return -1 on error.
 *
 * In particular, return a proximity edge between two clusters
 * that is not marked "no_merge" and such that neither of the
 * two clusters has an incomplete, empty band.
 *
 * If there are multiple such edges, then try and find the most
 * appropriate edge to use for merging.  In particular, pick the edge
 * with the greatest weight.  If there are multiple of those,
 * then pick one with the shortest distance between
 * the two cluster representatives.
 */
static int find_proximity(struct isl_sched_graph *graph,
	struct isl_clustering *c)
{
	/* best_dist and best_weight are only read once a candidate
	 * has been recorded (best < graph->n_edge), so they do not
	 * need an initial value.
	 */
	int i, best = graph->n_edge, best_dist, best_weight;

	for (i = 0; i < graph->n_edge; ++i) {
		struct isl_sched_edge *edge = &graph->edge[i];
		int dist, weight;
		isl_bool prox;

		prox = is_non_empty_proximity(edge);
		if (prox < 0)
			return -1;
		if (!prox)
			continue;
		if (edge->no_merge)
			continue;
		if (bad_cluster(&c->scc[edge->src->scc]) ||
		    bad_cluster(&c->scc[edge->dst->scc]))
			continue;
		/* A zero distance means both endpoints already belong
		 * to the same cluster.
		 */
		dist = c->scc_cluster[edge->dst->scc] -
			c->scc_cluster[edge->src->scc];
		if (dist == 0)
			continue;
		weight = edge->weight;
		/* Keep the current best on greater weight or,
		 * at equal weight, on shorter (or equal) distance.
		 */
		if (best < graph->n_edge) {
			if (best_weight > weight)
				continue;
			if (best_weight == weight && best_dist <= dist)
				continue;
		}
		best = i;
		best_dist = dist;
		best_weight = weight;
	}

	return best;
}
/* Internal data structure used in mark_merge_sccs.
 *
 * "graph" is the dependence graph in which a strongly connected
 * component is constructed.
 * "scc_cluster" maps each SCC index to the cluster to which it belongs.
 * "src" and "dst" are the indices of the nodes that are being merged.
 */
struct isl_mark_merge_sccs_data {
	struct isl_sched_graph *graph;	/* dependence graph being examined */
	int *scc_cluster;		/* SCC index -> cluster index */
	int src;			/* index of first node being merged */
	int dst;			/* index of second node being merged */
};
  5369. /* Check whether the cluster containing node "i" depends on the cluster
  5370. * containing node "j". If "i" and "j" belong to the same cluster,
  5371. * then they are taken to depend on each other to ensure that
  5372. * the resulting strongly connected component consists of complete
  5373. * clusters. Furthermore, if "i" and "j" are the two nodes that
  5374. * are being merged, then they are taken to depend on each other as well.
  5375. * Otherwise, check if there is a (conditional) validity dependence
  5376. * from node[j] to node[i], forcing node[i] to follow node[j].
  5377. */
  5378. static isl_bool cluster_follows(int i, int j, void *user)
  5379. {
  5380. struct isl_mark_merge_sccs_data *data = user;
  5381. struct isl_sched_graph *graph = data->graph;
  5382. int *scc_cluster = data->scc_cluster;
  5383. if (data->src == i && data->dst == j)
  5384. return isl_bool_true;
  5385. if (data->src == j && data->dst == i)
  5386. return isl_bool_true;
  5387. if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
  5388. return isl_bool_true;
  5389. return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
  5390. }
/* Mark all SCCs that belong to either of the two clusters in "c"
 * connected by the edge in "graph" with index "edge", or to any
 * of the intermediate clusters.
 * The marking is recorded in c->scc_in_merge.
 *
 * The given edge has been selected for merging two clusters,
 * meaning that there is at least a proximity edge between the two nodes.
 * However, there may also be (indirect) validity dependences
 * between the two nodes.  When merging the two clusters, all clusters
 * containing one or more of the intermediate nodes along the
 * indirect validity dependences need to be merged in as well.
 *
 * First collect all such nodes by computing the strongly connected
 * component (SCC) containing the two nodes connected by the edge, where
 * the two nodes are considered to depend on each other to make
 * sure they end up in the same SCC.  Similarly, each node is considered
 * to depend on every other node in the same cluster to ensure
 * that the SCC consists of complete clusters.
 *
 * Then the original SCCs that contain any of these nodes are marked
 * in c->scc_in_merge.
 */
static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
	int edge, struct isl_clustering *c)
{
	struct isl_mark_merge_sccs_data data;
	struct isl_tarjan_graph *g;
	int i;

	for (i = 0; i < c->n; ++i)
		c->scc_in_merge[i] = 0;

	data.graph = graph;
	data.scc_cluster = c->scc_cluster;
	/* Convert node pointers to node indices. */
	data.src = graph->edge[edge].src - graph->node;
	data.dst = graph->edge[edge].dst - graph->node;

	g = isl_tarjan_graph_component(ctx, graph->n, data.dst,
					&cluster_follows, &data);
	if (!g)
		goto error;

	/* The component ends with a -1 marker; it must contain at least
	 * the two merged nodes plus that marker.
	 */
	i = g->op;
	if (i < 3)
		isl_die(ctx, isl_error_internal,
			"expecting at least two nodes in component",
			goto error);
	if (g->order[--i] != -1)
		isl_die(ctx, isl_error_internal,
			"expecting end of component marker", goto error);

	/* Walk the component backwards, marking the SCC of each node. */
	for (--i; i >= 0 && g->order[i] != -1; --i) {
		int scc = graph->node[g->order[i]].scc;
		c->scc_in_merge[scc] = 1;
	}

	isl_tarjan_graph_free(g);
	return isl_stat_ok;
error:
	isl_tarjan_graph_free(g);
	return isl_stat_error;
}
/* Construct the identifier "cluster_i".
 *
 * The buffer is large enough for "cluster_" followed by the decimal
 * representation of any int; isl_id_alloc copies the name.
 */
static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
{
	char name[40];

	snprintf(name, sizeof(name), "cluster_%d", i);
	return isl_id_alloc(ctx, name, NULL);
}
/* Construct the space of the cluster with index "i" containing
 * the strongly connected component "scc".
 *
 * In particular, construct a space called cluster_i with dimension equal
 * to the number of schedule rows in the current band of "scc".
 */
static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
{
	int nvar;
	isl_space *space;
	isl_id *id;

	/* Number of schedule rows in the current band of the SCC. */
	nvar = scc->n_total_row - scc->band_start;
	/* Reuse the parameters of the SCC's first node. */
	space = isl_space_copy(scc->node[0].space);
	space = isl_space_params(space);
	space = isl_space_set_from_params(space);
	space = isl_space_add_dims(space, isl_dim_set, nvar);
	id = cluster_id(isl_space_get_ctx(space), i);
	space = isl_space_set_tuple_id(space, isl_dim_set, id);

	return space;
}
  5475. /* Collect the domain of the graph for merging clusters.
  5476. *
  5477. * In particular, for each cluster with first SCC "i", construct
  5478. * a set in the space called cluster_i with dimension equal
  5479. * to the number of schedule rows in the current band of the cluster.
  5480. */
  5481. static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
  5482. struct isl_sched_graph *graph, struct isl_clustering *c)
  5483. {
  5484. int i;
  5485. isl_space *space;
  5486. isl_union_set *domain;
  5487. space = isl_space_params_alloc(ctx, 0);
  5488. domain = isl_union_set_empty(space);
  5489. for (i = 0; i < graph->scc; ++i) {
  5490. isl_space *space;
  5491. if (!c->scc_in_merge[i])
  5492. continue;
  5493. if (c->scc_cluster[i] != i)
  5494. continue;
  5495. space = cluster_space(&c->scc[i], i);
  5496. domain = isl_union_set_add_set(domain, isl_set_universe(space));
  5497. }
  5498. return domain;
  5499. }
/* Construct a map from the original instances to the corresponding
 * cluster instance in the current bands of the clusters in "c".
 */
static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
	struct isl_sched_graph *graph, struct isl_clustering *c)
{
	int i, j;
	isl_space *space;
	isl_union_map *cluster_map;

	space = isl_space_params_alloc(ctx, 0);
	cluster_map = isl_union_map_empty(space);
	for (i = 0; i < graph->scc; ++i) {
		int start, n;
		isl_id *id;

		if (!c->scc_in_merge[i])
			continue;

		id = cluster_id(ctx, c->scc_cluster[i]);
		/* The current band of the SCC. */
		start = c->scc[i].band_start;
		n = c->scc[i].n_total_row - start;
		for (j = 0; j < c->scc[i].n; ++j) {
			isl_multi_aff *ma;
			isl_map *map;
			struct isl_sched_node *node = &c->scc[i].node[j];

			/* Map each node instance to its partial schedule
			 * in the current band, renamed to the cluster space.
			 */
			ma = node_extract_partial_schedule_multi_aff(node,
								start, n);
			ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out,
							isl_id_copy(id));
			map = isl_map_from_multi_aff(ma);
			cluster_map = isl_union_map_add_map(cluster_map, map);
		}
		isl_id_free(id);
	}

	return cluster_map;
}
  5534. /* Add "umap" to the schedule constraints "sc" of all types of "edge"
  5535. * that are not isl_edge_condition or isl_edge_conditional_validity.
  5536. */
  5537. static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
  5538. struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
  5539. __isl_take isl_schedule_constraints *sc)
  5540. {
  5541. enum isl_edge_type t;
  5542. if (!sc)
  5543. return NULL;
  5544. for (t = isl_edge_first; t <= isl_edge_last; ++t) {
  5545. if (t == isl_edge_condition ||
  5546. t == isl_edge_conditional_validity)
  5547. continue;
  5548. if (!is_type(edge, t))
  5549. continue;
  5550. sc = isl_schedule_constraints_add(sc, t,
  5551. isl_union_map_copy(umap));
  5552. }
  5553. return sc;
  5554. }
/* Add schedule constraints of types isl_edge_condition and
 * isl_edge_conditional_validity to "sc" by applying "umap" to
 * the domains of the wrapped relations in domain and range
 * of the corresponding tagged constraints of "edge".
 */
static __isl_give isl_schedule_constraints *add_conditional_constraints(
	struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
	__isl_take isl_schedule_constraints *sc)
{
	enum isl_edge_type t;
	isl_union_map *tagged;

	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
		if (!is_type(edge, t))
			continue;
		if (t == isl_edge_condition)
			tagged = isl_union_map_copy(edge->tagged_condition);
		else
			tagged = isl_union_map_copy(edge->tagged_validity);
		/* The tagged constraints have the form
		 * [A -> a] -> [B -> b], with "a" and "b" the tags.
		 * Zipping turns this into [A -> B] -> [a -> b],
		 * so that "umap" (a mapping on pairs of instances)
		 * can be applied to the domain.
		 * A second zip restores the original structure.
		 */
		tagged = isl_union_map_zip(tagged);
		tagged = isl_union_map_apply_domain(tagged,
					isl_union_map_copy(umap));
		tagged = isl_union_map_zip(tagged);
		sc = isl_schedule_constraints_add(sc, t, tagged);
		if (!sc)
			return NULL;
	}

	return sc;
}
/* Given a mapping "cluster_map" from the original instances to
 * the cluster instances, add schedule constraints on the clusters
 * to "sc" corresponding to the original constraints represented by "edge".
 *
 * For non-tagged dependence constraints, the cluster constraints
 * are obtained by applying "cluster_map" to the edge->map.
 *
 * For tagged dependence constraints, "cluster_map" needs to be applied
 * to the domains of the wrapped relations in domain and range
 * of the tagged dependence constraints. Pick out the mappings
 * from these domains from "cluster_map" and construct their product.
 * This mapping can then be applied to the pair of domains.
 */
static __isl_give isl_schedule_constraints *collect_edge_constraints(
	struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
	__isl_take isl_schedule_constraints *sc)
{
	isl_union_map *umap;
	isl_space *space;
	isl_union_set *uset;
	isl_union_map *umap1, *umap2;

	if (!sc)
		return NULL;

	/* Transform both ends of the dependence to cluster instances. */
	umap = isl_union_map_from_map(isl_map_copy(edge->map));
	umap = isl_union_map_apply_domain(umap,
				isl_union_map_copy(cluster_map));
	umap = isl_union_map_apply_range(umap,
				isl_union_map_copy(cluster_map));
	sc = add_non_conditional_constraints(edge, umap, sc);
	isl_union_map_free(umap);

	/* Tagged constraints only need to be handled for condition and
	 * conditional validity edges.
	 */
	if (!sc || (!is_condition(edge) && !is_conditional_validity(edge)))
		return sc;

	/* Restrict "cluster_map" to the source space of the edge ... */
	space = isl_space_domain(isl_map_get_space(edge->map));
	uset = isl_union_set_from_set(isl_set_universe(space));
	umap1 = isl_union_map_copy(cluster_map);
	umap1 = isl_union_map_intersect_domain(umap1, uset);
	/* ... and to its target space ... */
	space = isl_space_range(isl_map_get_space(edge->map));
	uset = isl_union_set_from_set(isl_set_universe(space));
	umap2 = isl_union_map_copy(cluster_map);
	umap2 = isl_union_map_intersect_domain(umap2, uset);
	/* ... and combine the two into a mapping on pairs of instances. */
	umap = isl_union_map_product(umap1, umap2);

	sc = add_conditional_constraints(edge, umap, sc);

	isl_union_map_free(umap);
	return sc;
}
  5628. /* Given a mapping "cluster_map" from the original instances to
  5629. * the cluster instances, add schedule constraints on the clusters
  5630. * to "sc" corresponding to all edges in "graph" between nodes that
  5631. * belong to SCCs that are marked for merging in "scc_in_merge".
  5632. */
  5633. static __isl_give isl_schedule_constraints *collect_constraints(
  5634. struct isl_sched_graph *graph, int *scc_in_merge,
  5635. __isl_keep isl_union_map *cluster_map,
  5636. __isl_take isl_schedule_constraints *sc)
  5637. {
  5638. int i;
  5639. for (i = 0; i < graph->n_edge; ++i) {
  5640. struct isl_sched_edge *edge = &graph->edge[i];
  5641. if (!scc_in_merge[edge->src->scc])
  5642. continue;
  5643. if (!scc_in_merge[edge->dst->scc])
  5644. continue;
  5645. sc = collect_edge_constraints(edge, cluster_map, sc);
  5646. }
  5647. return sc;
  5648. }
/* Construct a dependence graph for scheduling clusters with respect
 * to each other and store the result in "merge_graph".
 * In particular, the nodes of the graph correspond to the schedule
 * dimensions of the current bands of those clusters that have been
 * marked for merging in "c".
 *
 * First construct an isl_schedule_constraints object for this domain
 * by transforming the edges in "graph" to the domain.
 * Then initialize a dependence graph for scheduling from these
 * constraints.
 */
static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
	struct isl_clustering *c, struct isl_sched_graph *merge_graph)
{
	isl_union_set *domain;
	isl_union_map *cluster_map;
	isl_schedule_constraints *sc;
	isl_stat r;

	/* One universe set per cluster marked for merging. */
	domain = collect_domain(ctx, graph, c);
	sc = isl_schedule_constraints_on_domain(domain);
	if (!sc)
		return isl_stat_error;
	/* Map from original instances to cluster instances, used to
	 * transfer the original dependences to the cluster domain.
	 */
	cluster_map = collect_cluster_map(ctx, graph, c);
	sc = collect_constraints(graph, c->scc_in_merge, cluster_map, sc);
	isl_union_map_free(cluster_map);

	r = graph_init(merge_graph, sc);

	isl_schedule_constraints_free(sc);

	return r;
}
  5678. /* Compute the maximal number of remaining schedule rows that still need
  5679. * to be computed for the nodes that belong to clusters with the maximal
  5680. * dimension for the current band (i.e., the band that is to be merged).
  5681. * Only clusters that are about to be merged are considered.
  5682. * "maxvar" is the maximal dimension for the current band.
  5683. * "c" contains information about the clusters.
  5684. *
  5685. * Return the maximal number of remaining schedule rows or -1 on error.
  5686. */
  5687. static int compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
  5688. {
  5689. int i, j;
  5690. int max_slack;
  5691. max_slack = 0;
  5692. for (i = 0; i < c->n; ++i) {
  5693. int nvar;
  5694. struct isl_sched_graph *scc;
  5695. if (!c->scc_in_merge[i])
  5696. continue;
  5697. scc = &c->scc[i];
  5698. nvar = scc->n_total_row - scc->band_start;
  5699. if (nvar != maxvar)
  5700. continue;
  5701. for (j = 0; j < scc->n; ++j) {
  5702. struct isl_sched_node *node = &scc->node[j];
  5703. int slack;
  5704. if (node_update_vmap(node) < 0)
  5705. return -1;
  5706. slack = node->nvar - node->rank;
  5707. if (slack > max_slack)
  5708. max_slack = slack;
  5709. }
  5710. }
  5711. return max_slack;
  5712. }
  5713. /* If there are any clusters where the dimension of the current band
  5714. * (i.e., the band that is to be merged) is smaller than "maxvar" and
  5715. * if there are any nodes in such a cluster where the number
  5716. * of remaining schedule rows that still need to be computed
  5717. * is greater than "max_slack", then return the smallest current band
  5718. * dimension of all these clusters. Otherwise return the original value
  5719. * of "maxvar". Return -1 in case of any error.
  5720. * Only clusters that are about to be merged are considered.
  5721. * "c" contains information about the clusters.
  5722. */
  5723. static int limit_maxvar_to_slack(int maxvar, int max_slack,
  5724. struct isl_clustering *c)
  5725. {
  5726. int i, j;
  5727. for (i = 0; i < c->n; ++i) {
  5728. int nvar;
  5729. struct isl_sched_graph *scc;
  5730. if (!c->scc_in_merge[i])
  5731. continue;
  5732. scc = &c->scc[i];
  5733. nvar = scc->n_total_row - scc->band_start;
  5734. if (nvar >= maxvar)
  5735. continue;
  5736. for (j = 0; j < scc->n; ++j) {
  5737. struct isl_sched_node *node = &scc->node[j];
  5738. int slack;
  5739. if (node_update_vmap(node) < 0)
  5740. return -1;
  5741. slack = node->nvar - node->rank;
  5742. if (slack > max_slack) {
  5743. maxvar = nvar;
  5744. break;
  5745. }
  5746. }
  5747. }
  5748. return maxvar;
  5749. }
  5750. /* Adjust merge_graph->maxvar based on the number of remaining schedule rows
  5751. * that still need to be computed. In particular, if there is a node
  5752. * in a cluster where the dimension of the current band is smaller
  5753. * than merge_graph->maxvar, but the number of remaining schedule rows
  5754. * is greater than that of any node in a cluster with the maximal
  5755. * dimension for the current band (i.e., merge_graph->maxvar),
  5756. * then adjust merge_graph->maxvar to the (smallest) current band dimension
  5757. * of those clusters. Without this adjustment, the total number of
  5758. * schedule dimensions would be increased, resulting in a skewed view
  5759. * of the number of coincident dimensions.
  5760. * "c" contains information about the clusters.
  5761. *
  5762. * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
  5763. * then there is no point in attempting any merge since it will be rejected
  5764. * anyway. Set merge_graph->maxvar to zero in such cases.
  5765. */
  5766. static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
  5767. struct isl_sched_graph *merge_graph, struct isl_clustering *c)
  5768. {
  5769. int max_slack, maxvar;
  5770. max_slack = compute_maxvar_max_slack(merge_graph->maxvar, c);
  5771. if (max_slack < 0)
  5772. return isl_stat_error;
  5773. maxvar = limit_maxvar_to_slack(merge_graph->maxvar, max_slack, c);
  5774. if (maxvar < 0)
  5775. return isl_stat_error;
  5776. if (maxvar < merge_graph->maxvar) {
  5777. if (isl_options_get_schedule_maximize_band_depth(ctx))
  5778. merge_graph->maxvar = 0;
  5779. else
  5780. merge_graph->maxvar = maxvar;
  5781. }
  5782. return isl_stat_ok;
  5783. }
  5784. /* Return the number of coincident dimensions in the current band of "graph",
  5785. * where the nodes of "graph" are assumed to be scheduled by a single band.
  5786. */
  5787. static int get_n_coincident(struct isl_sched_graph *graph)
  5788. {
  5789. int i;
  5790. for (i = graph->band_start; i < graph->n_total_row; ++i)
  5791. if (!graph->node[0].coincident[i])
  5792. break;
  5793. return i - graph->band_start;
  5794. }
  5795. /* Should the clusters be merged based on the cluster schedule
  5796. * in the current (and only) band of "merge_graph", given that
  5797. * coincidence should be maximized?
  5798. *
  5799. * If the number of coincident schedule dimensions in the merged band
  5800. * would be less than the maximal number of coincident schedule dimensions
  5801. * in any of the merged clusters, then the clusters should not be merged.
  5802. */
  5803. static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
  5804. struct isl_sched_graph *merge_graph)
  5805. {
  5806. int i;
  5807. int n_coincident;
  5808. int max_coincident;
  5809. max_coincident = 0;
  5810. for (i = 0; i < c->n; ++i) {
  5811. if (!c->scc_in_merge[i])
  5812. continue;
  5813. n_coincident = get_n_coincident(&c->scc[i]);
  5814. if (n_coincident > max_coincident)
  5815. max_coincident = n_coincident;
  5816. }
  5817. n_coincident = get_n_coincident(merge_graph);
  5818. return isl_bool_ok(n_coincident >= max_coincident);
  5819. }
/* Return the transformation on "node" expressed by the current (and only)
 * band of "merge_graph" applied to the clusters in "c".
 *
 * First find the representation of "node" in its SCC in "c" and
 * extract the transformation expressed by the current band.
 * Then extract the transformation applied by "merge_graph"
 * to the cluster to which this SCC belongs.
 * Combine the two to obtain the complete transformation on the node.
 *
 * Note that the range of the first transformation is an anonymous space,
 * while the domain of the second is named "cluster_X". The range
 * of the former therefore needs to be adjusted before the two
 * can be combined.
 */
static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
	struct isl_sched_node *node, struct isl_clustering *c,
	struct isl_sched_graph *merge_graph)
{
	struct isl_sched_node *scc_node, *cluster_node;
	int start, n;
	isl_id *id;
	isl_space *space;
	isl_multi_aff *ma, *ma2;

	/* Locate the copy of "node" in the graph of its SCC.
	 * NOTE(review): the found-but-not-member case dies here;
	 * a NULL result presumably only occurs on error and is
	 * propagated through the isl calls below — confirm against
	 * graph_find_node.
	 */
	scc_node = graph_find_node(ctx, &c->scc[node->scc], node->space);
	if (scc_node && !is_node(&c->scc[node->scc], scc_node))
		isl_die(ctx, isl_error_internal, "unable to find node",
			return NULL);
	/* Transformation expressed by the current band of the SCC. */
	start = c->scc[node->scc].band_start;
	n = c->scc[node->scc].n_total_row - start;
	ma = node_extract_partial_schedule_multi_aff(scc_node, start, n);
	/* Locate the cluster of this SCC in "merge_graph". */
	space = cluster_space(&c->scc[node->scc], c->scc_cluster[node->scc]);
	cluster_node = graph_find_node(ctx, merge_graph, space);
	if (cluster_node && !is_node(merge_graph, cluster_node))
		isl_die(ctx, isl_error_internal, "unable to find cluster",
			space = isl_space_free(space));
	/* Rename the anonymous range of "ma" to the cluster space so
	 * that it can be composed with the cluster schedule.
	 */
	id = isl_space_get_tuple_id(space, isl_dim_set);
	ma = isl_multi_aff_set_tuple_id(ma, isl_dim_out, id);
	isl_space_free(space);
	/* Transformation applied by "merge_graph" to the cluster,
	 * i.e., its complete schedule.
	 */
	n = merge_graph->n_total_row;
	ma2 = node_extract_partial_schedule_multi_aff(cluster_node, 0, n);
	ma = isl_multi_aff_pullback_multi_aff(ma2, ma);

	return isl_map_from_multi_aff(ma);
}
  5863. /* Give a set of distances "set", are they bounded by a small constant
  5864. * in direction "pos"?
  5865. * In practice, check if they are bounded by 2 by checking that there
  5866. * are no elements with a value greater than or equal to 3 or
  5867. * smaller than or equal to -3.
  5868. */
  5869. static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
  5870. {
  5871. isl_bool bounded;
  5872. isl_set *test;
  5873. if (!set)
  5874. return isl_bool_error;
  5875. test = isl_set_copy(set);
  5876. test = isl_set_lower_bound_si(test, isl_dim_set, pos, 3);
  5877. bounded = isl_set_is_empty(test);
  5878. isl_set_free(test);
  5879. if (bounded < 0 || !bounded)
  5880. return bounded;
  5881. test = isl_set_copy(set);
  5882. test = isl_set_upper_bound_si(test, isl_dim_set, pos, -3);
  5883. bounded = isl_set_is_empty(test);
  5884. isl_set_free(test);
  5885. return bounded;
  5886. }
  5887. /* Does the set "set" have a fixed (but possible parametric) value
  5888. * at dimension "pos"?
  5889. */
  5890. static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
  5891. {
  5892. isl_size n;
  5893. isl_bool single;
  5894. n = isl_set_dim(set, isl_dim_set);
  5895. if (n < 0)
  5896. return isl_bool_error;
  5897. set = isl_set_copy(set);
  5898. set = isl_set_project_out(set, isl_dim_set, pos + 1, n - (pos + 1));
  5899. set = isl_set_project_out(set, isl_dim_set, 0, pos);
  5900. single = isl_set_is_singleton(set);
  5901. isl_set_free(set);
  5902. return single;
  5903. }
  5904. /* Does "map" have a fixed (but possible parametric) value
  5905. * at dimension "pos" of either its domain or its range?
  5906. */
  5907. static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
  5908. {
  5909. isl_set *set;
  5910. isl_bool single;
  5911. set = isl_map_domain(isl_map_copy(map));
  5912. single = has_single_value(set, pos);
  5913. isl_set_free(set);
  5914. if (single < 0 || single)
  5915. return single;
  5916. set = isl_map_range(isl_map_copy(map));
  5917. single = has_single_value(set, pos);
  5918. isl_set_free(set);
  5919. return single;
  5920. }
/* Does the edge "edge" from "graph" have bounded dependence distances
 * in the merged graph "merge_graph" of a selection of clusters in "c"?
 *
 * Extract the complete transformations of the source and destination
 * nodes of the edge, apply them to the edge constraints and
 * compute the differences. Finally, check if these differences are bounded
 * in each direction.
 *
 * If the dimension of the band is greater than the number of
 * dimensions that can be expected to be optimized by the edge
 * (based on its weight), then also allow the differences to be unbounded
 * in the remaining dimensions, but only if either the source or
 * the destination has a fixed value in that direction.
 * This allows a statement that produces values that are used by
 * several instances of another statement to be merged with that
 * other statement.
 * However, merging such clusters will introduce an inherently
 * large proximity distance inside the merged cluster, meaning
 * that proximity distances will no longer be optimized in
 * subsequent merges. These merges are therefore only allowed
 * after all other possible merges have been tried.
 * The first time such a merge is encountered, the weight of the edge
 * is replaced by a negative weight. The second time (i.e., after
 * all merges over edges with a non-negative weight have been tried),
 * the merge is allowed.
 */
static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
	struct isl_sched_graph *graph, struct isl_clustering *c,
	struct isl_sched_graph *merge_graph)
{
	int i, n_slack;
	isl_size n;
	isl_bool bounded;
	isl_map *map, *t;
	isl_set *dist;

	/* Transform both ends of the dependence relation to the merged
	 * schedule and compute the dependence distances.
	 */
	map = isl_map_copy(edge->map);
	t = extract_node_transformation(ctx, edge->src, c, merge_graph);
	map = isl_map_apply_domain(map, t);
	t = extract_node_transformation(ctx, edge->dst, c, merge_graph);
	map = isl_map_apply_range(map, t);
	dist = isl_map_deltas(isl_map_copy(map));

	bounded = isl_bool_true;
	n = isl_set_dim(dist, isl_dim_set);
	if (n < 0)
		goto error;
	/* Number of directions that may remain unbounded.
	 * A negative weight means the edge was postponed before;
	 * undo the earlier weight adjustment to recover the
	 * original weight.
	 */
	n_slack = n - edge->weight;
	if (edge->weight < 0)
		n_slack -= graph->max_weight + 1;
	for (i = 0; i < n; ++i) {
		isl_bool bounded_i, singular_i;

		bounded_i = distance_is_bounded(dist, i);
		if (bounded_i < 0)
			goto error;
		if (bounded_i)
			continue;
		/* An unbounded direction is only acceptable (after the
		 * singularity check below) on a postponed edge, i.e.,
		 * one with a negative weight.
		 */
		if (edge->weight >= 0)
			bounded = isl_bool_false;
		n_slack--;
		if (n_slack < 0)
			break;
		/* Tolerate the unbounded direction if either end
		 * has a fixed value in this direction.
		 */
		singular_i = has_singular_src_or_dst(map, i);
		if (singular_i < 0)
			goto error;
		if (singular_i)
			continue;
		bounded = isl_bool_false;
		break;
	}
	/* All directions were bounded or singular (the loop ran to
	 * completion) but the merge is still not allowed yet:
	 * postpone the edge by making its weight negative.
	 */
	if (!bounded && i >= n && edge->weight >= 0)
		edge->weight -= graph->max_weight + 1;

	isl_map_free(map);
	isl_set_free(dist);

	return bounded;
error:
	isl_map_free(map);
	isl_set_free(dist);
	return isl_bool_error;
}
  5999. /* Should the clusters be merged based on the cluster schedule
  6000. * in the current (and only) band of "merge_graph"?
  6001. * "graph" is the original dependence graph, while "c" records
  6002. * which SCCs are involved in the latest merge.
  6003. *
  6004. * In particular, is there at least one proximity constraint
  6005. * that is optimized by the merge?
  6006. *
  6007. * A proximity constraint is considered to be optimized
  6008. * if the dependence distances are small.
  6009. */
  6010. static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
  6011. struct isl_sched_graph *graph, struct isl_clustering *c,
  6012. struct isl_sched_graph *merge_graph)
  6013. {
  6014. int i;
  6015. for (i = 0; i < graph->n_edge; ++i) {
  6016. struct isl_sched_edge *edge = &graph->edge[i];
  6017. isl_bool bounded;
  6018. if (!is_proximity(edge))
  6019. continue;
  6020. if (!c->scc_in_merge[edge->src->scc])
  6021. continue;
  6022. if (!c->scc_in_merge[edge->dst->scc])
  6023. continue;
  6024. if (c->scc_cluster[edge->dst->scc] ==
  6025. c->scc_cluster[edge->src->scc])
  6026. continue;
  6027. bounded = has_bounded_distances(ctx, edge, graph, c,
  6028. merge_graph);
  6029. if (bounded < 0 || bounded)
  6030. return bounded;
  6031. }
  6032. return isl_bool_false;
  6033. }
  6034. /* Should the clusters be merged based on the cluster schedule
  6035. * in the current (and only) band of "merge_graph"?
  6036. * "graph" is the original dependence graph, while "c" records
  6037. * which SCCs are involved in the latest merge.
  6038. *
  6039. * If the current band is empty, then the clusters should not be merged.
  6040. *
  6041. * If the band depth should be maximized and the merge schedule
  6042. * is incomplete (meaning that the dimension of some of the schedule
  6043. * bands in the original schedule will be reduced), then the clusters
  6044. * should not be merged.
  6045. *
  6046. * If the schedule_maximize_coincidence option is set, then check that
  6047. * the number of coincident schedule dimensions is not reduced.
  6048. *
  6049. * Finally, only allow the merge if at least one proximity
  6050. * constraint is optimized.
  6051. */
  6052. static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
  6053. struct isl_clustering *c, struct isl_sched_graph *merge_graph)
  6054. {
  6055. if (merge_graph->n_total_row == merge_graph->band_start)
  6056. return isl_bool_false;
  6057. if (isl_options_get_schedule_maximize_band_depth(ctx) &&
  6058. merge_graph->n_total_row < merge_graph->maxvar)
  6059. return isl_bool_false;
  6060. if (isl_options_get_schedule_maximize_coincidence(ctx)) {
  6061. isl_bool ok;
  6062. ok = ok_to_merge_coincident(c, merge_graph);
  6063. if (ok < 0 || !ok)
  6064. return ok;
  6065. }
  6066. return ok_to_merge_proximity(ctx, graph, c, merge_graph);
  6067. }
/* Apply the schedule in "t_node" to the "n" rows starting at "first"
 * of the schedule in "node" and return the result.
 *
 * That is, essentially compute
 *
 *	T * N(first:first+n-1)
 *
 * taking into account the constant term and the parameter coefficients
 * in "t_node".
 */
static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
	struct isl_sched_node *t_node, struct isl_sched_node *node,
	int first, int n)
{
	int i, j;
	isl_mat *t;
	isl_size n_row, n_col;
	int n_param, n_var;

	n_param = node->nparam;
	n_var = node->nvar;
	n_row = isl_mat_rows(t_node->sched);
	n_col = isl_mat_cols(node->sched);
	if (n_row < 0 || n_col < 0)
		return NULL;
	t = isl_mat_alloc(ctx, n_row, n_col);
	if (!t)
		return NULL;
	/* Each schedule row has the layout
	 * [ constant | n_param parameter coefficients | variable coeffs ],
	 * as reflected by the "1 + n_param" offsets below.
	 */
	for (i = 0; i < n_row; ++i) {
		/* Start from the constant term and parameter coefficients
		 * of the transformation itself, with zero variable
		 * coefficients ...
		 */
		isl_seq_cpy(t->row[i], t_node->sched->row[i], 1 + n_param);
		isl_seq_clr(t->row[i] + 1 + n_param, n_var);
		/* ... and add the selected rows of "node", each scaled
		 * by the corresponding coefficient in the transformation.
		 */
		for (j = 0; j < n; ++j)
			isl_seq_addmul(t->row[i],
					t_node->sched->row[i][1 + n_param + j],
					node->sched->row[first + j],
					1 + n_param + n_var);
	}

	return t;
}
/* Apply the cluster schedule in "t_node" to the current band
 * schedule of the nodes in "graph".
 *
 * In particular, replace the rows starting at band_start
 * by the result of applying the cluster schedule in "t_node"
 * to the original rows.
 *
 * The coincidence of the schedule is determined by the coincidence
 * of the cluster schedule.
 */
static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
	struct isl_sched_node *t_node)
{
	int i, j;
	isl_size n_new;
	int start, n;

	/* The current band consists of the "n" rows starting at "start". */
	start = graph->band_start;
	n = graph->n_total_row - start;

	/* The transformed band has as many rows as the cluster schedule. */
	n_new = isl_mat_rows(t_node->sched);
	if (n_new < 0)
		return isl_stat_error;
	for (i = 0; i < graph->n; ++i) {
		struct isl_sched_node *node = &graph->node[i];
		isl_mat *t;

		/* Replace the current band rows by the transformed rows. */
		t = node_transformation(ctx, t_node, node, start, n);
		node->sched = isl_mat_drop_rows(node->sched, start, n);
		node->sched = isl_mat_concat(node->sched, t);
		/* Invalidate the cached map form of the schedule. */
		node->sched_map = isl_map_free(node->sched_map);
		if (!node->sched)
			return isl_stat_error;
		for (j = 0; j < n_new; ++j)
			node->coincident[start + j] = t_node->coincident[j];
	}
	/* Account for the change in band size in the row counts. */
	graph->n_total_row -= n;
	graph->n_row -= n;
	graph->n_total_row += n_new;
	graph->n_row += n_new;

	return isl_stat_ok;
}
  6145. /* Merge the clusters marked for merging in "c" into a single
  6146. * cluster using the cluster schedule in the current band of "merge_graph".
  6147. * The representative SCC for the new cluster is the SCC with
  6148. * the smallest index.
  6149. *
  6150. * The current band schedule of each SCC in the new cluster is obtained
  6151. * by applying the schedule of the corresponding original cluster
  6152. * to the original band schedule.
  6153. * All SCCs in the new cluster have the same number of schedule rows.
  6154. */
  6155. static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
  6156. struct isl_sched_graph *merge_graph)
  6157. {
  6158. int i;
  6159. int cluster = -1;
  6160. isl_space *space;
  6161. for (i = 0; i < c->n; ++i) {
  6162. struct isl_sched_node *node;
  6163. if (!c->scc_in_merge[i])
  6164. continue;
  6165. if (cluster < 0)
  6166. cluster = i;
  6167. space = cluster_space(&c->scc[i], c->scc_cluster[i]);
  6168. node = graph_find_node(ctx, merge_graph, space);
  6169. isl_space_free(space);
  6170. if (!node)
  6171. return isl_stat_error;
  6172. if (!is_node(merge_graph, node))
  6173. isl_die(ctx, isl_error_internal,
  6174. "unable to find cluster",
  6175. return isl_stat_error);
  6176. if (transform(ctx, &c->scc[i], node) < 0)
  6177. return isl_stat_error;
  6178. c->scc_cluster[i] = cluster;
  6179. }
  6180. return isl_stat_ok;
  6181. }
  6182. /* Try and merge the clusters of SCCs marked in c->scc_in_merge
  6183. * by scheduling the current cluster bands with respect to each other.
  6184. *
  6185. * Construct a dependence graph with a space for each cluster and
  6186. * with the coordinates of each space corresponding to the schedule
  6187. * dimensions of the current band of that cluster.
  6188. * Construct a cluster schedule in this cluster dependence graph and
  6189. * apply it to the current cluster bands if it is applicable
  6190. * according to ok_to_merge.
  6191. *
  6192. * If the number of remaining schedule dimensions in a cluster
  6193. * with a non-maximal current schedule dimension is greater than
  6194. * the number of remaining schedule dimensions in clusters
  6195. * with a maximal current schedule dimension, then restrict
  6196. * the number of rows to be computed in the cluster schedule
  6197. * to the minimal such non-maximal current schedule dimension.
  6198. * Do this by adjusting merge_graph.maxvar.
  6199. *
  6200. * Return isl_bool_true if the clusters have effectively been merged
  6201. * into a single cluster.
  6202. *
  6203. * Note that since the standard scheduling algorithm minimizes the maximal
  6204. * distance over proximity constraints, the proximity constraints between
  6205. * the merged clusters may not be optimized any further than what is
  6206. * sufficient to bring the distances within the limits of the internal
  6207. * proximity constraints inside the individual clusters.
  6208. * It may therefore make sense to perform an additional translation step
  6209. * to bring the clusters closer to each other, while maintaining
  6210. * the linear part of the merging schedule found using the standard
  6211. * scheduling algorithm.
  6212. */
  6213. static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
  6214. struct isl_clustering *c)
  6215. {
  6216. struct isl_sched_graph merge_graph = { 0 };
  6217. isl_bool merged;
  6218. if (init_merge_graph(ctx, graph, c, &merge_graph) < 0)
  6219. goto error;
  6220. if (compute_maxvar(&merge_graph) < 0)
  6221. goto error;
  6222. if (adjust_maxvar_to_slack(ctx, &merge_graph,c) < 0)
  6223. goto error;
  6224. if (compute_schedule_wcc_band(ctx, &merge_graph) < 0)
  6225. goto error;
  6226. merged = ok_to_merge(ctx, graph, c, &merge_graph);
  6227. if (merged && merge(ctx, c, &merge_graph) < 0)
  6228. goto error;
  6229. graph_free(ctx, &merge_graph);
  6230. return merged;
  6231. error:
  6232. graph_free(ctx, &merge_graph);
  6233. return isl_bool_error;
  6234. }
  6235. /* Is there any edge marked "no_merge" between two SCCs that are
  6236. * about to be merged (i.e., that are set in "scc_in_merge")?
  6237. * "merge_edge" is the proximity edge along which the clusters of SCCs
  6238. * are going to be merged.
  6239. *
  6240. * If there is any edge between two SCCs with a negative weight,
  6241. * while the weight of "merge_edge" is non-negative, then this
  6242. * means that the edge was postponed. "merge_edge" should then
  6243. * also be postponed since merging along the edge with negative weight should
  6244. * be postponed until all edges with non-negative weight have been tried.
  6245. * Replace the weight of "merge_edge" by a negative weight as well and
  6246. * tell the caller not to attempt a merge.
  6247. */
  6248. static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
  6249. struct isl_sched_edge *merge_edge)
  6250. {
  6251. int i;
  6252. for (i = 0; i < graph->n_edge; ++i) {
  6253. struct isl_sched_edge *edge = &graph->edge[i];
  6254. if (!scc_in_merge[edge->src->scc])
  6255. continue;
  6256. if (!scc_in_merge[edge->dst->scc])
  6257. continue;
  6258. if (edge->no_merge)
  6259. return 1;
  6260. if (merge_edge->weight >= 0 && edge->weight < 0) {
  6261. merge_edge->weight -= graph->max_weight + 1;
  6262. return 1;
  6263. }
  6264. }
  6265. return 0;
  6266. }
/* Merge the two clusters in "c" connected by the edge in "graph"
 * with index "edge" into a single cluster.
 * If it turns out to be impossible to merge these two clusters,
 * then mark the edge as "no_merge" such that it will not be
 * considered again.
 *
 * First mark all SCCs that need to be merged.  This includes the SCCs
 * in the two clusters, but it may also include the SCCs
 * of intermediate clusters.
 * If there is already a no_merge edge between any pair of such SCCs,
 * then simply mark the current edge as no_merge as well.
 * Likewise, if any of those edges was postponed by has_bounded_distances,
 * then postpone the current edge as well.
 * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
 * if the clusters did not end up getting merged, unless the non-merge
 * is due to the fact that the edge was postponed.  This postponement
 * can be recognized by a change in weight (from non-negative to negative).
 */
static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
	struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
{
	isl_bool merged;
	/* Record the weight before any_no_merge may lower it,
	 * so that a postponement can be detected below.
	 */
	int edge_weight = graph->edge[edge].weight;

	if (mark_merge_sccs(ctx, graph, edge, c) < 0)
		return isl_stat_error;

	if (any_no_merge(graph, c->scc_in_merge, &graph->edge[edge]))
		merged = isl_bool_false;
	else
		merged = try_merge(ctx, graph, c);
	if (merged < 0)
		return isl_stat_error;

	/* An unchanged weight means the non-merge was not a postponement,
	 * so the edge should never be considered again.
	 */
	if (!merged && edge_weight == graph->edge[edge].weight)
		graph->edge[edge].no_merge = 1;

	return isl_stat_ok;
}
  6302. /* Does "node" belong to the cluster identified by "cluster"?
  6303. */
  6304. static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
  6305. {
  6306. return node->cluster == cluster;
  6307. }
  6308. /* Does "edge" connect two nodes belonging to the cluster
  6309. * identified by "cluster"?
  6310. */
  6311. static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
  6312. {
  6313. return edge->src->cluster == cluster && edge->dst->cluster == cluster;
  6314. }
  6315. /* Swap the schedule of "node1" and "node2".
  6316. * Both nodes have been derived from the same node in a common parent graph.
  6317. * Since the "coincident" field is shared with that node
  6318. * in the parent graph, there is no need to also swap this field.
  6319. */
  6320. static void swap_sched(struct isl_sched_node *node1,
  6321. struct isl_sched_node *node2)
  6322. {
  6323. isl_mat *sched;
  6324. isl_map *sched_map;
  6325. sched = node1->sched;
  6326. node1->sched = node2->sched;
  6327. node2->sched = sched;
  6328. sched_map = node1->sched_map;
  6329. node1->sched_map = node2->sched_map;
  6330. node2->sched_map = sched_map;
  6331. }
/* Copy the current band schedule from the SCCs that form the cluster
 * with index "pos" to the actual cluster at position "pos".
 * By construction, the index of the first SCC that belongs to the cluster
 * is also "pos".
 *
 * The order of the nodes inside both the SCCs and the cluster
 * is assumed to be same as the order in the original "graph".
 *
 * Since the SCC graphs will no longer be used after this function,
 * the schedules are actually swapped rather than copied.
 */
static isl_stat copy_partial(struct isl_sched_graph *graph,
	struct isl_clustering *c, int pos)
{
	int i, j;

	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
	c->cluster[pos].n_row = c->scc[pos].n_row;
	c->cluster[pos].maxvar = c->scc[pos].maxvar;
	/* "j" is the position of the next node inside the cluster sub-graph,
	 * while c->scc_node[s] counts how many nodes of SCC "s" have been
	 * consumed so far; both follow the node order of "graph".
	 */
	j = 0;
	for (i = 0; i < graph->n; ++i) {
		int k;
		int s;

		if (graph->node[i].cluster != pos)
			continue;
		s = graph->node[i].scc;
		k = c->scc_node[s]++;
		swap_sched(&c->cluster[pos].node[j], &c->scc[s].node[k]);
		/* The cluster needs to accommodate the deepest of its SCCs. */
		if (c->scc[s].maxvar > c->cluster[pos].maxvar)
			c->cluster[pos].maxvar = c->scc[s].maxvar;
		++j;
	}

	return isl_stat_ok;
}
  6365. /* Is there a (conditional) validity dependence from node[j] to node[i],
  6366. * forcing node[i] to follow node[j] or do the nodes belong to the same
  6367. * cluster?
  6368. */
  6369. static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
  6370. {
  6371. struct isl_sched_graph *graph = user;
  6372. if (graph->node[i].cluster == graph->node[j].cluster)
  6373. return isl_bool_true;
  6374. return graph_has_validity_edge(graph, &graph->node[j], &graph->node[i]);
  6375. }
/* Extract the merged clusters of SCCs in "graph", sort them, and
 * store them in c->clusters.  Update c->scc_cluster accordingly.
 *
 * First keep track of the cluster containing the SCC to which a node
 * belongs in the node itself.
 * Then extract the clusters into c->clusters, copying the current
 * band schedule from the SCCs that belong to the cluster.
 * Do this only once per cluster.
 *
 * Finally, topologically sort the clusters and update c->scc_cluster
 * to match the new scc numbering.  While the SCCs were originally
 * sorted already, some SCCs that depend on some other SCCs may
 * have been merged with SCCs that appear before these other SCCs.
 * A reordering may therefore be required.
 */
static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
	struct isl_clustering *c)
{
	int i;

	/* Phase 1: record each node's cluster in the node itself. */
	for (i = 0; i < graph->n; ++i)
		graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
	/* Phase 2: extract each cluster once, at the position of its
	 * first SCC (for which c->scc_cluster[i] == i by construction).
	 */
	for (i = 0; i < graph->scc; ++i) {
		if (c->scc_cluster[i] != i)
			continue;
		if (extract_sub_graph(ctx, graph, &node_cluster_exactly,
				&edge_cluster_exactly, i, &c->cluster[i]) < 0)
			return isl_stat_error;
		c->cluster[i].src_scc = -1;
		c->cluster[i].dst_scc = -1;
		if (copy_partial(graph, c, i) < 0)
			return isl_stat_error;
	}
	/* Phase 3: topologically sort the clusters (detect_ccs renumbers
	 * node->cluster) and rebuild the scc -> cluster mapping.
	 */
	if (detect_ccs(ctx, graph, &node_follows_strong_or_same_cluster) < 0)
		return isl_stat_error;
	for (i = 0; i < graph->n; ++i)
		c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;

	return isl_stat_ok;
}
/* Compute weights on the proximity edges of "graph" that can
 * be used by find_proximity to find the most appropriate
 * proximity edge to use to merge two clusters in "c".
 * The weights are also used by has_bounded_distances to determine
 * whether the merge should be allowed.
 * Store the maximum of the computed weights in graph->max_weight.
 *
 * The computed weight is a measure for the number of remaining schedule
 * dimensions that can still be completely aligned.
 * In particular, compute the number of equalities between
 * input dimensions and output dimensions in the proximity constraints.
 * The directions that are already handled by outer schedule bands
 * are projected out prior to determining this number.
 *
 * Edges that will never be considered by find_proximity are ignored.
 */
static isl_stat compute_weights(struct isl_sched_graph *graph,
	struct isl_clustering *c)
{
	int i;

	graph->max_weight = 0;

	for (i = 0; i < graph->n_edge; ++i) {
		struct isl_sched_edge *edge = &graph->edge[i];
		struct isl_sched_node *src = edge->src;
		struct isl_sched_node *dst = edge->dst;
		isl_basic_map *hull;
		isl_bool prox;
		isl_size n_in, n_out, n;

		/* Skip edges that find_proximity would never select:
		 * non-proximity edges, edges touching a bad cluster and
		 * edges internal to a single cluster.
		 */
		prox = is_non_empty_proximity(edge);
		if (prox < 0)
			return isl_stat_error;
		if (!prox)
			continue;
		if (bad_cluster(&c->scc[edge->src->scc]) ||
		    bad_cluster(&c->scc[edge->dst->scc]))
			continue;
		if (c->scc_cluster[edge->dst->scc] ==
		    c->scc_cluster[edge->src->scc])
			continue;

		/* Transform both tuples using the node's "vmap" basis and
		 * drop the first "rank" coordinates, i.e., the directions
		 * already handled by outer schedule bands (presumably the
		 * span of the rows of the current band schedule).
		 */
		hull = isl_map_affine_hull(isl_map_copy(edge->map));
		hull = isl_basic_map_transform_dims(hull, isl_dim_in, 0,
						    isl_mat_copy(src->vmap));
		hull = isl_basic_map_transform_dims(hull, isl_dim_out, 0,
						    isl_mat_copy(dst->vmap));
		hull = isl_basic_map_project_out(hull,
						isl_dim_in, 0, src->rank);
		hull = isl_basic_map_project_out(hull,
						isl_dim_out, 0, dst->rank);
		hull = isl_basic_map_remove_divs(hull);
		n_in = isl_basic_map_dim(hull, isl_dim_in);
		n_out = isl_basic_map_dim(hull, isl_dim_out);
		if (n_in < 0 || n_out < 0)
			hull = isl_basic_map_free(hull);
		/* Only count equalities that actually relate input and
		 * output dimensions; the isl calls below accept a NULL
		 * "hull", propagating any earlier failure to the "n" check.
		 */
		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
							isl_dim_in, 0, n_in);
		hull = isl_basic_map_drop_constraints_not_involving_dims(hull,
							isl_dim_out, 0, n_out);
		n = isl_basic_map_n_equality(hull);
		isl_basic_map_free(hull);
		if (n < 0)
			return isl_stat_error;
		edge->weight = n;

		if (edge->weight > graph->max_weight)
			graph->max_weight = edge->weight;
	}

	return isl_stat_ok;
}
/* Call compute_schedule_finish_band on each of the clusters in "c"
 * in their topological order.  This order is determined by the scc
 * fields of the nodes in "graph".
 * Combine the results in a sequence expressing the topological order.
 *
 * If there is only one cluster left, then there is no need to introduce
 * a sequence node.  Also, in this case, the cluster necessarily contains
 * the SCC at position 0 in the original graph and is therefore also
 * stored in the first cluster of "c".
 */
static __isl_give isl_schedule_node *finish_bands_clustering(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	struct isl_clustering *c)
{
	int i;
	isl_ctx *ctx;
	isl_union_set_list *filters;

	if (graph->scc == 1)
		return compute_schedule_finish_band(node, &c->cluster[0], 0);

	ctx = isl_schedule_node_get_ctx(node);

	filters = extract_sccs(ctx, graph);
	node = isl_schedule_node_insert_sequence(node, filters);

	for (i = 0; i < graph->scc; ++i) {
		/* The cluster of SCC "i"; several SCCs may map to the
		 * same cluster index.
		 */
		int j = c->scc_cluster[i];
		/* Descend to the band position under filter "i",
		 * finish the band there, and climb back up.
		 */
		node = isl_schedule_node_child(node, i);
		node = isl_schedule_node_child(node, 0);
		node = compute_schedule_finish_band(node, &c->cluster[j], 0);
		node = isl_schedule_node_parent(node);
		node = isl_schedule_node_parent(node);
	}

	return node;
}
/* Compute a schedule for a connected dependence graph by first considering
 * each strongly connected component (SCC) in the graph separately and then
 * incrementally combining them into clusters.
 * Return the updated schedule node.
 *
 * Initially, each cluster consists of a single SCC, each with its
 * own band schedule.  The algorithm then tries to merge pairs
 * of clusters along a proximity edge until no more suitable
 * proximity edges can be found.  During this merging, the schedule
 * is maintained in the individual SCCs.
 * After the merging is completed, the full resulting clusters
 * are extracted and in finish_bands_clustering,
 * compute_schedule_finish_band is called on each of them to integrate
 * the band into "node" and to continue the computation.
 *
 * compute_weights initializes the weights that are used by find_proximity.
 */
static __isl_give isl_schedule_node *compute_schedule_wcc_clustering(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
{
	isl_ctx *ctx;
	struct isl_clustering c;
	int i;

	ctx = isl_schedule_node_get_ctx(node);

	if (clustering_init(ctx, &c, graph) < 0)
		goto error;

	if (compute_weights(graph, &c) < 0)
		goto error;

	/* Keep merging along the best proximity edge until find_proximity
	 * reports no more candidates (an index >= graph->n_edge).
	 */
	for (;;) {
		i = find_proximity(graph, &c);
		if (i < 0)
			goto error;
		if (i >= graph->n_edge)
			break;
		if (merge_clusters_along_edge(ctx, graph, i, &c) < 0)
			goto error;
	}

	if (extract_clusters(ctx, graph, &c) < 0)
		goto error;

	node = finish_bands_clustering(node, graph, &c);

	clustering_free(ctx, &c);
	return node;
error:
	clustering_free(ctx, &c);
	return isl_schedule_node_free(node);
}
  6559. /* Compute a schedule for a connected dependence graph and return
  6560. * the updated schedule node.
  6561. *
  6562. * If Feautrier's algorithm is selected, we first recursively try to satisfy
  6563. * as many validity dependences as possible. When all validity dependences
  6564. * are satisfied we extend the schedule to a full-dimensional schedule.
  6565. *
  6566. * Call compute_schedule_wcc_whole or compute_schedule_wcc_clustering
  6567. * depending on whether the user has selected the option to try and
  6568. * compute a schedule for the entire (weakly connected) component first.
  6569. * If there is only a single strongly connected component (SCC), then
  6570. * there is no point in trying to combine SCCs
  6571. * in compute_schedule_wcc_clustering, so compute_schedule_wcc_whole
  6572. * is called instead.
  6573. */
  6574. static __isl_give isl_schedule_node *compute_schedule_wcc(
  6575. __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
  6576. {
  6577. isl_ctx *ctx;
  6578. if (!node)
  6579. return NULL;
  6580. ctx = isl_schedule_node_get_ctx(node);
  6581. if (detect_sccs(ctx, graph) < 0)
  6582. return isl_schedule_node_free(node);
  6583. if (compute_maxvar(graph) < 0)
  6584. return isl_schedule_node_free(node);
  6585. if (need_feautrier_step(ctx, graph))
  6586. return compute_schedule_wcc_feautrier(node, graph);
  6587. if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx))
  6588. return compute_schedule_wcc_whole(node, graph);
  6589. else
  6590. return compute_schedule_wcc_clustering(node, graph);
  6591. }
/* Compute a schedule for each group of nodes identified by node->scc
 * separately and then combine them in a sequence node (or as set node
 * if graph->weak is set) inserted at position "node" of the schedule tree.
 * Return the updated schedule node.
 *
 * If "wcc" is set then each of the groups belongs to a single
 * weakly connected component in the dependence graph so that
 * there is no need for compute_sub_schedule to look for weakly
 * connected components.
 *
 * If a set node would be introduced and if the number of components
 * is equal to the number of nodes, then check if the schedule
 * is already complete.  If so, a redundant set node would be introduced
 * (without any further descendants) stating that the statements
 * can be executed in arbitrary order, which is also expressed
 * by the absence of any node.  Refrain from inserting any nodes
 * in this case and simply return.
 */
static __isl_give isl_schedule_node *compute_component_schedule(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int wcc)
{
	int component;
	isl_ctx *ctx;
	isl_union_set_list *filters;

	if (!node)
		return NULL;

	/* Special case described above: every node forms its own
	 * (weak) component and the schedule is already complete.
	 */
	if (graph->weak && graph->scc == graph->n) {
		if (compute_maxvar(graph) < 0)
			return isl_schedule_node_free(node);
		if (graph->n_row >= graph->maxvar)
			return node;
	}

	ctx = isl_schedule_node_get_ctx(node);
	filters = extract_sccs(ctx, graph);
	if (graph->weak)
		node = isl_schedule_node_insert_set(node, filters);
	else
		node = isl_schedule_node_insert_sequence(node, filters);

	for (component = 0; component < graph->scc; ++component) {
		/* Descend to the band position under the filter of
		 * this component, schedule it, and climb back up.
		 */
		node = isl_schedule_node_child(node, component);
		node = isl_schedule_node_child(node, 0);
		node = compute_sub_schedule(node, ctx, graph,
				    &node_scc_exactly,
				    &edge_scc_exactly, component, wcc);
		node = isl_schedule_node_parent(node);
		node = isl_schedule_node_parent(node);
	}

	return node;
}
  6642. /* Compute a schedule for the given dependence graph and insert it at "node".
  6643. * Return the updated schedule node.
  6644. *
  6645. * We first check if the graph is connected (through validity and conditional
  6646. * validity dependences) and, if not, compute a schedule
  6647. * for each component separately.
  6648. * If the schedule_serialize_sccs option is set, then we check for strongly
  6649. * connected components instead and compute a separate schedule for
  6650. * each such strongly connected component.
  6651. */
  6652. static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node,
  6653. struct isl_sched_graph *graph)
  6654. {
  6655. isl_ctx *ctx;
  6656. if (!node)
  6657. return NULL;
  6658. ctx = isl_schedule_node_get_ctx(node);
  6659. if (isl_options_get_schedule_serialize_sccs(ctx)) {
  6660. if (detect_sccs(ctx, graph) < 0)
  6661. return isl_schedule_node_free(node);
  6662. } else {
  6663. if (detect_wccs(ctx, graph) < 0)
  6664. return isl_schedule_node_free(node);
  6665. }
  6666. if (graph->scc > 1)
  6667. return compute_component_schedule(node, graph, 1);
  6668. return compute_schedule_wcc(node, graph);
  6669. }
/* Compute a schedule on sc->domain that respects the given schedule
 * constraints.
 *
 * In particular, the schedule respects all the validity dependences.
 * If the default isl scheduling algorithm is used, it tries to minimize
 * the dependence distances over the proximity dependences.
 * If Feautrier's scheduling algorithm is used, the proximity dependence
 * distances are only minimized during the extension to a full-dimensional
 * schedule.
 *
 * If there are any condition and conditional validity dependences,
 * then the conditional validity dependences may be violated inside
 * a tilable band, provided they have no adjacent non-local
 * condition dependences.
 *
 * "sc" is consumed; the caller receives ownership of the returned schedule.
 */
__isl_give isl_schedule *isl_schedule_constraints_compute_schedule(
	__isl_take isl_schedule_constraints *sc)
{
	isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc);
	struct isl_sched_graph graph = { 0 };
	isl_schedule *sched;
	isl_schedule_node *node;
	isl_union_set *domain;
	isl_size n;

	sc = isl_schedule_constraints_align_params(sc);

	domain = isl_schedule_constraints_get_domain(sc);
	/* An empty domain needs no scheduling at all. */
	n = isl_union_set_n_set(domain);
	if (n == 0) {
		isl_schedule_constraints_free(sc);
		return isl_schedule_from_domain(domain);
	}

	/* On failure, free "domain" so that the node construction below
	 * yields NULL and the error propagates through the isl calls.
	 */
	if (n < 0 || graph_init(&graph, sc) < 0)
		domain = isl_union_set_free(domain);

	node = isl_schedule_node_from_domain(domain);
	node = isl_schedule_node_child(node, 0);
	if (graph.n > 0)
		node = compute_schedule(node, &graph);
	sched = isl_schedule_node_get_schedule(node);
	isl_schedule_node_free(node);

	graph_free(ctx, &graph);
	isl_schedule_constraints_free(sc);

	return sched;
}
  6713. /* Compute a schedule for the given union of domains that respects
  6714. * all the validity dependences and minimizes
  6715. * the dependence distances over the proximity dependences.
  6716. *
  6717. * This function is kept for backward compatibility.
  6718. */
  6719. __isl_give isl_schedule *isl_union_set_compute_schedule(
  6720. __isl_take isl_union_set *domain,
  6721. __isl_take isl_union_map *validity,
  6722. __isl_take isl_union_map *proximity)
  6723. {
  6724. isl_schedule_constraints *sc;
  6725. sc = isl_schedule_constraints_on_domain(domain);
  6726. sc = isl_schedule_constraints_set_validity(sc, validity);
  6727. sc = isl_schedule_constraints_set_proximity(sc, proximity);
  6728. return isl_schedule_constraints_compute_schedule(sc);
  6729. }