//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "ppc-codegen"

STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);

static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

static cl::opt<bool> EnableTLSOpt(
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));

namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  const PPCSubtarget *Subtarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    if (Subtarget->hasROPProtect()) {
      // Create a place on the stack for the ROP Protection Hash.
      // The ROP Protection Hash will always be 8 bytes and aligned to 8
      // bytes.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      const int Result = MFI.CreateStackObject(8, Align(8), false);
      FI->setROPProtectionHashSaveIndex(Result);
    }
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);

  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field. Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }
    return false;
  }

  /// SelectDSForm - Returns true if address N can be represented by the
  /// addressing mode of DSForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 4).
  bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(4)) == PPC::AM_DSForm;
  }

  /// SelectDQForm - Returns true if address N can be represented by the
  /// addressing mode of DQForm instructions (a base register, plus a signed
  /// 16-bit displacement that is a multiple of 16).
  bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              Align(16)) == PPC::AM_DQForm;
  }

  /// SelectDForm - Returns true if address N can be represented by
  /// the addressing mode of DForm instructions (a base register, plus a
  /// signed 16-bit immediate).
  bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_DForm;
  }

  /// SelectPCRelForm - Returns true if address N can be represented by
  /// PC-Relative addressing mode.
  bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
                       SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_PCRel;
  }

  /// SelectPDForm - Returns true if address N can be represented by Prefixed
  /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
  bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_PrefixDForm;
  }

  /// SelectXForm - Returns true if address N can be represented by the
  /// addressing mode of XForm instructions (an indexed [r+r] operation).
  bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                              None) == PPC::AM_XForm;
  }

  /// SelectForceXForm - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation (an XForm instruction).
  bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                        SDValue &Base) {
    return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
           PPC::AM_XForm;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p None means the associated D form has no alignment
  /// requirement for its 16-bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which is preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p Align(4) means the associated DS form's 16-bit
  /// signed displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which is preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can
  /// be represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p Align(16) means the associated DQ form's 16-bit
  /// signed displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which is preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p None means the D form has no alignment requirement
  /// for its 16-bit signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16 (last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions. It is always correct to compute the value into
  /// a register. The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (Subtarget->isTargetELF()) {
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}

// Check if a SDValue has the toc-data attribute.
static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
  if (!GA)
    return false;

  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
  if (!GV)
    return false;

  if (!GV->hasAttribute("toc-data"))
    return false;

  // TODO: These asserts should be updated as more support for the toc data
  // transformation is added (struct support, etc.).
  assert(
      PointerSize >= GV->getAlign().valueOrOne().value() &&
      "GlobalVariables with an alignment requirement stricter than TOC entry "
      "size not supported by the toc data transformation.");

  Type *GVType = GV->getValueType();
  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
                              "supported by the toc data transformation.");

  if (GVType->isVectorTy())
    report_fatal_error("A GlobalVariable of Vector type is not currently "
                       "supported by the toc data transformation.");

  if (GVType->isArrayTy())
    report_fatal_error("A GlobalVariable of Array type is not currently "
                       "supported by the toc data transformation.");

  if (GVType->isStructTy())
    report_fatal_error("A GlobalVariable of Struct type is not currently "
                       "supported by the toc data transformation.");

  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
         "A GlobalVariable with size larger than a TOC entry is not currently "
         "supported by the toc data transformation.");

  if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
    report_fatal_error("A GlobalVariable with private or local linkage is not "
                       "currently supported by the toc data transformation.");

  assert(!GV->hasCommonLinkage() &&
         "Tentative definitions cannot have the mapping class XMC_TD.");

  return true;
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}

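// getBranchHint - Pick a static branch hint (taken / not-taken / none) for a
// conditional branch to DestMBB, based on the edge probabilities recorded in
// the function's BranchProbabilityInfo.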
static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases that are easy to predict statically, e.g. a
  // C++ throw statement that is very unlikely to be taken, or a call to a
  // function that never returns (e.g. stdlib exit()). So we set Threshold to
  // filter out the unwanted cases.
  //
  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
  //
  // Case                  Taken:Nontaken  Example
  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
  // 2. Invoke-terminating 1:1048575
  // 3. Coldblock          4:64            __builtin_expect
  // 4. Loop Branch        124:4           For loop
  // 5. PH/ZH/FPH          20:12
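  //
  // In other words, only emit a hint when the likely edge is (roughly) at
  // least Threshold times more probable than the unlikely one; anything
  // closer than that is left to the hardware branch predictor.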
  const uint32_t Threshold = 10000;

  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << "  -> " << TBB->getName() << ": " << TProb << "\n"
                    << "  -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If the destination is the false successor (FBB), swap the probabilities,
  // because we want 'TProb' to stand for the branch probability to the
  // destination BasicBlock.
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

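// selectFrameIndex - Materialize a frame index as an ADDI/ADDI8 of the target
// frame index plus a small immediate offset, morphing the original node in
// place when it has a single use and emitting a fresh machine node otherwise.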
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);

  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}

  566. bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
  567. bool isShiftMask, unsigned &SH,
  568. unsigned &MB, unsigned &ME) {
  569. // Don't even go down this path for i64, since different logic will be
  570. // necessary for rldicl/rldicr/rldimi.
  571. if (N->getValueType(0) != MVT::i32)
  572. return false;
  573. unsigned Shift = 32;
574. unsigned Indeterminant = ~0; // bit mask marking indeterminate results
  575. unsigned Opcode = N->getOpcode();
  576. if (N->getNumOperands() != 2 ||
  577. !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
  578. return false;
  579. if (Opcode == ISD::SHL) {
  580. // apply shift left to mask if it comes first
  581. if (isShiftMask) Mask = Mask << Shift;
582. // determine which bits are made indeterminate by shift
  583. Indeterminant = ~(0xFFFFFFFFu << Shift);
  584. } else if (Opcode == ISD::SRL) {
  585. // apply shift right to mask if it comes first
  586. if (isShiftMask) Mask = Mask >> Shift;
587. // determine which bits are made indeterminate by shift
  588. Indeterminant = ~(0xFFFFFFFFu >> Shift);
  589. // adjust for the left rotate
  590. Shift = 32 - Shift;
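// Editor's note (illustrative): a logical srl by 8 is treated here as a
// left-rotate by 24, with the shifted-in top 8 bits marked indeterminate.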
  591. } else if (Opcode == ISD::ROTL) {
  592. Indeterminant = 0;
  593. } else {
  594. return false;
  595. }
  596. // if the mask doesn't intersect any Indeterminant bits
  597. if (Mask && !(Mask & Indeterminant)) {
  598. SH = Shift & 31;
  599. // make sure the mask is still a mask (wrap arounds may not be)
  600. return isRunOfOnes(Mask, MB, ME);
  601. }
  602. return false;
  603. }
  604. bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  605. SDValue Base = ST->getBasePtr();
  606. if (Base.getOpcode() != PPCISD::ADD_TLS)
  607. return false;
  608. SDValue Offset = ST->getOffset();
  609. if (!Offset.isUndef())
  610. return false;
  611. if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
  612. return false;
  613. SDLoc dl(ST);
  614. EVT MemVT = ST->getMemoryVT();
  615. EVT RegVT = ST->getValue().getValueType();
  616. unsigned Opcode;
  617. switch (MemVT.getSimpleVT().SimpleTy) {
  618. default:
  619. return false;
  620. case MVT::i8: {
  621. Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
  622. break;
  623. }
  624. case MVT::i16: {
  625. Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
  626. break;
  627. }
  628. case MVT::i32: {
  629. Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
  630. break;
  631. }
  632. case MVT::i64: {
  633. Opcode = PPC::STDXTLS;
  634. break;
  635. }
  636. }
  637. SDValue Chain = ST->getChain();
  638. SDVTList VTs = ST->getVTList();
  639. SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
  640. Chain};
  641. SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  642. transferMemOperands(ST, MN);
  643. ReplaceNode(ST, MN);
  644. return true;
  645. }
  646. bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  647. SDValue Base = LD->getBasePtr();
  648. if (Base.getOpcode() != PPCISD::ADD_TLS)
  649. return false;
  650. SDValue Offset = LD->getOffset();
  651. if (!Offset.isUndef())
  652. return false;
  653. if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
  654. return false;
  655. SDLoc dl(LD);
  656. EVT MemVT = LD->getMemoryVT();
  657. EVT RegVT = LD->getValueType(0);
  658. unsigned Opcode;
  659. switch (MemVT.getSimpleVT().SimpleTy) {
  660. default:
  661. return false;
  662. case MVT::i8: {
  663. Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
  664. break;
  665. }
  666. case MVT::i16: {
  667. Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
  668. break;
  669. }
  670. case MVT::i32: {
  671. Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
  672. break;
  673. }
  674. case MVT::i64: {
  675. Opcode = PPC::LDXTLS;
  676. break;
  677. }
  678. }
  679. SDValue Chain = LD->getChain();
  680. SDVTList VTs = LD->getVTList();
  681. SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  682. SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  683. transferMemOperands(LD, MN);
  684. ReplaceNode(LD, MN);
  685. return true;
  686. }
  687. /// Turn an or of two masked values into the rotate left word immediate then
  688. /// mask insert (rlwimi) instruction.
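/// Editor's sketch (hypothetical operands): assuming x and y are i32 values,
/// (or (and x, 0xFFFFFF00), (and (shl y, 4), 0x000000F0)) can be selected as
/// rlwimi x, y, 4, 24, 27, i.e. rotate y left by 4 and insert it into x under
/// the mask 0x000000F0.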
  689. bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  690. SDValue Op0 = N->getOperand(0);
  691. SDValue Op1 = N->getOperand(1);
  692. SDLoc dl(N);
  693. KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  694. KnownBits RKnown = CurDAG->computeKnownBits(Op1);
  695. unsigned TargetMask = LKnown.Zero.getZExtValue();
  696. unsigned InsertMask = RKnown.Zero.getZExtValue();
  697. if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
  698. unsigned Op0Opc = Op0.getOpcode();
  699. unsigned Op1Opc = Op1.getOpcode();
  700. unsigned Value, SH = 0;
  701. TargetMask = ~TargetMask;
  702. InsertMask = ~InsertMask;
  703. // If the LHS has a foldable shift and the RHS does not, then swap it to the
  704. // RHS so that we can fold the shift into the insert.
  705. if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
  706. if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
  707. Op0.getOperand(0).getOpcode() == ISD::SRL) {
  708. if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
  709. Op1.getOperand(0).getOpcode() != ISD::SRL) {
  710. std::swap(Op0, Op1);
  711. std::swap(Op0Opc, Op1Opc);
  712. std::swap(TargetMask, InsertMask);
  713. }
  714. }
  715. } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
  716. if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
  717. Op1.getOperand(0).getOpcode() != ISD::SRL) {
  718. std::swap(Op0, Op1);
  719. std::swap(Op0Opc, Op1Opc);
  720. std::swap(TargetMask, InsertMask);
  721. }
  722. }
  723. unsigned MB, ME;
  724. if (isRunOfOnes(InsertMask, MB, ME)) {
  725. if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
  726. isInt32Immediate(Op1.getOperand(1), Value)) {
  727. Op1 = Op1.getOperand(0);
  728. SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
  729. }
  730. if (Op1Opc == ISD::AND) {
731. // The AND mask might not be a constant, and we need to make sure that
732. // if we're going to fold the masking with the insert, all bits not
733. // known to be zero in the mask are known to be one.
  734. KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
  735. bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
  736. unsigned SHOpc = Op1.getOperand(0).getOpcode();
  737. if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
  738. isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
  739. // Note that Value must be in range here (less than 32) because
  740. // otherwise there would not be any bits set in InsertMask.
  741. Op1 = Op1.getOperand(0).getOperand(0);
  742. SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
  743. }
  744. }
  745. SH &= 31;
  746. SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
  747. getI32Imm(ME, dl) };
  748. ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  749. return true;
  750. }
  751. }
  752. return false;
  753. }
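// Editor's note (summary of the helper below): if every use of N only
// consumes a truncated form of the value (TRUNCATE nodes or narrow stores),
// return the widest number of bits any such use keeps; return 0 as soon as a
// use is found that may need the full 64-bit value.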
  754. static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  755. unsigned MaxTruncation = 0;
  756. // Cannot use range-based for loop here as we need the actual use (i.e. we
  757. // need the operand number corresponding to the use). A range-based for
  758. // will unbox the use and provide an SDNode*.
  759. for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
  760. Use != UseEnd; ++Use) {
  761. unsigned Opc =
  762. Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
  763. switch (Opc) {
  764. default: return 0;
  765. case ISD::TRUNCATE:
  766. if (Use->isMachineOpcode())
  767. return 0;
  768. MaxTruncation =
  769. std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
  770. continue;
  771. case ISD::STORE: {
  772. if (Use->isMachineOpcode())
  773. return 0;
  774. StoreSDNode *STN = cast<StoreSDNode>(*Use);
  775. unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
  776. if (MemVTSize == 64 || Use.getOperandNo() != 0)
  777. return 0;
  778. MaxTruncation = std::max(MaxTruncation, MemVTSize);
  779. continue;
  780. }
  781. case PPC::STW8:
  782. case PPC::STWX8:
  783. case PPC::STWU8:
  784. case PPC::STWUX8:
  785. if (Use.getOperandNo() != 0)
  786. return 0;
  787. MaxTruncation = std::max(MaxTruncation, 32u);
  788. continue;
  789. case PPC::STH8:
  790. case PPC::STHX8:
  791. case PPC::STHU8:
  792. case PPC::STHUX8:
  793. if (Use.getOperandNo() != 0)
  794. return 0;
  795. MaxTruncation = std::max(MaxTruncation, 16u);
  796. continue;
  797. case PPC::STB8:
  798. case PPC::STBX8:
  799. case PPC::STBU8:
  800. case PPC::STBUX8:
  801. if (Use.getOperandNo() != 0)
  802. return 0;
  803. MaxTruncation = std::max(MaxTruncation, 8u);
  804. continue;
  805. }
  806. }
  807. return MaxTruncation;
  808. }
809. // For any 32 < Num < 64, check if Imm has at least Num consecutive zeros around
810. // the 32-bit boundary; if so, return the bit index just above them, else 0.
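// Editor's illustration (hypothetical value): for Imm = 0xFFC000000000003F,
// HiTZ = 22 and LoLZ = 26, so 48 zeros straddle the boundary and the helper
// returns 32 + 22 = 54 for any Num <= 48 (and 0 for larger Num).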
  811. static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  812. unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
  813. unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
  814. if ((HiTZ + LoLZ) >= Num)
  815. return (32 + HiTZ);
  816. return 0;
  817. }
  818. // Direct materialization of 64-bit constants by enumerated patterns.
  819. static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
  820. uint64_t Imm, unsigned &InstCnt) {
  821. unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  822. unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  823. unsigned TO = countTrailingOnes<uint64_t>(Imm);
  824. unsigned LO = countLeadingOnes<uint64_t>(Imm);
  825. unsigned Hi32 = Hi_32(Imm);
  826. unsigned Lo32 = Lo_32(Imm);
  827. SDNode *Result = nullptr;
  828. unsigned Shift = 0;
  829. auto getI32Imm = [CurDAG, dl](unsigned Imm) {
  830. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  831. };
832. // Following patterns use 1 instruction to materialize the Imm.
  833. InstCnt = 1;
834. // 1-1) Patterns : {zeros}{15-bit value}
835. // {ones}{15-bit value}
  836. if (isInt<16>(Imm)) {
  837. SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  838. return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  839. }
840. // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
841. // {ones}{15-bit value}{16 zeros}
  842. if (TZ > 15 && (LZ > 32 || LO > 32))
  843. return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  844. getI32Imm((Imm >> 16) & 0xffff));
  845. // Following patterns use 2 instructions to materialize the Imm.
  846. InstCnt = 2;
  847. assert(LZ < 64 && "Unexpected leading zeros here.");
848. // Count of ones following the leading zeros.
  849. unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  850. // 2-1) Patterns : {zeros}{31-bit value}
  851. // {ones}{31-bit value}
  852. if (isInt<32>(Imm)) {
  853. uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
  854. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  855. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  856. return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  857. getI32Imm(Imm & 0xffff));
  858. }
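// Editor's illustration for 2-1 (hypothetical value): Imm = 0x12345678 is
// materialized as LIS8 0x1234 followed by ORI8 0x5678.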
  859. // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  860. // {zeros}{15-bit value}{zeros}
  861. // {zeros}{ones}{15-bit value}
  862. // {ones}{15-bit value}{zeros}
  863. // We can take advantage of LI's sign-extension semantics to generate leading
864. // ones, and then use RLDIC to mask off the ones on both sides after rotation.
  865. if ((LZ + FO + TZ) > 48) {
  866. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  867. getI32Imm((Imm >> TZ) & 0xffff));
  868. return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
  869. getI32Imm(TZ), getI32Imm(LZ));
  870. }
  871. // 2-3) Pattern : {zeros}{15-bit value}{ones}
872. // Shift the Imm right by (48 - LZ) bits to construct a negative 16-bit value,
  873. // therefore we can take advantage of LI's sign-extension semantics, and then
  874. // mask them off after rotation.
  875. //
  876. // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
  877. // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
  878. // +------------------------+ +------------------------+
  879. // 63 0 63 0
  880. // Imm (Imm >> (48 - LZ) & 0xffff)
  881. // +----sext-----|--16-bit--+ +clear-|-----------------+
  882. // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
  883. // +------------------------+ +------------------------+
  884. // 63 0 63 0
  885. // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
  886. if ((LZ + TO) > 48) {
  887. // Since the immediates with (LZ > 32) have been handled by previous
  888. // patterns, here we have (LZ <= 32) to make sure we will not shift right
  889. // the Imm by a negative value.
  890. assert(LZ <= 32 && "Unexpected shift value.");
  891. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  892. getI32Imm((Imm >> (48 - LZ) & 0xffff)));
  893. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  894. getI32Imm(48 - LZ), getI32Imm(LZ));
  895. }
  896. // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  897. // {ones}{15-bit value}{ones}
  898. // We can take advantage of LI's sign-extension semantics to generate leading
899. // ones, and then use RLDICL to mask off the ones on the left side (if required)
  900. // after rotation.
  901. //
  902. // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
  903. // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
  904. // +------------------------+ +------------------------+
  905. // 63 0 63 0
  906. // Imm (Imm >> TO) & 0xffff
  907. // +----sext-----|--16-bit--+ +LZ|---------------------+
  908. // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
  909. // +------------------------+ +------------------------+
  910. // 63 0 63 0
  911. // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
  912. if ((LZ + FO + TO) > 48) {
  913. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  914. getI32Imm((Imm >> TO) & 0xffff));
  915. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  916. getI32Imm(TO), getI32Imm(LZ));
  917. }
  918. // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
919. // If Hi32 is zero and the Lo16 (of Lo32) can be represented as a positive 16-bit
920. // value, we can use LI for the Lo16 without generating leading ones, and then
921. // OR in the Hi16 (of Lo32) with ORIS.
  922. if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
  923. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  924. getI32Imm(Lo32 & 0xffff));
  925. return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
  926. getI32Imm(Lo32 >> 16));
  927. }
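// Editor's illustration for 2-5 (hypothetical value): Imm = 0x87654321 has
// Hi32 == 0 and a non-negative Lo16, so it becomes LI8 0x4321 + ORIS8 0x8765.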
  928. // 2-6) Patterns : {******}{49 zeros}{******}
  929. // {******}{49 ones}{******}
  930. // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  931. // bits remain on both sides. Rotate right the Imm to construct an int<16>
932. // value, use LI for the int<16> value and then use RLDICL without a mask to rotate
  933. // it back.
  934. //
  935. // 1) findContiguousZerosAtLeast(Imm, 49)
  936. // +------|--zeros-|------+ +---ones--||---15 bit--+
  937. // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
  938. // +----------------------+ +----------------------+
  939. // 63 0 63 0
  940. //
  941. // 2) findContiguousZerosAtLeast(~Imm, 49)
  942. // +------|--ones--|------+ +---ones--||---15 bit--+
  943. // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
  944. // +----------------------+ +----------------------+
  945. // 63 0 63 0
  946. if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
  947. (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
  948. uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
  949. Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
  950. getI32Imm(RotImm & 0xffff));
  951. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  952. getI32Imm(Shift), getI32Imm(0));
  953. }
  954. // Following patterns use 3 instructions to materialize the Imm.
  955. InstCnt = 3;
  956. // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  957. // {zeros}{31-bit value}{zeros}
  958. // {zeros}{ones}{31-bit value}
  959. // {ones}{31-bit value}{zeros}
  960. // We can take advantage of LIS's sign-extension semantics to generate leading
  961. // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
962. // ones on both sides after rotation.
  963. if ((LZ + FO + TZ) > 32) {
  964. uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
  965. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  966. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  967. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  968. getI32Imm((Imm >> TZ) & 0xffff));
  969. return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
  970. getI32Imm(TZ), getI32Imm(LZ));
  971. }
  972. // 3-2) Pattern : {zeros}{31-bit value}{ones}
973. // Shift the Imm right by (32 - LZ) bits to construct a negative 32-bit value,
  974. // therefore we can take advantage of LIS's sign-extension semantics, add
  975. // the remaining bits with ORI, and then mask them off after rotation.
  976. // This is similar to Pattern 2-3, please refer to the diagram there.
  977. if ((LZ + TO) > 32) {
  978. // Since the immediates with (LZ > 32) have been handled by previous
  979. // patterns, here we have (LZ <= 32) to make sure we will not shift right
  980. // the Imm by a negative value.
  981. assert(LZ <= 32 && "Unexpected shift value.");
  982. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  983. getI32Imm((Imm >> (48 - LZ)) & 0xffff));
  984. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  985. getI32Imm((Imm >> (32 - LZ)) & 0xffff));
  986. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  987. getI32Imm(32 - LZ), getI32Imm(LZ));
  988. }
  989. // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  990. // {ones}{31-bit value}{ones}
  991. // We can take advantage of LIS's sign-extension semantics to generate leading
  992. // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
993. // ones on the left side (if required) after rotation.
  994. // This is similar to Pattern 2-4, please refer to the diagram there.
  995. if ((LZ + FO + TO) > 32) {
  996. Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
  997. getI32Imm((Imm >> (TO + 16)) & 0xffff));
  998. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  999. getI32Imm((Imm >> TO) & 0xffff));
  1000. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  1001. getI32Imm(TO), getI32Imm(LZ));
  1002. }
  1003. // 3-4) Patterns : High word == Low word
  1004. if (Hi32 == Lo32) {
  1005. // Handle the first 32 bits.
  1006. uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
  1007. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  1008. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  1009. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  1010. getI32Imm(Lo32 & 0xffff));
  1011. // Use rldimi to insert the Low word into High word.
  1012. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
  1013. getI32Imm(0)};
  1014. return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  1015. }
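// Editor's illustration for 3-4 (hypothetical value): Imm = 0x1234567812345678
// builds 0x12345678 with LIS8 + ORI8, and then RLDIMI copies the low word into
// the high word.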
  1016. // 3-5) Patterns : {******}{33 zeros}{******}
  1017. // {******}{33 ones}{******}
  1018. // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  1019. // bits remain on both sides. Rotate right the Imm to construct an int<32>
1020. // value, use LIS + ORI for the int<32> value and then use RLDICL without a
1021. // mask to rotate it back.
  1022. // This is similar to Pattern 2-6, please refer to the diagram there.
  1023. if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
  1024. (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
  1025. uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
  1026. uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
  1027. unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
  1028. Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
  1029. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  1030. getI32Imm(RotImm & 0xffff));
  1031. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  1032. getI32Imm(Shift), getI32Imm(0));
  1033. }
  1034. InstCnt = 0;
  1035. return nullptr;
  1036. }
1037. // Try to select instructions to generate a 64-bit immediate using prefixed as
1038. // well as non-prefixed instructions. The function will return the SDNode
  1039. // to materialize that constant or it will return nullptr if it does not
  1040. // find one. The variable InstCnt is set to the number of instructions that
  1041. // were selected.
  1042. static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
  1043. uint64_t Imm, unsigned &InstCnt) {
  1044. unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  1045. unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  1046. unsigned TO = countTrailingOnes<uint64_t>(Imm);
  1047. unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
  1048. unsigned Hi32 = Hi_32(Imm);
  1049. unsigned Lo32 = Lo_32(Imm);
  1050. auto getI32Imm = [CurDAG, dl](unsigned Imm) {
  1051. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  1052. };
  1053. auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
  1054. return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  1055. };
  1056. // Following patterns use 1 instruction to materialize Imm.
  1057. InstCnt = 1;
  1058. // The pli instruction can materialize up to 34 bits directly.
  1059. // If a constant fits within 34-bits, emit the pli instruction here directly.
  1060. if (isInt<34>(Imm))
  1061. return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
  1062. CurDAG->getTargetConstant(Imm, dl, MVT::i64));
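// Editor's illustration (hypothetical value): Imm = 0x1FFFFFFFF (2^33 - 1)
// fits in the signed 34-bit pli field and is materialized in one instruction.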
  1063. // Require at least two instructions.
  1064. InstCnt = 2;
  1065. SDNode *Result = nullptr;
  1066. // Patterns : {zeros}{ones}{33-bit value}{zeros}
  1067. // {zeros}{33-bit value}{zeros}
  1068. // {zeros}{ones}{33-bit value}
  1069. // {ones}{33-bit value}{zeros}
  1070. // We can take advantage of PLI's sign-extension semantics to generate leading
  1071. // ones, and then use RLDIC to mask off the ones on both sides after rotation.
  1072. if ((LZ + FO + TZ) > 30) {
  1073. APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
  1074. APInt Extended = SignedInt34.sext(64);
  1075. Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
  1076. getI64Imm(*Extended.getRawData()));
  1077. return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
  1078. getI32Imm(TZ), getI32Imm(LZ));
  1079. }
  1080. // Pattern : {zeros}{33-bit value}{ones}
  1081. // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
  1082. // therefore we can take advantage of PLI's sign-extension semantics, and then
  1083. // mask them off after rotation.
  1084. //
  1085. // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
  1086. // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
  1087. // +------------------------+ +------------------------+
  1088. // 63 0 63 0
  1089. //
  1090. // +----sext-----|--34-bit--+ +clear-|-----------------+
  1091. // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
  1092. // +------------------------+ +------------------------+
  1093. // 63 0 63 0
  1094. if ((LZ + TO) > 30) {
  1095. APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
  1096. APInt Extended = SignedInt34.sext(64);
  1097. Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
  1098. getI64Imm(*Extended.getRawData()));
  1099. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  1100. getI32Imm(30 - LZ), getI32Imm(LZ));
  1101. }
  1102. // Patterns : {zeros}{ones}{33-bit value}{ones}
  1103. // {ones}{33-bit value}{ones}
  1104. // Similar to LI we can take advantage of PLI's sign-extension semantics to
1105. // generate leading ones, and then use RLDICL to mask off the ones on the
1106. // left side (if required) after rotation.
  1107. if ((LZ + FO + TO) > 30) {
  1108. APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
  1109. APInt Extended = SignedInt34.sext(64);
  1110. Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
  1111. getI64Imm(*Extended.getRawData()));
  1112. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
  1113. getI32Imm(TO), getI32Imm(LZ));
  1114. }
  1115. // Patterns : {******}{31 zeros}{******}
  1116. // : {******}{31 ones}{******}
1117. // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1118. // is 33. Rotate the Imm right to construct an int<33> value; we can use PLI
1119. // for the int<33> value and then use RLDICL without a mask to rotate it back.
  1120. //
  1121. // +------|--ones--|------+ +---ones--||---33 bit--+
  1122. // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
  1123. // +----------------------+ +----------------------+
  1124. // 63 0 63 0
  1125. for (unsigned Shift = 0; Shift < 63; ++Shift) {
  1126. uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
  1127. if (isInt<34>(RotImm)) {
  1128. Result =
  1129. CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
  1130. return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  1131. SDValue(Result, 0), getI32Imm(Shift),
  1132. getI32Imm(0));
  1133. }
  1134. }
  1135. // Patterns : High word == Low word
  1136. // This is basically a splat of a 32 bit immediate.
  1137. if (Hi32 == Lo32) {
  1138. Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
  1139. SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
  1140. getI32Imm(0)};
  1141. return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  1142. }
  1143. InstCnt = 3;
  1144. // Catch-all
  1145. // This pattern can form any 64 bit immediate in 3 instructions.
  1146. SDNode *ResultHi =
  1147. CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
  1148. SDNode *ResultLo =
  1149. CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
  1150. SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
  1151. getI32Imm(0)};
  1152. return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  1153. }
  1154. static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
  1155. unsigned *InstCnt = nullptr) {
  1156. unsigned InstCntDirect = 0;
1157. // No more than 3 instructions are used if we can select the i64 immediate
1158. // directly.
  1159. SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
  1160. const PPCSubtarget &Subtarget =
  1161. CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
  1162. // If we have prefixed instructions and there is a chance we can
  1163. // materialize the constant with fewer prefixed instructions than
  1164. // non-prefixed, try that.
  1165. if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
  1166. unsigned InstCntDirectP = 0;
  1167. SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
  1168. // Use the prefix case in either of two cases:
  1169. // 1) We have no result from the non-prefix case to use.
  1170. // 2) The non-prefix case uses more instructions than the prefix case.
  1171. // If the prefix and non-prefix cases use the same number of instructions
  1172. // we will prefer the non-prefix case.
  1173. if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
  1174. if (InstCnt)
  1175. *InstCnt = InstCntDirectP;
  1176. return ResultP;
  1177. }
  1178. }
  1179. if (Result) {
  1180. if (InstCnt)
  1181. *InstCnt = InstCntDirect;
  1182. return Result;
  1183. }
  1184. auto getI32Imm = [CurDAG, dl](unsigned Imm) {
  1185. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  1186. };
  1187. // Handle the upper 32 bit value.
  1188. Result =
  1189. selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
  1190. // Add in the last bits as required.
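// Editor's illustration (hypothetical value): for Imm = 0x123456789ABCDEF0,
// none of the direct patterns match, so we materialize 0x1234567800000000
// first and then OR in 0x9ABC via ORIS8 and 0xDEF0 via ORI8.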
  1191. if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
  1192. Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
  1193. SDValue(Result, 0), getI32Imm(Hi16));
  1194. ++InstCntDirect;
  1195. }
  1196. if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
  1197. Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
  1198. getI32Imm(Lo16));
  1199. ++InstCntDirect;
  1200. }
  1201. if (InstCnt)
  1202. *InstCnt = InstCntDirect;
  1203. return Result;
  1204. }
  1205. // Select a 64-bit constant.
  1206. static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  1207. SDLoc dl(N);
  1208. // Get 64 bit value.
  1209. int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  1210. if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
  1211. uint64_t SextImm = SignExtend64(Imm, MinSize);
  1212. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
  1213. if (isInt<16>(SextImm))
  1214. return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  1215. }
  1216. return selectI64Imm(CurDAG, dl, Imm);
  1217. }
  1218. namespace {
  1219. class BitPermutationSelector {
  1220. struct ValueBit {
  1221. SDValue V;
  1222. // The bit number in the value, using a convention where bit 0 is the
  1223. // lowest-order bit.
  1224. unsigned Idx;
1225. // ConstZero means a bit we need to mask off.
1226. // Variable is a bit that comes from an input variable.
1227. // VariableKnownToBeZero is also a bit that comes from an input variable,
1228. // but it is already known to be zero, so we do not need to mask it.
  1229. enum Kind {
  1230. ConstZero,
  1231. Variable,
  1232. VariableKnownToBeZero
  1233. } K;
  1234. ValueBit(SDValue V, unsigned I, Kind K = Variable)
  1235. : V(V), Idx(I), K(K) {}
  1236. ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
  1237. bool isZero() const {
  1238. return K == ConstZero || K == VariableKnownToBeZero;
  1239. }
  1240. bool hasValue() const {
  1241. return K == Variable || K == VariableKnownToBeZero;
  1242. }
  1243. SDValue getValue() const {
  1244. assert(hasValue() && "Cannot get the value of a constant bit");
  1245. return V;
  1246. }
  1247. unsigned getValueBitIndex() const {
  1248. assert(hasValue() && "Cannot get the value bit index of a constant bit");
  1249. return Idx;
  1250. }
  1251. };
  1252. // A bit group has the same underlying value and the same rotate factor.
  1253. struct BitGroup {
  1254. SDValue V;
  1255. unsigned RLAmt;
  1256. unsigned StartIdx, EndIdx;
  1257. // This rotation amount assumes that the lower 32 bits of the quantity are
  1258. // replicated in the high 32 bits by the rotation operator (which is done
  1259. // by rlwinm and friends in 64-bit mode).
  1260. bool Repl32;
  1261. // Did converting to Repl32 == true change the rotation factor? If it did,
  1262. // it decreased it by 32.
  1263. bool Repl32CR;
  1264. // Was this group coalesced after setting Repl32 to true?
  1265. bool Repl32Coalesced;
  1266. BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
  1267. : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
  1268. Repl32Coalesced(false) {
  1269. LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
  1270. << " [" << S << ", " << E << "]\n");
  1271. }
  1272. };
  1273. // Information on each (Value, RLAmt) pair (like the number of groups
  1274. // associated with each) used to choose the lowering method.
  1275. struct ValueRotInfo {
  1276. SDValue V;
  1277. unsigned RLAmt = std::numeric_limits<unsigned>::max();
  1278. unsigned NumGroups = 0;
  1279. unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
  1280. bool Repl32 = false;
  1281. ValueRotInfo() = default;
  1282. // For sorting (in reverse order) by NumGroups, and then by
  1283. // FirstGroupStartIdx.
  1284. bool operator < (const ValueRotInfo &Other) const {
  1285. // We need to sort so that the non-Repl32 come first because, when we're
  1286. // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
  1287. // masking operation.
  1288. if (Repl32 < Other.Repl32)
  1289. return true;
  1290. else if (Repl32 > Other.Repl32)
  1291. return false;
  1292. else if (NumGroups > Other.NumGroups)
  1293. return true;
  1294. else if (NumGroups < Other.NumGroups)
  1295. return false;
  1296. else if (RLAmt == 0 && Other.RLAmt != 0)
  1297. return true;
  1298. else if (RLAmt != 0 && Other.RLAmt == 0)
  1299. return false;
  1300. else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
  1301. return true;
  1302. return false;
  1303. }
  1304. };
  1305. using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  1306. using ValueBitsMemoizer =
  1307. DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  1308. ValueBitsMemoizer Memoizer;
1309. // Return a pair of a bool and a SmallVector pointer to a memoization entry.
1310. // The bool is true if something interesting was deduced, and false if we're
1311. // providing only a generic representation of V (or something else likewise
1312. // uninteresting for instruction selection) through the SmallVector.
  1313. std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
  1314. unsigned NumBits) {
  1315. auto &ValueEntry = Memoizer[V];
  1316. if (ValueEntry)
  1317. return std::make_pair(ValueEntry->first, &ValueEntry->second);
  1318. ValueEntry.reset(new ValueBitsMemoizedValue());
  1319. bool &Interesting = ValueEntry->first;
  1320. SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
  1321. Bits.resize(NumBits);
  1322. switch (V.getOpcode()) {
  1323. default: break;
  1324. case ISD::ROTL:
  1325. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1326. unsigned RotAmt = V.getConstantOperandVal(1);
  1327. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1328. for (unsigned i = 0; i < NumBits; ++i)
  1329. Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
  1330. return std::make_pair(Interesting = true, &Bits);
  1331. }
  1332. break;
  1333. case ISD::SHL:
  1334. case PPCISD::SHL:
  1335. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1336. unsigned ShiftAmt = V.getConstantOperandVal(1);
  1337. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1338. for (unsigned i = ShiftAmt; i < NumBits; ++i)
  1339. Bits[i] = LHSBits[i - ShiftAmt];
  1340. for (unsigned i = 0; i < ShiftAmt; ++i)
  1341. Bits[i] = ValueBit(ValueBit::ConstZero);
  1342. return std::make_pair(Interesting = true, &Bits);
  1343. }
  1344. break;
  1345. case ISD::SRL:
  1346. case PPCISD::SRL:
  1347. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1348. unsigned ShiftAmt = V.getConstantOperandVal(1);
  1349. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1350. for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
  1351. Bits[i] = LHSBits[i + ShiftAmt];
  1352. for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
  1353. Bits[i] = ValueBit(ValueBit::ConstZero);
  1354. return std::make_pair(Interesting = true, &Bits);
  1355. }
  1356. break;
  1357. case ISD::AND:
  1358. if (isa<ConstantSDNode>(V.getOperand(1))) {
  1359. uint64_t Mask = V.getConstantOperandVal(1);
  1360. const SmallVector<ValueBit, 64> *LHSBits;
  1361. // Mark this as interesting, only if the LHS was also interesting. This
  1362. // prevents the overall procedure from matching a single immediate 'and'
  1363. // (which is non-optimal because such an and might be folded with other
  1364. // things if we don't select it here).
  1365. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
  1366. for (unsigned i = 0; i < NumBits; ++i)
  1367. if (((Mask >> i) & 1) == 1)
  1368. Bits[i] = (*LHSBits)[i];
  1369. else {
  1370. // AND instruction masks this bit. If the input is already zero,
  1371. // we have nothing to do here. Otherwise, make the bit ConstZero.
  1372. if ((*LHSBits)[i].isZero())
  1373. Bits[i] = (*LHSBits)[i];
  1374. else
  1375. Bits[i] = ValueBit(ValueBit::ConstZero);
  1376. }
  1377. return std::make_pair(Interesting, &Bits);
  1378. }
  1379. break;
  1380. case ISD::OR: {
  1381. const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
  1382. const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
  1383. bool AllDisjoint = true;
  1384. SDValue LastVal = SDValue();
  1385. unsigned LastIdx = 0;
  1386. for (unsigned i = 0; i < NumBits; ++i) {
  1387. if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1388. // If both inputs are known to be zero and one is ConstZero and
1389. // the other is VariableKnownToBeZero, we can select whichever
  1390. // we like. To minimize the number of bit groups, we select
  1391. // VariableKnownToBeZero if this bit is the next bit of the same
  1392. // input variable from the previous bit. Otherwise, we select
  1393. // ConstZero.
  1394. if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
  1395. LHSBits[i].getValueBitIndex() == LastIdx + 1)
  1396. Bits[i] = LHSBits[i];
  1397. else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
  1398. RHSBits[i].getValueBitIndex() == LastIdx + 1)
  1399. Bits[i] = RHSBits[i];
  1400. else
  1401. Bits[i] = ValueBit(ValueBit::ConstZero);
  1402. }
  1403. else if (LHSBits[i].isZero())
  1404. Bits[i] = RHSBits[i];
  1405. else if (RHSBits[i].isZero())
  1406. Bits[i] = LHSBits[i];
  1407. else {
  1408. AllDisjoint = false;
  1409. break;
  1410. }
  1411. // We remember the value and bit index of this bit.
  1412. if (Bits[i].hasValue()) {
  1413. LastVal = Bits[i].getValue();
  1414. LastIdx = Bits[i].getValueBitIndex();
  1415. }
  1416. else {
  1417. if (LastVal) LastVal = SDValue();
  1418. LastIdx = 0;
  1419. }
  1420. }
  1421. if (!AllDisjoint)
  1422. break;
  1423. return std::make_pair(Interesting = true, &Bits);
  1424. }
  1425. case ISD::ZERO_EXTEND: {
  1426. // We support only the case with zero extension from i32 to i64 so far.
  1427. if (V.getValueType() != MVT::i64 ||
  1428. V.getOperand(0).getValueType() != MVT::i32)
  1429. break;
  1430. const SmallVector<ValueBit, 64> *LHSBits;
  1431. const unsigned NumOperandBits = 32;
  1432. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
  1433. NumOperandBits);
  1434. for (unsigned i = 0; i < NumOperandBits; ++i)
  1435. Bits[i] = (*LHSBits)[i];
  1436. for (unsigned i = NumOperandBits; i < NumBits; ++i)
  1437. Bits[i] = ValueBit(ValueBit::ConstZero);
  1438. return std::make_pair(Interesting, &Bits);
  1439. }
  1440. case ISD::TRUNCATE: {
  1441. EVT FromType = V.getOperand(0).getValueType();
  1442. EVT ToType = V.getValueType();
  1443. // We support only the case with truncate from i64 to i32.
  1444. if (FromType != MVT::i64 || ToType != MVT::i32)
  1445. break;
  1446. const unsigned NumAllBits = FromType.getSizeInBits();
  1447. SmallVector<ValueBit, 64> *InBits;
  1448. std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
  1449. NumAllBits);
  1450. const unsigned NumValidBits = ToType.getSizeInBits();
1451. // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit value,
1452. // so we cannot include this truncate.
  1453. bool UseUpper32bit = false;
  1454. for (unsigned i = 0; i < NumValidBits; ++i)
  1455. if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
  1456. UseUpper32bit = true;
  1457. break;
  1458. }
  1459. if (UseUpper32bit)
  1460. break;
  1461. for (unsigned i = 0; i < NumValidBits; ++i)
  1462. Bits[i] = (*InBits)[i];
  1463. return std::make_pair(Interesting, &Bits);
  1464. }
  1465. case ISD::AssertZext: {
  1466. // For AssertZext, we look through the operand and
  1467. // mark the bits known to be zero.
  1468. const SmallVector<ValueBit, 64> *LHSBits;
  1469. std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
  1470. NumBits);
  1471. EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
  1472. const unsigned NumValidBits = FromType.getSizeInBits();
  1473. for (unsigned i = 0; i < NumValidBits; ++i)
  1474. Bits[i] = (*LHSBits)[i];
  1475. // These bits are known to be zero but the AssertZext may be from a value
  1476. // that already has some constant zero bits (i.e. from a masking and).
  1477. for (unsigned i = NumValidBits; i < NumBits; ++i)
  1478. Bits[i] = (*LHSBits)[i].hasValue()
  1479. ? ValueBit((*LHSBits)[i].getValue(),
  1480. (*LHSBits)[i].getValueBitIndex(),
  1481. ValueBit::VariableKnownToBeZero)
  1482. : ValueBit(ValueBit::ConstZero);
  1483. return std::make_pair(Interesting, &Bits);
  1484. }
  1485. case ISD::LOAD:
  1486. LoadSDNode *LD = cast<LoadSDNode>(V);
  1487. if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
  1488. EVT VT = LD->getMemoryVT();
  1489. const unsigned NumValidBits = VT.getSizeInBits();
  1490. for (unsigned i = 0; i < NumValidBits; ++i)
  1491. Bits[i] = ValueBit(V, i);
  1492. // These bits are known to be zero.
  1493. for (unsigned i = NumValidBits; i < NumBits; ++i)
  1494. Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1495. // A zero-extending load itself cannot be optimized, so it is not
1496. // interesting by itself though it gives useful information.
  1497. return std::make_pair(Interesting = false, &Bits);
  1498. }
  1499. break;
  1500. }
  1501. for (unsigned i = 0; i < NumBits; ++i)
  1502. Bits[i] = ValueBit(V, i);
  1503. return std::make_pair(Interesting = false, &Bits);
  1504. }
  1505. // For each value (except the constant ones), compute the left-rotate amount
  1506. // to get it from its original to final position.
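// Editor's note (illustrative): if result bit 5 comes from bit 3 of V the
// rotate amount is 2; if it comes from bit 7 the amount wraps to Bits.size() - 2.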
  1507. void computeRotationAmounts() {
  1508. NeedMask = false;
  1509. RLAmt.resize(Bits.size());
  1510. for (unsigned i = 0; i < Bits.size(); ++i)
  1511. if (Bits[i].hasValue()) {
  1512. unsigned VBI = Bits[i].getValueBitIndex();
  1513. if (i >= VBI)
  1514. RLAmt[i] = i - VBI;
  1515. else
  1516. RLAmt[i] = Bits.size() - (VBI - i);
  1517. } else if (Bits[i].isZero()) {
  1518. NeedMask = true;
  1519. RLAmt[i] = UINT32_MAX;
  1520. } else {
  1521. llvm_unreachable("Unknown value bit type");
  1522. }
  1523. }
  1524. // Collect groups of consecutive bits with the same underlying value and
  1525. // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  1526. // they break up groups.
  1527. void collectBitGroups(bool LateMask) {
  1528. BitGroups.clear();
  1529. unsigned LastRLAmt = RLAmt[0];
  1530. SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
  1531. unsigned LastGroupStartIdx = 0;
  1532. bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
  1533. for (unsigned i = 1; i < Bits.size(); ++i) {
  1534. unsigned ThisRLAmt = RLAmt[i];
  1535. SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
  1536. if (LateMask && !ThisValue) {
  1537. ThisValue = LastValue;
  1538. ThisRLAmt = LastRLAmt;
  1539. // If we're doing late masking, then the first bit group always starts
  1540. // at zero (even if the first bits were zero).
  1541. if (BitGroups.empty())
  1542. LastGroupStartIdx = 0;
  1543. }
1544. // If this bit is known to be zero and the current group is a bit group
1545. // of zeros, we do not need to terminate the current bit group even if the
1546. // Value or RLAmt does not match here. Instead, we terminate this group
1547. // when the first non-zero bit appears later.
  1548. if (IsGroupOfZeros && Bits[i].isZero())
  1549. continue;
  1550. // If this bit has the same underlying value and the same rotate factor as
  1551. // the last one, then they're part of the same group.
  1552. if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1553. // We cannot continue the current group if this bit is not known to
1554. // be zero in a bit group of zeros.
  1555. if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
  1556. continue;
  1557. if (LastValue.getNode())
  1558. BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
  1559. i-1));
  1560. LastRLAmt = ThisRLAmt;
  1561. LastValue = ThisValue;
  1562. LastGroupStartIdx = i;
  1563. IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
  1564. }
  1565. if (LastValue.getNode())
  1566. BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
  1567. Bits.size()-1));
  1568. if (BitGroups.empty())
  1569. return;
  1570. // We might be able to combine the first and last groups.
  1571. if (BitGroups.size() > 1) {
  1572. // If the first and last groups are the same, then remove the first group
  1573. // in favor of the last group, making the ending index of the last group
  1574. // equal to the ending index of the to-be-removed first group.
  1575. if (BitGroups[0].StartIdx == 0 &&
  1576. BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
  1577. BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
  1578. BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
  1579. LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
  1580. BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
  1581. BitGroups.erase(BitGroups.begin());
  1582. }
  1583. }
  1584. }
1585. // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1586. // associated with each. If the number of groups is the same, we prefer a group
1587. // that does not require a rotate, i.e. RLAmt is 0, to avoid the first rotate
1588. // instruction. If there is a degeneracy, pick the one that occurs
1589. // first (in the final value).
  1590. void collectValueRotInfo() {
  1591. ValueRots.clear();
  1592. for (auto &BG : BitGroups) {
  1593. unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
  1594. ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
  1595. VRI.V = BG.V;
  1596. VRI.RLAmt = BG.RLAmt;
  1597. VRI.Repl32 = BG.Repl32;
  1598. VRI.NumGroups += 1;
  1599. VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
  1600. }
  1601. // Now that we've collected the various ValueRotInfo instances, we need to
  1602. // sort them.
  1603. ValueRotsVec.clear();
  1604. for (auto &I : ValueRots) {
  1605. ValueRotsVec.push_back(I.second);
  1606. }
  1607. llvm::sort(ValueRotsVec);
  1608. }
  1609. // In 64-bit mode, rlwinm and friends have a rotation operator that
  1610. // replicates the low-order 32 bits into the high-order 32-bits. The mask
  1611. // indices of these instructions can only be in the lower 32 bits, so they
  1612. // can only represent some 64-bit bit groups. However, when they can be used,
  1613. // the 32-bit replication can be used to represent, as a single bit group,
1614. // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1615. // groups when possible.
  1617. void assignRepl32BitGroups() {
  1618. // If we have bits like this:
  1619. //
  1620. // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
  1621. // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
  1622. // Groups: | RLAmt = 8 | RLAmt = 40 |
  1623. //
  1624. // But, making use of a 32-bit operation that replicates the low-order 32
  1625. // bits into the high-order 32 bits, this can be one bit group with a RLAmt
  1626. // of 8.
  1627. auto IsAllLow32 = [this](BitGroup & BG) {
  1628. if (BG.StartIdx <= BG.EndIdx) {
  1629. for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
  1630. if (!Bits[i].hasValue())
  1631. continue;
  1632. if (Bits[i].getValueBitIndex() >= 32)
  1633. return false;
  1634. }
  1635. } else {
  1636. for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
  1637. if (!Bits[i].hasValue())
  1638. continue;
  1639. if (Bits[i].getValueBitIndex() >= 32)
  1640. return false;
  1641. }
  1642. for (unsigned i = 0; i <= BG.EndIdx; ++i) {
  1643. if (!Bits[i].hasValue())
  1644. continue;
  1645. if (Bits[i].getValueBitIndex() >= 32)
  1646. return false;
  1647. }
  1648. }
  1649. return true;
  1650. };
  1651. for (auto &BG : BitGroups) {
  1652. // If this bit group has RLAmt of 0 and will not be merged with
  1653. // another bit group, we don't benefit from Repl32. We don't mark
1654. // such a group, to give more freedom for later instruction selection.
  1655. if (BG.RLAmt == 0) {
  1656. auto PotentiallyMerged = [this](BitGroup & BG) {
  1657. for (auto &BG2 : BitGroups)
  1658. if (&BG != &BG2 && BG.V == BG2.V &&
  1659. (BG2.RLAmt == 0 || BG2.RLAmt == 32))
  1660. return true;
  1661. return false;
  1662. };
  1663. if (!PotentiallyMerged(BG))
  1664. continue;
  1665. }
  1666. if (BG.StartIdx < 32 && BG.EndIdx < 32) {
  1667. if (IsAllLow32(BG)) {
  1668. if (BG.RLAmt >= 32) {
  1669. BG.RLAmt -= 32;
  1670. BG.Repl32CR = true;
  1671. }
  1672. BG.Repl32 = true;
  1673. LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
  1674. << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
  1675. << BG.StartIdx << ", " << BG.EndIdx << "]\n");
  1676. }
  1677. }
  1678. }
  1679. // Now walk through the bit groups, consolidating where possible.
  1680. for (auto I = BitGroups.begin(); I != BitGroups.end();) {
  1681. // We might want to remove this bit group by merging it with the previous
  1682. // group (which might be the ending group).
  1683. auto IP = (I == BitGroups.begin()) ?
  1684. std::prev(BitGroups.end()) : std::prev(I);
  1685. if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
  1686. I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
  1687. LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
  1688. << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
  1689. << I->StartIdx << ", " << I->EndIdx
  1690. << "] with group with range [" << IP->StartIdx << ", "
  1691. << IP->EndIdx << "]\n");
  1692. IP->EndIdx = I->EndIdx;
  1693. IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
  1694. IP->Repl32Coalesced = true;
  1695. I = BitGroups.erase(I);
  1696. continue;
  1697. } else {
  1698. // There is a special case worth handling: If there is a single group
  1699. // covering the entire upper 32 bits, and it can be merged with both
  1700. // the next and previous groups (which might be the same group), then
  1701. // do so. If it is the same group (so there will be only one group in
  1702. // total), then we need to reverse the order of the range so that it
  1703. // covers the entire 64 bits.
  1704. if (I->StartIdx == 32 && I->EndIdx == 63) {
  1705. assert(std::next(I) == BitGroups.end() &&
  1706. "bit group ends at index 63 but there is another?");
  1707. auto IN = BitGroups.begin();
  1708. if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
  1709. (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
  1710. IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
  1711. IsAllLow32(*I)) {
  1712. LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
  1713. << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
  1714. << ", " << I->EndIdx
  1715. << "] with 32-bit replicated groups with ranges ["
  1716. << IP->StartIdx << ", " << IP->EndIdx << "] and ["
  1717. << IN->StartIdx << ", " << IN->EndIdx << "]\n");
  1718. if (IP == IN) {
  1719. // There is only one other group; change it to cover the whole
  1720. // range (backward, so that it can still be Repl32 but cover the
  1721. // whole 64-bit range).
  1722. IP->StartIdx = 31;
  1723. IP->EndIdx = 30;
  1724. IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
  1725. IP->Repl32Coalesced = true;
  1726. I = BitGroups.erase(I);
  1727. } else {
  1728. // There are two separate groups, one before this group and one
  1729. // after us (at the beginning). We're going to remove this group,
  1730. // but also the group at the very beginning.
  1731. IP->EndIdx = IN->EndIdx;
  1732. IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
  1733. IP->Repl32Coalesced = true;
  1734. I = BitGroups.erase(I);
  1735. BitGroups.erase(BitGroups.begin());
  1736. }
  1737. // This must be the last group in the vector (and we might have
  1738. // just invalidated the iterator above), so break here.
  1739. break;
  1740. }
  1741. }
  1742. }
  1743. ++I;
  1744. }
  1745. }
  1746. SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
  1747. return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  1748. }
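// Editor's note (rough summary of the helper below): the returned mask is
// clear at every bit position that must be constant zero and set everywhere
// else, so ANDing with it performs the late masking.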
  1749. uint64_t getZerosMask() {
  1750. uint64_t Mask = 0;
  1751. for (unsigned i = 0; i < Bits.size(); ++i) {
  1752. if (Bits[i].hasValue())
  1753. continue;
  1754. Mask |= (UINT64_C(1) << i);
  1755. }
  1756. return ~Mask;
  1757. }
1758. // This method extends an input value to 64 bits if the input is a 32-bit integer.
1759. // While selecting instructions in BitPermutationSelector in 64-bit mode,
1760. // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1761. // In such a case, we extend it to 64 bits to be consistent with other values.
  1762. SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
  1763. if (V.getValueSizeInBits() == 64)
  1764. return V;
  1765. assert(V.getValueSizeInBits() == 32);
  1766. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  1767. SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
  1768. MVT::i64), 0);
  1769. SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
  1770. MVT::i64, ImDef, V,
  1771. SubRegIdx), 0);
  1772. return ExtVal;
  1773. }
  1774. SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
  1775. if (V.getValueSizeInBits() == 32)
  1776. return V;
  1777. assert(V.getValueSizeInBits() == 64);
  1778. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  1779. SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
  1780. MVT::i32, V, SubRegIdx), 0);
  1781. return SubVal;
  1782. }
  1783. // Depending on the number of groups for a particular value, it might be
  1784. // better to rotate, mask explicitly (using andi/andis), and then or the
  1785. // result. Select this part of the result first.
  1786. void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  1787. if (BPermRewriterNoMasking)
  1788. return;
  1789. for (ValueRotInfo &VRI : ValueRotsVec) {
  1790. unsigned Mask = 0;
  1791. for (unsigned i = 0; i < Bits.size(); ++i) {
  1792. if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
  1793. continue;
  1794. if (RLAmt[i] != VRI.RLAmt)
  1795. continue;
  1796. Mask |= (1u << i);
  1797. }
  1798. // Compute the masks for andi/andis that would be necessary.
  1799. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1800. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1801. "No set bits in mask for value bit groups");
  1802. bool NeedsRotate = VRI.RLAmt != 0;
  1803. // We're trying to minimize the number of instructions. If we have one
  1804. // group, using one of andi/andis can break even. If we have three
  1805. // groups, we can use both andi and andis and break even (to use both
  1806. // andi and andis we also need to or the results together). We need four
  1807. // groups if we also need to rotate. To use andi/andis we need to do more
  1808. // than break even because rotate-and-mask instructions tend to be easier
  1809. // to schedule.
  1810. // FIXME: We've biased here against using andi/andis, which is right for
  1811. // POWER cores, but not optimal everywhere. For example, on the A2,
  1812. // andi/andis have single-cycle latency whereas the rotate-and-mask
  1813. // instructions take two cycles, and it would be better to bias toward
  1814. // andi/andis in break-even cases.
  1815. unsigned NumAndInsts = (unsigned) NeedsRotate +
  1816. (unsigned) (ANDIMask != 0) +
  1817. (unsigned) (ANDISMask != 0) +
  1818. (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
  1819. (unsigned) (bool) Res;
  1820. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  1821. << " RL: " << VRI.RLAmt << ":"
  1822. << "\n\t\t\tisel using masking: " << NumAndInsts
  1823. << " using rotates: " << VRI.NumGroups << "\n");
  1824. if (NumAndInsts >= VRI.NumGroups)
  1825. continue;
  1826. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  1827. if (InstCnt) *InstCnt += NumAndInsts;
  1828. SDValue VRot;
  1829. if (VRI.RLAmt) {
  1830. SDValue Ops[] =
  1831. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1832. getI32Imm(0, dl), getI32Imm(31, dl) };
  1833. VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  1834. Ops), 0);
  1835. } else {
  1836. VRot = TruncateToInt32(VRI.V, dl);
  1837. }
  1838. SDValue ANDIVal, ANDISVal;
  1839. if (ANDIMask != 0)
  1840. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1841. VRot, getI32Imm(ANDIMask, dl)),
  1842. 0);
  1843. if (ANDISMask != 0)
  1844. ANDISVal =
  1845. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
  1846. getI32Imm(ANDISMask, dl)),
  1847. 0);
  1848. SDValue TotalVal;
  1849. if (!ANDIVal)
  1850. TotalVal = ANDISVal;
  1851. else if (!ANDISVal)
  1852. TotalVal = ANDIVal;
  1853. else
  1854. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1855. ANDIVal, ANDISVal), 0);
  1856. if (!Res)
  1857. Res = TotalVal;
  1858. else
  1859. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1860. Res, TotalVal), 0);
  1861. // Now, remove all groups with this underlying value and rotation
  1862. // factor.
  1863. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1864. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1865. });
  1866. }
  1867. }
  1868. // Instruction selection for the 32-bit case.
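// If SelectAndParts32 produced no partial result, the first group is
// materialized with rlwinm; each remaining group is merged in with rlwimi,
// and with late masking the known-zero bits are cleared at the end with
// andi./andis.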
  1869. SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
  1870. SDLoc dl(N);
  1871. SDValue Res;
  1872. if (InstCnt) *InstCnt = 0;
  1873. // Take care of cases that should use andi/andis first.
  1874. SelectAndParts32(dl, Res, InstCnt);
  1875. // If we've not yet selected a 'starting' instruction, and we have no zeros
  1876. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  1877. // number of groups), and start with this rotated value.
  1878. if ((!NeedMask || LateMask) && !Res) {
  1879. ValueRotInfo &VRI = ValueRotsVec[0];
  1880. if (VRI.RLAmt) {
  1881. if (InstCnt) *InstCnt += 1;
  1882. SDValue Ops[] =
  1883. { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
  1884. getI32Imm(0, dl), getI32Imm(31, dl) };
  1885. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
  1886. 0);
  1887. } else {
  1888. Res = TruncateToInt32(VRI.V, dl);
  1889. }
  1890. // Now, remove all groups with this underlying value and rotation factor.
  1891. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  1892. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
  1893. });
  1894. }
  1895. if (InstCnt) *InstCnt += BitGroups.size();
  1896. // Insert the other groups (one at a time).
  1897. for (auto &BG : BitGroups) {
  1898. if (!Res) {
  1899. SDValue Ops[] =
  1900. { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1901. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1902. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1903. Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  1904. } else {
  1905. SDValue Ops[] =
  1906. { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
  1907. getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
  1908. getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
  1909. Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
  1910. }
  1911. }
  1912. if (LateMask) {
  1913. unsigned Mask = (unsigned) getZerosMask();
  1914. unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
  1915. assert((ANDIMask != 0 || ANDISMask != 0) &&
  1916. "No set bits in zeros mask?");
  1917. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  1918. (unsigned) (ANDISMask != 0) +
  1919. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  1920. SDValue ANDIVal, ANDISVal;
  1921. if (ANDIMask != 0)
  1922. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
  1923. Res, getI32Imm(ANDIMask, dl)),
  1924. 0);
  1925. if (ANDISMask != 0)
  1926. ANDISVal =
  1927. SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
  1928. getI32Imm(ANDISMask, dl)),
  1929. 0);
  1930. if (!ANDIVal)
  1931. Res = ANDISVal;
  1932. else if (!ANDISVal)
  1933. Res = ANDIVal;
  1934. else
  1935. Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
  1936. ANDIVal, ANDISVal), 0);
  1937. }
  1938. return Res.getNode();
  1939. }
  1940. unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
  1941. unsigned MaskStart, unsigned MaskEnd,
  1942. bool IsIns) {
  1943. // In the notation used by the instructions, 'start' and 'end' are reversed
  1944. // because bits are counted from high to low order.
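// For example, a group covering bits 0-15 (MaskStart = 0, MaskEnd = 15)
// becomes InstMaskStart = 48, InstMaskEnd = 63 in instruction terms.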
  1945. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1946. InstMaskEnd = 64 - MaskStart - 1;
  1947. if (Repl32)
  1948. return 1;
  1949. if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
  1950. InstMaskEnd == 63 - RLAmt)
  1951. return 1;
  1952. return 2;
  1953. }
  1954. // For 64-bit values, not all combinations of rotates and masks are
  1955. // available. Produce one if it is available.
  1956. SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
  1957. bool Repl32, unsigned MaskStart, unsigned MaskEnd,
  1958. unsigned *InstCnt = nullptr) {
  1959. // In the notation used by the instructions, 'start' and 'end' are reversed
  1960. // because bits are counted from high to low order.
  1961. unsigned InstMaskStart = 64 - MaskEnd - 1,
  1962. InstMaskEnd = 64 - MaskStart - 1;
  1963. if (InstCnt) *InstCnt += 1;
  1964. if (Repl32) {
  1965. // This rotation amount assumes that the lower 32 bits of the quantity
  1966. // are replicated in the high 32 bits by the rotation operator (which is
  1967. // done by rlwinm and friends).
  1968. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  1969. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  1970. SDValue Ops[] =
  1971. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1972. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  1973. return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
  1974. Ops), 0);
  1975. }
  1976. if (InstMaskEnd == 63) {
  1977. SDValue Ops[] =
  1978. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1979. getI32Imm(InstMaskStart, dl) };
  1980. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
  1981. }
  1982. if (InstMaskStart == 0) {
  1983. SDValue Ops[] =
  1984. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1985. getI32Imm(InstMaskEnd, dl) };
  1986. return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
  1987. }
  1988. if (InstMaskEnd == 63 - RLAmt) {
  1989. SDValue Ops[] =
  1990. { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  1991. getI32Imm(InstMaskStart, dl) };
  1992. return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
  1993. }
  1994. // We cannot do this with a single instruction, so we'll use two. The
  1995. // problem is that we're not free to choose both a rotation amount and mask
  1996. // start and end independently. We can choose an arbitrary mask start and
  1997. // end, but then the rotation amount is fixed. Rotation, however, can be
  1998. // inverted, and so by applying an "inverse" rotation first, we can get the
  1999. // desired result.
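// Note that because RLAmt2 == MaskStart below, the final recursive call
// satisfies InstMaskEnd == 63 - RLAmt, so the second step always folds into
// a single rotate-and-mask instruction.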
  2000. if (InstCnt) *InstCnt += 1;
2001. // The rotation amount for the second instruction must be MaskStart.
  2002. unsigned RLAmt2 = MaskStart;
  2003. // The first instruction must rotate V so that the overall rotation amount
  2004. // is RLAmt.
  2005. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  2006. if (RLAmt1)
  2007. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  2008. return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  2009. }
  2010. // For 64-bit values, not all combinations of rotates and masks are
  2011. // available. Produce a rotate-mask-and-insert if one is available.
  2012. SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
  2013. unsigned RLAmt, bool Repl32, unsigned MaskStart,
  2014. unsigned MaskEnd, unsigned *InstCnt = nullptr) {
  2015. // In the notation used by the instructions, 'start' and 'end' are reversed
  2016. // because bits are counted from high to low order.
  2017. unsigned InstMaskStart = 64 - MaskEnd - 1,
  2018. InstMaskEnd = 64 - MaskStart - 1;
  2019. if (InstCnt) *InstCnt += 1;
  2020. if (Repl32) {
  2021. // This rotation amount assumes that the lower 32 bits of the quantity
  2022. // are replicated in the high 32 bits by the rotation operator (which is
  2023. // done by rlwinm and friends).
  2024. assert(InstMaskStart >= 32 && "Mask cannot start out of range");
  2025. assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
  2026. SDValue Ops[] =
  2027. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  2028. getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
  2029. return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
  2030. Ops), 0);
  2031. }
  2032. if (InstMaskEnd == 63 - RLAmt) {
  2033. SDValue Ops[] =
  2034. { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
  2035. getI32Imm(InstMaskStart, dl) };
  2036. return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
  2037. }
  2038. // We cannot do this with a single instruction, so we'll use two. The
  2039. // problem is that we're not free to choose both a rotation amount and mask
  2040. // start and end independently. We can choose an arbitrary mask start and
  2041. // end, but then the rotation amount is fixed. Rotation, however, can be
  2042. // inverted, and so by applying an "inverse" rotation first, we can get the
  2043. // desired result.
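// As in SelectRotMask64, choosing RLAmt2 == MaskStart guarantees that the
// final recursive call matches the single-instruction rldimi pattern above.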
  2044. if (InstCnt) *InstCnt += 1;
2045. // The rotation amount for the second instruction must be MaskStart.
  2046. unsigned RLAmt2 = MaskStart;
  2047. // The first instruction must rotate V so that the overall rotation amount
  2048. // is RLAmt.
  2049. unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
  2050. if (RLAmt1)
  2051. V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
  2052. return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  2053. }
  2054. void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
  2055. if (BPermRewriterNoMasking)
  2056. return;
  2057. // The idea here is the same as in the 32-bit version, but with additional
  2058. // complications from the fact that Repl32 might be true. Because we
  2059. // aggressively convert bit groups to Repl32 form (which, for small
  2060. // rotation factors, involves no other change), and then coalesce, it might
  2061. // be the case that a single 64-bit masking operation could handle both
  2062. // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2063. // form allowed coalescing, then we must use a 32-bit rotation in order to
  2064. // completely capture the new combined bit group.
  2065. for (ValueRotInfo &VRI : ValueRotsVec) {
  2066. uint64_t Mask = 0;
  2067. // We need to add to the mask all bits from the associated bit groups.
  2068. // If Repl32 is false, we need to add bits from bit groups that have
2069. // Repl32 true, but are trivially convertible to Repl32 false. Such a
2070. // group is trivially convertible if it overlaps only with the lower 32
  2071. // bits, and the group has not been coalesced.
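// For such a group, the effective 64-bit rotation amount is BG.RLAmt, plus 32
// when BG.Repl32CR is set; the EffRLAmt computation below accounts for this.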
  2072. auto MatchingBG = [VRI](const BitGroup &BG) {
  2073. if (VRI.V != BG.V)
  2074. return false;
  2075. unsigned EffRLAmt = BG.RLAmt;
  2076. if (!VRI.Repl32 && BG.Repl32) {
  2077. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
  2078. !BG.Repl32Coalesced) {
  2079. if (BG.Repl32CR)
  2080. EffRLAmt += 32;
  2081. } else {
  2082. return false;
  2083. }
  2084. } else if (VRI.Repl32 != BG.Repl32) {
  2085. return false;
  2086. }
  2087. return VRI.RLAmt == EffRLAmt;
  2088. };
  2089. for (auto &BG : BitGroups) {
  2090. if (!MatchingBG(BG))
  2091. continue;
  2092. if (BG.StartIdx <= BG.EndIdx) {
  2093. for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
  2094. Mask |= (UINT64_C(1) << i);
  2095. } else {
  2096. for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
  2097. Mask |= (UINT64_C(1) << i);
  2098. for (unsigned i = 0; i <= BG.EndIdx; ++i)
  2099. Mask |= (UINT64_C(1) << i);
  2100. }
  2101. }
  2102. // We can use the 32-bit andi/andis technique if the mask does not
  2103. // require any higher-order bits. This can save an instruction compared
  2104. // to always using the general 64-bit technique.
  2105. bool Use32BitInsts = isUInt<32>(Mask);
  2106. // Compute the masks for andi/andis that would be necessary.
  2107. unsigned ANDIMask = (Mask & UINT16_MAX),
  2108. ANDISMask = (Mask >> 16) & UINT16_MAX;
  2109. bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
  2110. unsigned NumAndInsts = (unsigned) NeedsRotate +
  2111. (unsigned) (bool) Res;
  2112. unsigned NumOfSelectInsts = 0;
  2113. selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
  2114. assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
  2115. if (Use32BitInsts)
  2116. NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
  2117. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  2118. else
  2119. NumAndInsts += NumOfSelectInsts + /* and */ 1;
  2120. unsigned NumRLInsts = 0;
  2121. bool FirstBG = true;
  2122. bool MoreBG = false;
  2123. for (auto &BG : BitGroups) {
  2124. if (!MatchingBG(BG)) {
  2125. MoreBG = true;
  2126. continue;
  2127. }
  2128. NumRLInsts +=
  2129. SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
  2130. !FirstBG);
  2131. FirstBG = false;
  2132. }
  2133. LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
  2134. << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
  2135. << "\n\t\t\tisel using masking: " << NumAndInsts
  2136. << " using rotates: " << NumRLInsts << "\n");
  2137. // When we'd use andi/andis, we bias toward using the rotates (andi only
  2138. // has a record form, and is cracked on POWER cores). However, when using
  2139. // general 64-bit constant formation, bias toward the constant form,
  2140. // because that exposes more opportunities for CSE.
  2141. if (NumAndInsts > NumRLInsts)
  2142. continue;
2143. // When merging multiple bit groups, an OR instruction is normally needed.
2144. // But when a rotate is used, rldimi can insert the rotated value into any
2145. // register, so the OR instruction can be avoided.
  2146. if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
  2147. continue;
  2148. LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
  2149. if (InstCnt) *InstCnt += NumAndInsts;
  2150. SDValue VRot;
  2151. // We actually need to generate a rotation if we have a non-zero rotation
  2152. // factor or, in the Repl32 case, if we care about any of the
  2153. // higher-order replicated bits. In the latter case, we generate a mask
  2154. // backward so that it actually includes the entire 64 bits.
  2155. if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
  2156. VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  2157. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
  2158. else
  2159. VRot = VRI.V;
  2160. SDValue TotalVal;
  2161. if (Use32BitInsts) {
  2162. assert((ANDIMask != 0 || ANDISMask != 0) &&
  2163. "No set bits in mask when using 32-bit ands for 64-bit value");
  2164. SDValue ANDIVal, ANDISVal;
  2165. if (ANDIMask != 0)
  2166. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  2167. ExtendToInt64(VRot, dl),
  2168. getI32Imm(ANDIMask, dl)),
  2169. 0);
  2170. if (ANDISMask != 0)
  2171. ANDISVal =
  2172. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  2173. ExtendToInt64(VRot, dl),
  2174. getI32Imm(ANDISMask, dl)),
  2175. 0);
  2176. if (!ANDIVal)
  2177. TotalVal = ANDISVal;
  2178. else if (!ANDISVal)
  2179. TotalVal = ANDIVal;
  2180. else
  2181. TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2182. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  2183. } else {
  2184. TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
  2185. TotalVal =
  2186. SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  2187. ExtendToInt64(VRot, dl), TotalVal),
  2188. 0);
  2189. }
  2190. if (!Res)
  2191. Res = TotalVal;
  2192. else
  2193. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2194. ExtendToInt64(Res, dl), TotalVal),
  2195. 0);
  2196. // Now, remove all groups with this underlying value and rotation
  2197. // factor.
  2198. eraseMatchingBitGroups(MatchingBG);
  2199. }
  2200. }
  2201. // Instruction selection for the 64-bit case.
  2202. SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
  2203. SDLoc dl(N);
  2204. SDValue Res;
  2205. if (InstCnt) *InstCnt = 0;
  2206. // Take care of cases that should use andi/andis first.
  2207. SelectAndParts64(dl, Res, InstCnt);
  2208. // If we've not yet selected a 'starting' instruction, and we have no zeros
  2209. // to fill in, select the (Value, RLAmt) with the highest priority (largest
  2210. // number of groups), and start with this rotated value.
  2211. if ((!NeedMask || LateMask) && !Res) {
  2212. // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
  2213. // groups will come first, and so the VRI representing the largest number
2214. // of groups might not be first (it might instead be the first Repl32 group).
  2215. unsigned MaxGroupsIdx = 0;
  2216. if (!ValueRotsVec[0].Repl32) {
  2217. for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
  2218. if (ValueRotsVec[i].Repl32) {
  2219. if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
  2220. MaxGroupsIdx = i;
  2221. break;
  2222. }
  2223. }
  2224. ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
  2225. bool NeedsRotate = false;
  2226. if (VRI.RLAmt) {
  2227. NeedsRotate = true;
  2228. } else if (VRI.Repl32) {
  2229. for (auto &BG : BitGroups) {
  2230. if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
  2231. BG.Repl32 != VRI.Repl32)
  2232. continue;
  2233. // We don't need a rotate if the bit group is confined to the lower
  2234. // 32 bits.
  2235. if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
  2236. continue;
  2237. NeedsRotate = true;
  2238. break;
  2239. }
  2240. }
  2241. if (NeedsRotate)
  2242. Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
  2243. VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
  2244. InstCnt);
  2245. else
  2246. Res = VRI.V;
  2247. // Now, remove all groups with this underlying value and rotation factor.
  2248. if (Res)
  2249. eraseMatchingBitGroups([VRI](const BitGroup &BG) {
  2250. return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
  2251. BG.Repl32 == VRI.Repl32;
  2252. });
  2253. }
  2254. // Because 64-bit rotates are more flexible than inserts, we might have a
  2255. // preference regarding which one we do first (to save one instruction).
  2256. if (!Res)
  2257. for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
  2258. if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2259. false) <
  2260. SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
  2261. true)) {
  2262. if (I != BitGroups.begin()) {
  2263. BitGroup BG = *I;
  2264. BitGroups.erase(I);
  2265. BitGroups.insert(BitGroups.begin(), BG);
  2266. }
  2267. break;
  2268. }
  2269. }
  2270. // Insert the other groups (one at a time).
  2271. for (auto &BG : BitGroups) {
  2272. if (!Res)
  2273. Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
  2274. BG.EndIdx, InstCnt);
  2275. else
  2276. Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
  2277. BG.StartIdx, BG.EndIdx, InstCnt);
  2278. }
  2279. if (LateMask) {
  2280. uint64_t Mask = getZerosMask();
  2281. // We can use the 32-bit andi/andis technique if the mask does not
  2282. // require any higher-order bits. This can save an instruction compared
  2283. // to always using the general 64-bit technique.
  2284. bool Use32BitInsts = isUInt<32>(Mask);
  2285. // Compute the masks for andi/andis that would be necessary.
  2286. unsigned ANDIMask = (Mask & UINT16_MAX),
  2287. ANDISMask = (Mask >> 16) & UINT16_MAX;
  2288. if (Use32BitInsts) {
  2289. assert((ANDIMask != 0 || ANDISMask != 0) &&
  2290. "No set bits in mask when using 32-bit ands for 64-bit value");
  2291. if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
  2292. (unsigned) (ANDISMask != 0) +
  2293. (unsigned) (ANDIMask != 0 && ANDISMask != 0);
  2294. SDValue ANDIVal, ANDISVal;
  2295. if (ANDIMask != 0)
  2296. ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
  2297. ExtendToInt64(Res, dl),
  2298. getI32Imm(ANDIMask, dl)),
  2299. 0);
  2300. if (ANDISMask != 0)
  2301. ANDISVal =
  2302. SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
  2303. ExtendToInt64(Res, dl),
  2304. getI32Imm(ANDISMask, dl)),
  2305. 0);
  2306. if (!ANDIVal)
  2307. Res = ANDISVal;
  2308. else if (!ANDISVal)
  2309. Res = ANDIVal;
  2310. else
  2311. Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2312. ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
  2313. } else {
  2314. unsigned NumOfSelectInsts = 0;
  2315. SDValue MaskVal =
  2316. SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
  2317. Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
  2318. ExtendToInt64(Res, dl), MaskVal),
  2319. 0);
  2320. if (InstCnt)
  2321. *InstCnt += NumOfSelectInsts + /* and */ 1;
  2322. }
  2323. }
  2324. return Res.getNode();
  2325. }
  2326. SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
  2327. // Fill in BitGroups.
  2328. collectBitGroups(LateMask);
  2329. if (BitGroups.empty())
  2330. return nullptr;
  2331. // For 64-bit values, figure out when we can use 32-bit instructions.
  2332. if (Bits.size() == 64)
  2333. assignRepl32BitGroups();
  2334. // Fill in ValueRotsVec.
  2335. collectValueRotInfo();
  2336. if (Bits.size() == 32) {
  2337. return Select32(N, LateMask, InstCnt);
  2338. } else {
  2339. assert(Bits.size() == 64 && "Not 64 bits here?");
  2340. return Select64(N, LateMask, InstCnt);
  2341. }
  2342. return nullptr;
  2343. }
  2344. void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
  2345. erase_if(BitGroups, F);
  2346. }
  2347. SmallVector<ValueBit, 64> Bits;
  2348. bool NeedMask = false;
  2349. SmallVector<unsigned, 64> RLAmt;
  2350. SmallVector<BitGroup, 16> BitGroups;
  2351. DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  2352. SmallVector<ValueRotInfo, 16> ValueRotsVec;
  2353. SelectionDAG *CurDAG = nullptr;
  2354. public:
  2355. BitPermutationSelector(SelectionDAG *DAG)
  2356. : CurDAG(DAG) {}
  2357. // Here we try to match complex bit permutations into a set of
  2358. // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  2359. // known to produce optimal code for common cases (like i32 byte swapping).
  2360. SDNode *Select(SDNode *N) {
  2361. Memoizer.clear();
  2362. auto Result =
  2363. getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
  2364. if (!Result.first)
  2365. return nullptr;
  2366. Bits = std::move(*Result.second);
  2367. LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
  2368. " selection for: ");
  2369. LLVM_DEBUG(N->dump(CurDAG));
2370. // Fill in RLAmt and set NeedMask.
  2371. computeRotationAmounts();
  2372. if (!NeedMask)
  2373. return Select(N, false);
  2374. // We currently have two techniques for handling results with zeros: early
  2375. // masking (the default) and late masking. Late masking is sometimes more
  2376. // efficient, but because the structure of the bit groups is different, it
  2377. // is hard to tell without generating both and comparing the results. With
  2378. // late masking, we ignore zeros in the resulting value when inserting each
  2379. // set of bit groups, and then mask in the zeros at the end. With early
  2380. // masking, we only insert the non-zero parts of the result at every step.
  2381. unsigned InstCnt = 0, InstCntLateMask = 0;
  2382. LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
  2383. SDNode *RN = Select(N, false, &InstCnt);
  2384. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
  2385. LLVM_DEBUG(dbgs() << "\tLate masking:\n");
  2386. SDNode *RNLM = Select(N, true, &InstCntLateMask);
  2387. LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
  2388. << " instructions\n");
  2389. if (InstCnt <= InstCntLateMask) {
  2390. LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
  2391. return RN;
  2392. }
  2393. LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
  2394. return RNLM;
  2395. }
  2396. };
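// IntegerCompareEliminator rewrites sign/zero extensions of comparison results
// (and logical operations on such results) so that the value is computed
// directly in GPRs, avoiding CR-logical operations and their associated
// latency.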
  2397. class IntegerCompareEliminator {
  2398. SelectionDAG *CurDAG;
  2399. PPCDAGToDAGISel *S;
  2400. // Conversion type for interpreting results of a 32-bit instruction as
  2401. // a 64-bit value or vice versa.
  2402. enum ExtOrTruncConversion { Ext, Trunc };
  2403. // Modifiers to guide how an ISD::SETCC node's result is to be computed
  2404. // in a GPR.
  2405. // ZExtOrig - use the original condition code, zero-extend value
  2406. // ZExtInvert - invert the condition code, zero-extend value
  2407. // SExtOrig - use the original condition code, sign-extend value
  2408. // SExtInvert - invert the condition code, sign-extend value
  2409. enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
  2410. // Comparisons against zero to emit GPR code sequences for. Each of these
  2411. // sequences may need to be emitted for two or more equivalent patterns.
  2412. // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  2413. // matters as well as the extension type: sext (-1/0), zext (1/0).
  2414. // GEZExt - (zext (LHS >= 0))
  2415. // GESExt - (sext (LHS >= 0))
  2416. // LEZExt - (zext (LHS <= 0))
  2417. // LESExt - (sext (LHS <= 0))
  2418. enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
  2419. SDNode *tryEXTEND(SDNode *N);
  2420. SDNode *tryLogicOpOfCompares(SDNode *N);
  2421. SDValue computeLogicOpInGPR(SDValue LogicOp);
  2422. SDValue signExtendInputIfNeeded(SDValue Input);
  2423. SDValue zeroExtendInputIfNeeded(SDValue Input);
  2424. SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  2425. SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2426. ZeroCompare CmpTy);
  2427. SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2428. int64_t RHSValue, SDLoc dl);
  2429. SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2430. int64_t RHSValue, SDLoc dl);
  2431. SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2432. int64_t RHSValue, SDLoc dl);
  2433. SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  2434. int64_t RHSValue, SDLoc dl);
  2435. SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
  2436. public:
  2437. IntegerCompareEliminator(SelectionDAG *DAG,
  2438. PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
  2439. assert(CurDAG->getTargetLoweringInfo()
  2440. .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
  2441. "Only expecting to use this on 64 bit targets.");
  2442. }
  2443. SDNode *Select(SDNode *N) {
  2444. if (CmpInGPR == ICGPR_None)
  2445. return nullptr;
  2446. switch (N->getOpcode()) {
  2447. default: break;
  2448. case ISD::ZERO_EXTEND:
  2449. if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
  2450. CmpInGPR == ICGPR_SextI64)
  2451. return nullptr;
  2452. LLVM_FALLTHROUGH;
  2453. case ISD::SIGN_EXTEND:
  2454. if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
  2455. CmpInGPR == ICGPR_ZextI64)
  2456. return nullptr;
  2457. return tryEXTEND(N);
  2458. case ISD::AND:
  2459. case ISD::OR:
  2460. case ISD::XOR:
  2461. return tryLogicOpOfCompares(N);
  2462. }
  2463. return nullptr;
  2464. }
  2465. };
  2466. static bool isLogicOp(unsigned Opc) {
  2467. return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
  2468. }
  2469. // The obvious case for wanting to keep the value in a GPR. Namely, the
  2470. // result of the comparison is actually needed in a GPR.
  2471. SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  2472. assert((N->getOpcode() == ISD::ZERO_EXTEND ||
  2473. N->getOpcode() == ISD::SIGN_EXTEND) &&
  2474. "Expecting a zero/sign extend node!");
  2475. SDValue WideRes;
  2476. // If we are zero-extending the result of a logical operation on i1
  2477. // values, we can keep the values in GPRs.
  2478. if (isLogicOp(N->getOperand(0).getOpcode()) &&
  2479. N->getOperand(0).getValueType() == MVT::i1 &&
  2480. N->getOpcode() == ISD::ZERO_EXTEND)
  2481. WideRes = computeLogicOpInGPR(N->getOperand(0));
  2482. else if (N->getOperand(0).getOpcode() != ISD::SETCC)
  2483. return nullptr;
  2484. else
  2485. WideRes =
  2486. getSETCCInGPR(N->getOperand(0),
  2487. N->getOpcode() == ISD::SIGN_EXTEND ?
  2488. SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
  2489. if (!WideRes)
  2490. return nullptr;
  2491. SDLoc dl(N);
  2492. bool Input32Bit = WideRes.getValueType() == MVT::i32;
  2493. bool Output32Bit = N->getValueType(0) == MVT::i32;
  2494. NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  2495. NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
  2496. SDValue ConvOp = WideRes;
  2497. if (Input32Bit != Output32Bit)
  2498. ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
  2499. ExtOrTruncConversion::Trunc);
  2500. return ConvOp.getNode();
  2501. }
  2502. // Attempt to perform logical operations on the results of comparisons while
  2503. // keeping the values in GPRs. Without doing so, these would end up being
  2504. // lowered to CR-logical operations which suffer from significant latency and
  2505. // low ILP.
  2506. SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  2507. if (N->getValueType(0) != MVT::i1)
  2508. return nullptr;
  2509. assert(isLogicOp(N->getOpcode()) &&
  2510. "Expected a logic operation on setcc results.");
  2511. SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  2512. if (!LoweredLogical)
  2513. return nullptr;
  2514. SDLoc dl(N);
  2515. bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  2516. unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  2517. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  2518. SDValue LHS = LoweredLogical.getOperand(0);
  2519. SDValue RHS = LoweredLogical.getOperand(1);
  2520. SDValue WideOp;
  2521. SDValue OpToConvToRecForm;
  2522. // Look through any 32-bit to 64-bit implicit extend nodes to find the
  2523. // opcode that is input to the XORI.
  2524. if (IsBitwiseNegate &&
  2525. LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
  2526. OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  2527. else if (IsBitwiseNegate)
  2528. // If the input to the XORI isn't an extension, that's what we're after.
  2529. OpToConvToRecForm = LoweredLogical.getOperand(0);
  2530. else
  2531. // If this is not an XORI, it is a reg-reg logical op and we can convert
  2532. // it to record-form.
  2533. OpToConvToRecForm = LoweredLogical;
  2534. // Get the record-form version of the node we're looking to use to get the
  2535. // CR result from.
  2536. uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  2537. int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
  2538. // Convert the right node to record-form. This is either the logical we're
  2539. // looking at or it is the input node to the negation (if we're looking at
  2540. // a bitwise negation).
  2541. if (NewOpc != -1 && IsBitwiseNegate) {
  2542. // The input to the XORI has a record-form. Use it.
  2543. assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
  2544. "Expected a PPC::XORI8 only for bitwise negation.");
  2545. // Emit the record-form instruction.
  2546. std::vector<SDValue> Ops;
  2547. for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
  2548. Ops.push_back(OpToConvToRecForm.getOperand(i));
  2549. WideOp =
  2550. SDValue(CurDAG->getMachineNode(NewOpc, dl,
  2551. OpToConvToRecForm.getValueType(),
  2552. MVT::Glue, Ops), 0);
  2553. } else {
  2554. assert((NewOpc != -1 || !IsBitwiseNegate) &&
  2555. "No record form available for AND8/OR8/XOR8?");
  2556. WideOp =
  2557. SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
  2558. dl, MVT::i64, MVT::Glue, LHS, RHS),
  2559. 0);
  2560. }
  2561. // Select this node to a single bit from CR0 set by the record-form node
  2562. // just created. For bitwise negation, use the EQ bit which is the equivalent
  2563. // of negating the result (i.e. it is a bit set when the result of the
  2564. // operation is zero).
  2565. SDValue SRIdxVal =
  2566. CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  2567. SDValue CRBit =
  2568. SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
  2569. MVT::i1, CR0Reg, SRIdxVal,
  2570. WideOp.getValue(1)), 0);
  2571. return CRBit.getNode();
  2572. }
  2573. // Lower a logical operation on i1 values into a GPR sequence if possible.
  2574. // The result can be kept in a GPR if requested.
  2575. // Three types of inputs can be handled:
  2576. // - SETCC
  2577. // - TRUNCATE
  2578. // - Logical operation (AND/OR/XOR)
  2579. // There is also a special case that is handled (namely a complement operation
  2580. // achieved with xor %a, -1).
  2581. SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  2582. assert(isLogicOp(LogicOp.getOpcode()) &&
  2583. "Can only handle logic operations here.");
  2584. assert(LogicOp.getValueType() == MVT::i1 &&
  2585. "Can only handle logic operations on i1 values here.");
  2586. SDLoc dl(LogicOp);
  2587. SDValue LHS, RHS;
  2588. // Special case: xor %a, -1
  2589. bool IsBitwiseNegation = isBitwiseNot(LogicOp);
  2590. // Produces a GPR sequence for each operand of the binary logic operation.
  2591. // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  2592. // the value in a GPR and for logic operations, it will recursively produce
  2593. // a GPR sequence for the operation.
  2594. auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
  2595. unsigned OperandOpcode = Operand.getOpcode();
  2596. if (OperandOpcode == ISD::SETCC)
  2597. return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
  2598. else if (OperandOpcode == ISD::TRUNCATE) {
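// An i1 TRUNCATE is implemented as rldicl(_32) with a shift of 0 and a mask
// beginning at bit 63, which clears everything except the least significant
// bit while keeping the value in a GPR.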
  2599. SDValue InputOp = Operand.getOperand(0);
  2600. EVT InVT = InputOp.getValueType();
  2601. return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
  2602. PPC::RLDICL, dl, InVT, InputOp,
  2603. S->getI64Imm(0, dl),
  2604. S->getI64Imm(63, dl)), 0);
  2605. } else if (isLogicOp(OperandOpcode))
  2606. return computeLogicOpInGPR(Operand);
  2607. return SDValue();
  2608. };
  2609. LHS = getLogicOperand(LogicOp.getOperand(0));
  2610. RHS = getLogicOperand(LogicOp.getOperand(1));
  2611. // If a GPR sequence can't be produced for the LHS we can't proceed.
  2612. // Not producing a GPR sequence for the RHS is only a problem if this isn't
  2613. // a bitwise negation operation.
  2614. if (!LHS || (!RHS && !IsBitwiseNegation))
  2615. return SDValue();
  2616. NumLogicOpsOnComparison++;
  2617. // We will use the inputs as 64-bit values.
  2618. if (LHS.getValueType() == MVT::i32)
  2619. LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  2620. if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
  2621. RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
  2622. unsigned NewOpc;
  2623. switch (LogicOp.getOpcode()) {
  2624. default: llvm_unreachable("Unknown logic operation.");
  2625. case ISD::AND: NewOpc = PPC::AND8; break;
  2626. case ISD::OR: NewOpc = PPC::OR8; break;
  2627. case ISD::XOR: NewOpc = PPC::XOR8; break;
  2628. }
  2629. if (IsBitwiseNegation) {
  2630. RHS = S->getI64Imm(1, dl);
  2631. NewOpc = PPC::XORI8;
  2632. }
  2633. return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
  2634. }
  2635. /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
  2636. /// Otherwise just reinterpret it as a 64-bit value.
  2637. /// Useful when emitting comparison code for 32-bit values without using
  2638. /// the compare instruction (which only considers the lower 32-bits).
  2639. SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  2640. assert(Input.getValueType() == MVT::i32 &&
  2641. "Can only sign-extend 32-bit values here.");
  2642. unsigned Opc = Input.getOpcode();
  2643. // The value was sign extended and then truncated to 32-bits. No need to
  2644. // sign extend it again.
  2645. if (Opc == ISD::TRUNCATE &&
  2646. (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
  2647. Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
  2648. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2649. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  2650. // The input is a sign-extending load. All ppc sign-extending loads
  2651. // sign-extend to the full 64-bits.
  2652. if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
  2653. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2654. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2655. // We don't sign-extend constants.
  2656. if (InputConst)
  2657. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2658. SDLoc dl(Input);
  2659. SignExtensionsAdded++;
  2660. return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
  2661. MVT::i64, Input), 0);
  2662. }
  2663. /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
  2664. /// Otherwise just reinterpret it as a 64-bit value.
  2665. /// Useful when emitting comparison code for 32-bit values without using
  2666. /// the compare instruction (which only considers the lower 32-bits).
  2667. SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  2668. assert(Input.getValueType() == MVT::i32 &&
  2669. "Can only zero-extend 32-bit values here.");
  2670. unsigned Opc = Input.getOpcode();
2671. // The only conditions under which we can omit the actual extend instruction:
  2672. // - The value is a positive constant
  2673. // - The value comes from a load that isn't a sign-extending load
  2674. // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  2675. bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
  2676. (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
  2677. Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  2678. if (IsTruncateOfZExt)
  2679. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2680. ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  2681. if (InputConst && InputConst->getSExtValue() >= 0)
  2682. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2683. LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  2684. // The input is a load that doesn't sign-extend (it will be zero-extended).
  2685. if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
  2686. return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
  2687. // None of the above, need to zero-extend.
  2688. SDLoc dl(Input);
  2689. ZeroExtensionsAdded++;
  2690. return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
  2691. S->getI64Imm(0, dl),
  2692. S->getI64Imm(32, dl)), 0);
  2693. }
  2694. // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
  2695. // course not actual zero/sign extensions that will generate machine code,
  2696. // they're just a way to reinterpret a 32 bit value in a register as a
  2697. // 64 bit value and vice-versa.
  2698. SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
  2699. ExtOrTruncConversion Conv) {
  2700. SDLoc dl(NatWidthRes);
  2701. // For reinterpreting 32-bit values as 64 bit values, we generate
  2702. // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
  2703. if (Conv == ExtOrTruncConversion::Ext) {
  2704. SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
  2705. SDValue SubRegIdx =
  2706. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2707. return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
  2708. ImDef, NatWidthRes, SubRegIdx), 0);
  2709. }
  2710. assert(Conv == ExtOrTruncConversion::Trunc &&
  2711. "Unknown convertion between 32 and 64 bit values.");
  2712. // For reinterpreting 64-bit values as 32-bit values, we just need to
  2713. // EXTRACT_SUBREG (i.e. extract the low word).
  2714. SDValue SubRegIdx =
  2715. CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  2716. return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
  2717. NatWidthRes, SubRegIdx), 0);
  2718. }
  2719. // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
  2720. // Handle both zero-extensions and sign-extensions.
  2721. SDValue
  2722. IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
  2723. ZeroCompare CmpTy) {
  2724. EVT InVT = LHS.getValueType();
  2725. bool Is32Bit = InVT == MVT::i32;
  2726. SDValue ToExtend;
  2727. // Produce the value that needs to be either zero or sign extended.
  2728. switch (CmpTy) {
  2729. case ZeroCompare::GEZExt:
  2730. case ZeroCompare::GESExt:
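// For (LHS >= 0), both variants start from ~LHS (computed with nor): its sign
// bit is set exactly when LHS is non-negative, and the code below extracts or
// broadcasts that bit.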
  2731. ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
  2732. dl, InVT, LHS, LHS), 0);
  2733. break;
  2734. case ZeroCompare::LEZExt:
  2735. case ZeroCompare::LESExt: {
  2736. if (Is32Bit) {
  2737. // Upper 32 bits cannot be undefined for this sequence.
  2738. LHS = signExtendInputIfNeeded(LHS);
  2739. SDValue Neg =
  2740. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2741. ToExtend =
  2742. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2743. Neg, S->getI64Imm(1, dl),
  2744. S->getI64Imm(63, dl)), 0);
  2745. } else {
  2746. SDValue Addi =
  2747. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  2748. S->getI64Imm(~0ULL, dl)), 0);
  2749. ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
  2750. Addi, LHS), 0);
  2751. }
  2752. break;
  2753. }
  2754. }
  2755. // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  2756. if (!Is32Bit &&
  2757. (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
  2758. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2759. ToExtend, S->getI64Imm(1, dl),
  2760. S->getI64Imm(63, dl)), 0);
  2761. if (!Is32Bit &&
  2762. (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
  2763. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
  2764. S->getI64Imm(63, dl)), 0);
  2765. assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  2766. // For 32-bit sequences, the extensions differ between GE/LE cases.
  2767. switch (CmpTy) {
  2768. case ZeroCompare::GEZExt: {
  2769. SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2770. S->getI32Imm(31, dl) };
  2771. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2772. ShiftOps), 0);
  2773. }
  2774. case ZeroCompare::GESExt:
  2775. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
  2776. S->getI32Imm(31, dl)), 0);
  2777. case ZeroCompare::LEZExt:
  2778. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
  2779. S->getI32Imm(1, dl)), 0);
  2780. case ZeroCompare::LESExt:
  2781. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
  2782. S->getI32Imm(-1, dl)), 0);
  2783. }
2784. // The switch above covers all the enumerators, so it deliberately omits a
2785. // default clause, which would otherwise trigger covered-switch warnings.
  2786. llvm_unreachable("Unknown zero-comparison type.");
  2787. }
  2788. /// Produces a zero-extended result of comparing two 32-bit values according to
  2789. /// the passed condition code.
  2790. SDValue
  2791. IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
  2792. ISD::CondCode CC,
  2793. int64_t RHSValue, SDLoc dl) {
  2794. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2795. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
  2796. return SDValue();
  2797. bool IsRHSZero = RHSValue == 0;
  2798. bool IsRHSOne = RHSValue == 1;
  2799. bool IsRHSNegOne = RHSValue == -1LL;
  2800. switch (CC) {
  2801. default: return SDValue();
  2802. case ISD::SETEQ: {
  2803. // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
  2804. // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
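// cntlzw produces 32 only when its input is zero, and 32 is the only possible
// result with bit 5 set, so shifting the count right by 5 yields exactly the
// equality bit.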
  2805. SDValue Xor = IsRHSZero ? LHS :
  2806. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2807. SDValue Clz =
  2808. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2809. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2810. S->getI32Imm(31, dl) };
  2811. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2812. ShiftOps), 0);
  2813. }
  2814. case ISD::SETNE: {
  2815. // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
  2816. // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
  2817. SDValue Xor = IsRHSZero ? LHS :
  2818. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2819. SDValue Clz =
  2820. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2821. SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
  2822. S->getI32Imm(31, dl) };
  2823. SDValue Shift =
  2824. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  2825. return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  2826. S->getI32Imm(1, dl)), 0);
  2827. }
  2828. case ISD::SETGE: {
  2829. // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
  2830. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
  2831. if(IsRHSZero)
  2832. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2833. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  2834. // by swapping inputs and falling through.
  2835. std::swap(LHS, RHS);
  2836. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2837. IsRHSZero = RHSConst && RHSConst->isZero();
  2838. LLVM_FALLTHROUGH;
  2839. }
  2840. case ISD::SETLE: {
  2841. if (CmpInGPR == ICGPR_NonExtIn)
  2842. return SDValue();
  2843. // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
  2844. // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
  2845. if(IsRHSZero) {
  2846. if (CmpInGPR == ICGPR_NonExtIn)
  2847. return SDValue();
  2848. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2849. }
  2850. // The upper 32-bits of the register can't be undefined for this sequence.
  2851. LHS = signExtendInputIfNeeded(LHS);
  2852. RHS = signExtendInputIfNeeded(RHS);
  2853. SDValue Sub =
  2854. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2855. SDValue Shift =
  2856. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
  2857. S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
  2858. 0);
  2859. return
  2860. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
  2861. MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  2862. }
  2863. case ISD::SETGT: {
  2864. // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
  2865. // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
  2866. // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
  2867. // Handle SETLT -1 (which is equivalent to SETGE 0).
  2868. if (IsRHSNegOne)
  2869. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  2870. if (IsRHSZero) {
  2871. if (CmpInGPR == ICGPR_NonExtIn)
  2872. return SDValue();
  2873. // The upper 32-bits of the register can't be undefined for this sequence.
  2874. LHS = signExtendInputIfNeeded(LHS);
  2875. RHS = signExtendInputIfNeeded(RHS);
  2876. SDValue Neg =
  2877. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  2878. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2879. Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
  2880. }
  2881. // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
  2882. // (%b < %a) by swapping inputs and falling through.
  2883. std::swap(LHS, RHS);
  2884. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  2885. IsRHSZero = RHSConst && RHSConst->isZero();
  2886. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  2887. LLVM_FALLTHROUGH;
  2888. }
  2889. case ISD::SETLT: {
  2890. // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
  2891. // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
  2892. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
  2893. // Handle SETLT 1 (which is equivalent to SETLE 0).
  2894. if (IsRHSOne) {
  2895. if (CmpInGPR == ICGPR_NonExtIn)
  2896. return SDValue();
  2897. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  2898. }
  2899. if (IsRHSZero) {
  2900. SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
  2901. S->getI32Imm(31, dl) };
  2902. return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
  2903. ShiftOps), 0);
  2904. }
  2905. if (CmpInGPR == ICGPR_NonExtIn)
  2906. return SDValue();
  2907. // The upper 32-bits of the register can't be undefined for this sequence.
  2908. LHS = signExtendInputIfNeeded(LHS);
  2909. RHS = signExtendInputIfNeeded(RHS);
  2910. SDValue SUBFNode =
  2911. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2912. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2913. SUBFNode, S->getI64Imm(1, dl),
  2914. S->getI64Imm(63, dl)), 0);
  2915. }
  2916. case ISD::SETUGE:
  2917. // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
  2918. // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
  2919. std::swap(LHS, RHS);
  2920. LLVM_FALLTHROUGH;
  2921. case ISD::SETULE: {
  2922. if (CmpInGPR == ICGPR_NonExtIn)
  2923. return SDValue();
  2924. // The upper 32-bits of the register can't be undefined for this sequence.
  2925. LHS = zeroExtendInputIfNeeded(LHS);
  2926. RHS = zeroExtendInputIfNeeded(RHS);
  2927. SDValue Subtract =
  2928. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  2929. SDValue SrdiNode =
  2930. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2931. Subtract, S->getI64Imm(1, dl),
  2932. S->getI64Imm(63, dl)), 0);
  2933. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
  2934. S->getI32Imm(1, dl)), 0);
  2935. }
  2936. case ISD::SETUGT:
  2937. // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
  2938. // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
  2939. std::swap(LHS, RHS);
  2940. LLVM_FALLTHROUGH;
  2941. case ISD::SETULT: {
  2942. if (CmpInGPR == ICGPR_NonExtIn)
  2943. return SDValue();
  2944. // The upper 32-bits of the register can't be undefined for this sequence.
  2945. LHS = zeroExtendInputIfNeeded(LHS);
  2946. RHS = zeroExtendInputIfNeeded(RHS);
  2947. SDValue Subtract =
  2948. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  2949. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  2950. Subtract, S->getI64Imm(1, dl),
  2951. S->getI64Imm(63, dl)), 0);
  2952. }
  2953. }
  2954. }
  2955. /// Produces a sign-extended result of comparing two 32-bit values according to
  2956. /// the passed condition code.
  2957. SDValue
  2958. IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
  2959. ISD::CondCode CC,
  2960. int64_t RHSValue, SDLoc dl) {
  2961. if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
  2962. CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
  2963. return SDValue();
  2964. bool IsRHSZero = RHSValue == 0;
  2965. bool IsRHSOne = RHSValue == 1;
  2966. bool IsRHSNegOne = RHSValue == -1LL;
  2967. switch (CC) {
  2968. default: return SDValue();
  2969. case ISD::SETEQ: {
  2970. // (sext (setcc %a, %b, seteq)) ->
  2971. // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
  2972. // (sext (setcc %a, 0, seteq)) ->
  2973. // (ashr (shl (ctlz %a), 58), 63)
  2974. SDValue CountInput = IsRHSZero ? LHS :
  2975. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2976. SDValue Cntlzw =
  2977. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
  2978. SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
  2979. S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  2980. SDValue Slwi =
  2981. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
  2982. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  2983. }
  2984. case ISD::SETNE: {
  2985. // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
  2986. // flip the bit, finally take 2's complement.
  2987. // (sext (setcc %a, %b, setne)) ->
  2988. // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
  2989. // Same as above, but the first xor is not needed.
  2990. // (sext (setcc %a, 0, setne)) ->
  2991. // (neg (xor (lshr (ctlz %a), 5), 1))
  2992. SDValue Xor = IsRHSZero ? LHS :
  2993. SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
  2994. SDValue Clz =
  2995. SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
  2996. SDValue ShiftOps[] =
  2997. { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
  2998. SDValue Shift =
  2999. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
  3000. SDValue Xori =
  3001. SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
  3002. S->getI32Imm(1, dl)), 0);
  3003. return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  3004. }
  3005. case ISD::SETGE: {
  3006. // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
  3007. // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
  3008. if (IsRHSZero)
  3009. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3010. // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
  3011. // by swapping inputs and falling through.
  3012. std::swap(LHS, RHS);
  3013. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3014. IsRHSZero = RHSConst && RHSConst->isZero();
  3015. LLVM_FALLTHROUGH;
  3016. }
  3017. case ISD::SETLE: {
  3018. if (CmpInGPR == ICGPR_NonExtIn)
  3019. return SDValue();
3020. // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
  3021. // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
  3022. if (IsRHSZero)
  3023. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3024. // The upper 32-bits of the register can't be undefined for this sequence.
  3025. LHS = signExtendInputIfNeeded(LHS);
  3026. RHS = signExtendInputIfNeeded(RHS);
  3027. SDValue SUBFNode =
  3028. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
  3029. LHS, RHS), 0);
  3030. SDValue Srdi =
  3031. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3032. SUBFNode, S->getI64Imm(1, dl),
  3033. S->getI64Imm(63, dl)), 0);
  3034. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
  3035. S->getI32Imm(-1, dl)), 0);
  3036. }
  3037. case ISD::SETGT: {
  3038. // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
  3039. // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
  3040. // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
  3041. if (IsRHSNegOne)
  3042. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3043. if (IsRHSZero) {
  3044. if (CmpInGPR == ICGPR_NonExtIn)
  3045. return SDValue();
  3046. // The upper 32-bits of the register can't be undefined for this sequence.
  3047. LHS = signExtendInputIfNeeded(LHS);
  3048. RHS = signExtendInputIfNeeded(RHS);
  3049. SDValue Neg =
  3050. SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
  3051. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
  3052. S->getI64Imm(63, dl)), 0);
  3053. }
  3054. // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
  3055. // (%b < %a) by swapping inputs and falling through.
  3056. std::swap(LHS, RHS);
  3057. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3058. IsRHSZero = RHSConst && RHSConst->isZero();
  3059. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3060. LLVM_FALLTHROUGH;
  3061. }
  3062. case ISD::SETLT: {
3063. // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3064. // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3065. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
  3066. if (IsRHSOne) {
  3067. if (CmpInGPR == ICGPR_NonExtIn)
  3068. return SDValue();
  3069. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3070. }
  3071. if (IsRHSZero)
  3072. return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
  3073. S->getI32Imm(31, dl)), 0);
  3074. if (CmpInGPR == ICGPR_NonExtIn)
  3075. return SDValue();
  3076. // The upper 32-bits of the register can't be undefined for this sequence.
  3077. LHS = signExtendInputIfNeeded(LHS);
  3078. RHS = signExtendInputIfNeeded(RHS);
  3079. SDValue SUBFNode =
  3080. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  3081. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3082. SUBFNode, S->getI64Imm(63, dl)), 0);
  3083. }
  3084. case ISD::SETUGE:
  3085. // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
  3086. // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
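// Because both inputs are zero-extended to 64 bits below, the 64-bit
// subtraction cannot wrap: bit 63 of (sub %b, %a) is set exactly when
// %a > %b (unsigned). Shifting that bit down and adding -1 then produces
// -1 when %a <= %b and 0 otherwise.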
  3087. std::swap(LHS, RHS);
  3088. LLVM_FALLTHROUGH;
  3089. case ISD::SETULE: {
  3090. if (CmpInGPR == ICGPR_NonExtIn)
  3091. return SDValue();
  3092. // The upper 32-bits of the register can't be undefined for this sequence.
  3093. LHS = zeroExtendInputIfNeeded(LHS);
  3094. RHS = zeroExtendInputIfNeeded(RHS);
  3095. SDValue Subtract =
  3096. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
  3097. SDValue Shift =
  3098. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
  3099. S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
  3100. 0);
  3101. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
  3102. S->getI32Imm(-1, dl)), 0);
  3103. }
  3104. case ISD::SETUGT:
  3105. // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3106. // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
  3107. std::swap(LHS, RHS);
  3108. LLVM_FALLTHROUGH;
  3109. case ISD::SETULT: {
  3110. if (CmpInGPR == ICGPR_NonExtIn)
  3111. return SDValue();
  3112. // The upper 32-bits of the register can't be undefined for this sequence.
  3113. LHS = zeroExtendInputIfNeeded(LHS);
  3114. RHS = zeroExtendInputIfNeeded(RHS);
  3115. SDValue Subtract =
  3116. SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
  3117. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3118. Subtract, S->getI64Imm(63, dl)), 0);
  3119. }
  3120. }
  3121. }
  3122. /// Produces a zero-extended result of comparing two 64-bit values according to
  3123. /// the passed condition code.
  3124. SDValue
  3125. IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
  3126. ISD::CondCode CC,
  3127. int64_t RHSValue, SDLoc dl) {
  3128. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  3129. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
  3130. return SDValue();
  3131. bool IsRHSZero = RHSValue == 0;
  3132. bool IsRHSOne = RHSValue == 1;
  3133. bool IsRHSNegOne = RHSValue == -1LL;
  3134. switch (CC) {
  3135. default: return SDValue();
  3136. case ISD::SETEQ: {
  3137. // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
  3138. // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
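// cntlzd returns 64 only when its input is zero (i.e. %a == %b), and at most
// 63 otherwise, so shifting the count right by 6 (the rldicl below) yields
// exactly the 0/1 result of the equality test.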
  3139. SDValue Xor = IsRHSZero ? LHS :
  3140. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3141. SDValue Clz =
  3142. SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
  3143. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
  3144. S->getI64Imm(58, dl),
  3145. S->getI64Imm(63, dl)), 0);
  3146. }
  3147. case ISD::SETNE: {
  3148. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  3149. // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
  3150. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  3151. // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
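// Adding -1 (the addic8) produces a carry-out exactly when the xor result is
// non-zero. Since subfe computes ~RA + RB + CA and the addic result is
// (xor - 1), the subfe below evaluates to ~(xor - 1) + xor + CA = CA, i.e.
// the carry bit itself: 1 when %a != %b, 0 when they are equal.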
  3152. SDValue Xor = IsRHSZero ? LHS :
  3153. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3154. SDValue AC =
  3155. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  3156. Xor, S->getI32Imm(~0U, dl)), 0);
  3157. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
  3158. Xor, AC.getValue(1)), 0);
  3159. }
  3160. case ISD::SETGE: {
  3161. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3162. // (zext (setcc %a, %b, setge)) ->
  3163. // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
  3164. // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
  3165. if (IsRHSZero)
  3166. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  3167. std::swap(LHS, RHS);
  3168. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3169. IsRHSZero = RHSConst && RHSConst->isZero();
  3170. LLVM_FALLTHROUGH;
  3171. }
  3172. case ISD::SETLE: {
  3173. // {subc.reg, subc.CA} = (subcarry %b, %a)
3174. // (zext (setcc %a, %b, setle)) ->
3175. // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3176. // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
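// For the RHS == 0 case: (or %a, (add %a, -1)) has its sign bit set exactly
// when %a <= 0 (either %a is already negative, or %a == 0 and %a - 1 is -1),
// so shifting bit 63 down gives the zero-extended setle result.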
  3177. if (IsRHSZero)
  3178. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  3179. SDValue ShiftL =
  3180. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3181. S->getI64Imm(1, dl),
  3182. S->getI64Imm(63, dl)), 0);
  3183. SDValue ShiftR =
  3184. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  3185. S->getI64Imm(63, dl)), 0);
  3186. SDValue SubtractCarry =
  3187. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3188. LHS, RHS), 1);
  3189. return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3190. ShiftR, ShiftL, SubtractCarry), 0);
  3191. }
  3192. case ISD::SETGT: {
  3193. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3194. // (zext (setcc %a, %b, setgt)) ->
  3195. // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
  3196. // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
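// For the RHS == 0 case: nor(%a - 1, %a) has its sign bit set only when both
// %a - 1 and %a are non-negative, which happens exactly when %a > 0, so the
// final shift right by 63 yields the zero-extended setgt result.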
  3197. if (IsRHSNegOne)
  3198. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
  3199. if (IsRHSZero) {
  3200. SDValue Addi =
  3201. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  3202. S->getI64Imm(~0ULL, dl)), 0);
  3203. SDValue Nor =
  3204. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
  3205. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
  3206. S->getI64Imm(1, dl),
  3207. S->getI64Imm(63, dl)), 0);
  3208. }
  3209. std::swap(LHS, RHS);
  3210. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3211. IsRHSZero = RHSConst && RHSConst->isZero();
  3212. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3213. LLVM_FALLTHROUGH;
  3214. }
  3215. case ISD::SETLT: {
  3216. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3217. // (zext (setcc %a, %b, setlt)) ->
  3218. // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
  3219. // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
  3220. if (IsRHSOne)
  3221. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
  3222. if (IsRHSZero)
  3223. return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3224. S->getI64Imm(1, dl),
  3225. S->getI64Imm(63, dl)), 0);
  3226. SDValue SRADINode =
  3227. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3228. LHS, S->getI64Imm(63, dl)), 0);
  3229. SDValue SRDINode =
  3230. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3231. RHS, S->getI64Imm(1, dl),
  3232. S->getI64Imm(63, dl)), 0);
  3233. SDValue SUBFC8Carry =
  3234. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3235. RHS, LHS), 1);
  3236. SDValue ADDE8Node =
  3237. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3238. SRDINode, SRADINode, SUBFC8Carry), 0);
  3239. return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3240. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3241. }
  3242. case ISD::SETUGE:
  3243. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3244. // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
  3245. std::swap(LHS, RHS);
  3246. LLVM_FALLTHROUGH;
  3247. case ISD::SETULE: {
  3248. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3249. // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
  3250. SDValue SUBFC8Carry =
  3251. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3252. LHS, RHS), 1);
  3253. SDValue SUBFE8Node =
  3254. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
  3255. LHS, LHS, SUBFC8Carry), 0);
  3256. return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
  3257. SUBFE8Node, S->getI64Imm(1, dl)), 0);
  3258. }
  3259. case ISD::SETUGT:
  3260. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3261. // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
  3262. std::swap(LHS, RHS);
  3263. LLVM_FALLTHROUGH;
  3264. case ISD::SETULT: {
  3265. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3266. // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
  3267. SDValue SubtractCarry =
  3268. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3269. RHS, LHS), 1);
  3270. SDValue ExtSub =
  3271. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3272. LHS, LHS, SubtractCarry), 0);
  3273. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3274. ExtSub), 0);
  3275. }
  3276. }
  3277. }
  3278. /// Produces a sign-extended result of comparing two 64-bit values according to
  3279. /// the passed condition code.
  3280. SDValue
  3281. IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
  3282. ISD::CondCode CC,
  3283. int64_t RHSValue, SDLoc dl) {
  3284. if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
  3285. CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
  3286. return SDValue();
  3287. bool IsRHSZero = RHSValue == 0;
  3288. bool IsRHSOne = RHSValue == 1;
  3289. bool IsRHSNegOne = RHSValue == -1LL;
  3290. switch (CC) {
  3291. default: return SDValue();
  3292. case ISD::SETEQ: {
  3293. // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
  3294. // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
  3295. // {addcz.reg, addcz.CA} = (addcarry %a, -1)
  3296. // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
  3297. SDValue AddInput = IsRHSZero ? LHS :
  3298. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3299. SDValue Addic =
  3300. SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
  3301. AddInput, S->getI32Imm(~0U, dl)), 0);
  3302. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
  3303. Addic, Addic.getValue(1)), 0);
  3304. }
  3305. case ISD::SETNE: {
  3306. // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
  3307. // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
  3308. // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
  3309. // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
  3310. SDValue Xor = IsRHSZero ? LHS :
  3311. SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
  3312. SDValue SC =
  3313. SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
  3314. Xor, S->getI32Imm(0, dl)), 0);
  3315. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
  3316. SC, SC.getValue(1)), 0);
  3317. }
  3318. case ISD::SETGE: {
  3319. // {subc.reg, subc.CA} = (subcarry %a, %b)
3320. // (sext (setcc %a, %b, setge)) ->
3321. // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3322. // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
  3323. if (IsRHSZero)
  3324. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3325. std::swap(LHS, RHS);
  3326. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3327. IsRHSZero = RHSConst && RHSConst->isZero();
  3328. LLVM_FALLTHROUGH;
  3329. }
  3330. case ISD::SETLE: {
  3331. // {subc.reg, subc.CA} = (subcarry %b, %a)
3332. // (sext (setcc %a, %b, setle)) ->
3333. // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3334. // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
  3335. if (IsRHSZero)
  3336. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3337. SDValue ShiftR =
  3338. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
  3339. S->getI64Imm(63, dl)), 0);
  3340. SDValue ShiftL =
  3341. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
  3342. S->getI64Imm(1, dl),
  3343. S->getI64Imm(63, dl)), 0);
  3344. SDValue SubtractCarry =
  3345. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3346. LHS, RHS), 1);
  3347. SDValue Adde =
  3348. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
  3349. ShiftR, ShiftL, SubtractCarry), 0);
  3350. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  3351. }
  3352. case ISD::SETGT: {
  3353. // {subc.reg, subc.CA} = (subcarry %b, %a)
3354. // (sext (setcc %a, %b, setgt)) ->
3355. // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3356. // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
  3357. if (IsRHSNegOne)
  3358. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
  3359. if (IsRHSZero) {
  3360. SDValue Add =
  3361. SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
  3362. S->getI64Imm(-1, dl)), 0);
  3363. SDValue Nor =
  3364. SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
  3365. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
  3366. S->getI64Imm(63, dl)), 0);
  3367. }
  3368. std::swap(LHS, RHS);
  3369. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3370. IsRHSZero = RHSConst && RHSConst->isZero();
  3371. IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
  3372. LLVM_FALLTHROUGH;
  3373. }
  3374. case ISD::SETLT: {
  3375. // {subc.reg, subc.CA} = (subcarry %a, %b)
3376. // (sext (setcc %a, %b, setlt)) ->
3377. // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3378. // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
  3379. if (IsRHSOne)
  3380. return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
  3381. if (IsRHSZero) {
  3382. return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
  3383. S->getI64Imm(63, dl)), 0);
  3384. }
  3385. SDValue SRADINode =
  3386. SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
  3387. LHS, S->getI64Imm(63, dl)), 0);
  3388. SDValue SRDINode =
  3389. SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
  3390. RHS, S->getI64Imm(1, dl),
  3391. S->getI64Imm(63, dl)), 0);
  3392. SDValue SUBFC8Carry =
  3393. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3394. RHS, LHS), 1);
  3395. SDValue ADDE8Node =
  3396. SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
  3397. SRDINode, SRADINode, SUBFC8Carry), 0);
  3398. SDValue XORI8Node =
  3399. SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  3400. ADDE8Node, S->getI64Imm(1, dl)), 0);
  3401. return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
  3402. XORI8Node), 0);
  3403. }
  3404. case ISD::SETUGE:
  3405. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3406. // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
  3407. std::swap(LHS, RHS);
  3408. LLVM_FALLTHROUGH;
  3409. case ISD::SETULE: {
  3410. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3411. // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
  3412. SDValue SubtractCarry =
  3413. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3414. LHS, RHS), 1);
  3415. SDValue ExtSub =
  3416. SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
  3417. LHS, SubtractCarry), 0);
  3418. return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
  3419. ExtSub, ExtSub), 0);
  3420. }
  3421. case ISD::SETUGT:
  3422. // {subc.reg, subc.CA} = (subcarry %b, %a)
  3423. // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
  3424. std::swap(LHS, RHS);
  3425. LLVM_FALLTHROUGH;
  3426. case ISD::SETULT: {
  3427. // {subc.reg, subc.CA} = (subcarry %a, %b)
  3428. // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
  3429. SDValue SubCarry =
  3430. SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
  3431. RHS, LHS), 1);
  3432. return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
  3433. LHS, LHS, SubCarry), 0);
  3434. }
  3435. }
  3436. }
  3437. /// Do all uses of this SDValue need the result in a GPR?
  3438. /// This is meant to be used on values that have type i1 since
  3439. /// it is somewhat meaningless to ask if values of other types
3440. /// should be kept in GPRs.
  3441. static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  3442. assert(Compare.getOpcode() == ISD::SETCC &&
  3443. "An ISD::SETCC node required here.");
  3444. // For values that have a single use, the caller should obviously already have
  3445. // checked if that use is an extending use. We check the other uses here.
  3446. if (Compare.hasOneUse())
  3447. return true;
  3448. // We want the value in a GPR if it is being extended, used for a select, or
  3449. // used in logical operations.
  3450. for (auto CompareUse : Compare.getNode()->uses())
  3451. if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
  3452. CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
  3453. CompareUse->getOpcode() != ISD::SELECT &&
  3454. !isLogicOp(CompareUse->getOpcode())) {
  3455. OmittedForNonExtendUses++;
  3456. return false;
  3457. }
  3458. return true;
  3459. }
  3460. /// Returns an equivalent of a SETCC node but with the result the same width as
  3461. /// the inputs. This can also be used for SELECT_CC if either the true or false
  3462. /// values is a power of two while the other is zero.
  3463. SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
  3464. SetccInGPROpts ConvOpts) {
3465. assert((Compare.getOpcode() == ISD::SETCC ||
3466. Compare.getOpcode() == ISD::SELECT_CC) &&
3467. "An ISD::SETCC or ISD::SELECT_CC node required here.");
  3468. // Don't convert this comparison to a GPR sequence because there are uses
  3469. // of the i1 result (i.e. uses that require the result in the CR).
  3470. if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
  3471. return SDValue();
  3472. SDValue LHS = Compare.getOperand(0);
  3473. SDValue RHS = Compare.getOperand(1);
  3474. // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  3475. int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  3476. ISD::CondCode CC =
  3477. cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  3478. EVT InputVT = LHS.getValueType();
  3479. if (InputVT != MVT::i32 && InputVT != MVT::i64)
  3480. return SDValue();
  3481. if (ConvOpts == SetccInGPROpts::ZExtInvert ||
  3482. ConvOpts == SetccInGPROpts::SExtInvert)
  3483. CC = ISD::getSetCCInverse(CC, InputVT);
  3484. bool Inputs32Bit = InputVT == MVT::i32;
  3485. SDLoc dl(Compare);
  3486. ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  3487. int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  3488. bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
  3489. ConvOpts == SetccInGPROpts::SExtInvert;
  3490. if (IsSext && Inputs32Bit)
  3491. return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3492. else if (Inputs32Bit)
  3493. return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3494. else if (IsSext)
  3495. return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  3496. return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  3497. }
  3498. } // end anonymous namespace
  3499. bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  3500. if (N->getValueType(0) != MVT::i32 &&
  3501. N->getValueType(0) != MVT::i64)
  3502. return false;
  3503. // This optimization will emit code that assumes 64-bit registers
  3504. // so we don't want to run it in 32-bit mode. Also don't run it
  3505. // on functions that are not to be optimized.
  3506. if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
  3507. return false;
  3508. // For POWER10, it is more profitable to use the set boolean extension
  3509. // instructions rather than the integer compare elimination codegen.
  3510. // Users can override this via the command line option, `--ppc-gpr-icmps`.
  3511. if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
  3512. return false;
  3513. switch (N->getOpcode()) {
  3514. default: break;
  3515. case ISD::ZERO_EXTEND:
  3516. case ISD::SIGN_EXTEND:
  3517. case ISD::AND:
  3518. case ISD::OR:
  3519. case ISD::XOR: {
  3520. IntegerCompareEliminator ICmpElim(CurDAG, this);
  3521. if (SDNode *New = ICmpElim.Select(N)) {
  3522. ReplaceNode(N, New);
  3523. return true;
  3524. }
  3525. }
  3526. }
  3527. return false;
  3528. }
  3529. bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  3530. if (N->getValueType(0) != MVT::i32 &&
  3531. N->getValueType(0) != MVT::i64)
  3532. return false;
  3533. if (!UseBitPermRewriter)
  3534. return false;
  3535. switch (N->getOpcode()) {
  3536. default: break;
  3537. case ISD::ROTL:
  3538. case ISD::SHL:
  3539. case ISD::SRL:
  3540. case ISD::AND:
  3541. case ISD::OR: {
  3542. BitPermutationSelector BPS(CurDAG);
  3543. if (SDNode *New = BPS.Select(N)) {
  3544. ReplaceNode(N, New);
  3545. return true;
  3546. }
  3547. return false;
  3548. }
  3549. }
  3550. return false;
  3551. }
  3552. /// SelectCC - Select a comparison of the specified values with the specified
  3553. /// condition code, returning the CR# of the expression.
  3554. SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
  3555. const SDLoc &dl, SDValue Chain) {
  3556. // Always select the LHS.
  3557. unsigned Opc;
  3558. if (LHS.getValueType() == MVT::i32) {
  3559. unsigned Imm;
  3560. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3561. if (isInt32Immediate(RHS, Imm)) {
  3562. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3563. if (isUInt<16>(Imm))
  3564. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3565. getI32Imm(Imm & 0xFFFF, dl)),
  3566. 0);
  3567. // If this is a 16-bit signed immediate, fold it.
  3568. if (isInt<16>((int)Imm))
  3569. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3570. getI32Imm(Imm & 0xFFFF, dl)),
  3571. 0);
  3572. // For non-equality comparisons, the default code would materialize the
  3573. // constant, then compare against it, like this:
  3574. // lis r2, 4660
  3575. // ori r2, r2, 22136
  3576. // cmpw cr0, r3, r2
  3577. // Since we are just comparing for equality, we can emit this instead:
  3578. // xoris r0,r3,0x1234
  3579. // cmplwi cr0,r0,0x5678
  3580. // beq cr0,L6
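// With Imm = 0x12345678, for example, the xoris clears the high halfword of
// the result exactly when the high halfword of the LHS is 0x1234, so the
// cmplwi against 0x5678 succeeds only when the LHS equals the full constant.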
  3581. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
  3582. getI32Imm(Imm >> 16, dl)), 0);
  3583. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
  3584. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3585. }
  3586. Opc = PPC::CMPLW;
  3587. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3588. if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
  3589. return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
  3590. getI32Imm(Imm & 0xFFFF, dl)), 0);
  3591. Opc = PPC::CMPLW;
  3592. } else {
  3593. int16_t SImm;
  3594. if (isIntS16Immediate(RHS, SImm))
  3595. return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
  3596. getI32Imm((int)SImm & 0xFFFF,
  3597. dl)),
  3598. 0);
  3599. Opc = PPC::CMPW;
  3600. }
  3601. } else if (LHS.getValueType() == MVT::i64) {
  3602. uint64_t Imm;
  3603. if (CC == ISD::SETEQ || CC == ISD::SETNE) {
  3604. if (isInt64Immediate(RHS.getNode(), Imm)) {
  3605. // SETEQ/SETNE comparison with 16-bit immediate, fold it.
  3606. if (isUInt<16>(Imm))
  3607. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3608. getI32Imm(Imm & 0xFFFF, dl)),
  3609. 0);
  3610. // If this is a 16-bit signed immediate, fold it.
  3611. if (isInt<16>(Imm))
  3612. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3613. getI32Imm(Imm & 0xFFFF, dl)),
  3614. 0);
  3615. // For non-equality comparisons, the default code would materialize the
  3616. // constant, then compare against it, like this:
  3617. // lis r2, 4660
  3618. // ori r2, r2, 22136
  3619. // cmpd cr0, r3, r2
  3620. // Since we are just comparing for equality, we can emit this instead:
  3621. // xoris r0,r3,0x1234
  3622. // cmpldi cr0,r0,0x5678
  3623. // beq cr0,L6
  3624. if (isUInt<32>(Imm)) {
  3625. SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
  3626. getI64Imm(Imm >> 16, dl)), 0);
  3627. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
  3628. getI64Imm(Imm & 0xFFFF, dl)),
  3629. 0);
  3630. }
  3631. }
  3632. Opc = PPC::CMPLD;
  3633. } else if (ISD::isUnsignedIntSetCC(CC)) {
  3634. if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
  3635. return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
  3636. getI64Imm(Imm & 0xFFFF, dl)), 0);
  3637. Opc = PPC::CMPLD;
  3638. } else {
  3639. int16_t SImm;
  3640. if (isIntS16Immediate(RHS, SImm))
  3641. return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
  3642. getI64Imm(SImm & 0xFFFF, dl)),
  3643. 0);
  3644. Opc = PPC::CMPD;
  3645. }
  3646. } else if (LHS.getValueType() == MVT::f32) {
  3647. if (Subtarget->hasSPE()) {
  3648. switch (CC) {
  3649. default:
  3650. case ISD::SETEQ:
  3651. case ISD::SETNE:
  3652. Opc = PPC::EFSCMPEQ;
  3653. break;
  3654. case ISD::SETLT:
  3655. case ISD::SETGE:
  3656. case ISD::SETOLT:
  3657. case ISD::SETOGE:
  3658. case ISD::SETULT:
  3659. case ISD::SETUGE:
  3660. Opc = PPC::EFSCMPLT;
  3661. break;
  3662. case ISD::SETGT:
  3663. case ISD::SETLE:
  3664. case ISD::SETOGT:
  3665. case ISD::SETOLE:
  3666. case ISD::SETUGT:
  3667. case ISD::SETULE:
  3668. Opc = PPC::EFSCMPGT;
  3669. break;
  3670. }
  3671. } else
  3672. Opc = PPC::FCMPUS;
  3673. } else if (LHS.getValueType() == MVT::f64) {
  3674. if (Subtarget->hasSPE()) {
  3675. switch (CC) {
  3676. default:
  3677. case ISD::SETEQ:
  3678. case ISD::SETNE:
  3679. Opc = PPC::EFDCMPEQ;
  3680. break;
  3681. case ISD::SETLT:
  3682. case ISD::SETGE:
  3683. case ISD::SETOLT:
  3684. case ISD::SETOGE:
  3685. case ISD::SETULT:
  3686. case ISD::SETUGE:
  3687. Opc = PPC::EFDCMPLT;
  3688. break;
  3689. case ISD::SETGT:
  3690. case ISD::SETLE:
  3691. case ISD::SETOGT:
  3692. case ISD::SETOLE:
  3693. case ISD::SETUGT:
  3694. case ISD::SETULE:
  3695. Opc = PPC::EFDCMPGT;
  3696. break;
  3697. }
  3698. } else
  3699. Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  3700. } else {
  3701. assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
  3702. assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
  3703. Opc = PPC::XSCMPUQP;
  3704. }
  3705. if (Chain)
  3706. return SDValue(
  3707. CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
  3708. 0);
  3709. else
  3710. return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
  3711. }
  3712. static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
  3713. const PPCSubtarget *Subtarget) {
  3714. // For SPE instructions, the result is in GT bit of the CR
  3715. bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
  3716. switch (CC) {
  3717. case ISD::SETUEQ:
  3718. case ISD::SETONE:
  3719. case ISD::SETOLE:
  3720. case ISD::SETOGE:
  3721. llvm_unreachable("Should be lowered by legalize!");
  3722. default: llvm_unreachable("Unknown condition!");
  3723. case ISD::SETOEQ:
  3724. case ISD::SETEQ:
  3725. return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
  3726. case ISD::SETUNE:
  3727. case ISD::SETNE:
  3728. return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
  3729. case ISD::SETOLT:
  3730. case ISD::SETLT:
  3731. return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
  3732. case ISD::SETULE:
  3733. case ISD::SETLE:
  3734. return PPC::PRED_LE;
  3735. case ISD::SETOGT:
  3736. case ISD::SETGT:
  3737. return PPC::PRED_GT;
  3738. case ISD::SETUGE:
  3739. case ISD::SETGE:
  3740. return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
  3741. case ISD::SETO: return PPC::PRED_NU;
  3742. case ISD::SETUO: return PPC::PRED_UN;
  3743. // These two are invalid for floating point. Assume we have int.
  3744. case ISD::SETULT: return PPC::PRED_LT;
  3745. case ISD::SETUGT: return PPC::PRED_GT;
  3746. }
  3747. }
  3748. /// getCRIdxForSetCC - Return the index of the condition register field
  3749. /// associated with the SetCC condition, and whether or not the field is
  3750. /// treated as inverted. That is, lt = 0; ge = 0 inverted.
  3751. static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
  3752. Invert = false;
  3753. switch (CC) {
  3754. default: llvm_unreachable("Unknown condition!");
  3755. case ISD::SETOLT:
  3756. case ISD::SETLT: return 0; // Bit #0 = SETOLT
  3757. case ISD::SETOGT:
  3758. case ISD::SETGT: return 1; // Bit #1 = SETOGT
  3759. case ISD::SETOEQ:
  3760. case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
  3761. case ISD::SETUO: return 3; // Bit #3 = SETUO
  3762. case ISD::SETUGE:
  3763. case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
  3764. case ISD::SETULE:
  3765. case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
  3766. case ISD::SETUNE:
  3767. case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
  3768. case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
  3769. case ISD::SETUEQ:
  3770. case ISD::SETOGE:
  3771. case ISD::SETOLE:
  3772. case ISD::SETONE:
  3773. llvm_unreachable("Invalid branch code: should be expanded by legalize");
  3774. // These are invalid for floating point. Assume integer.
  3775. case ISD::SETULT: return 0;
  3776. case ISD::SETUGT: return 1;
  3777. }
  3778. }
3779. // getVCmpInst: return the vector compare instruction for the specified
3780. // vector type and condition code. Since this is for AltiVec-specific code,
3781. // we only support the AltiVec types (v16i8, v8i16, v4i32, v2i64, v1i128,
3782. // v4f32 and v2f64).
  3783. static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
  3784. bool HasVSX, bool &Swap, bool &Negate) {
  3785. Swap = false;
  3786. Negate = false;
  3787. if (VecVT.isFloatingPoint()) {
  3788. /* Handle some cases by swapping input operands. */
  3789. switch (CC) {
  3790. case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
  3791. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3792. case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
  3793. case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
  3794. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3795. case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
  3796. default: break;
  3797. }
  3798. /* Handle some cases by negating the result. */
  3799. switch (CC) {
  3800. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3801. case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
  3802. case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
  3803. case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
  3804. default: break;
  3805. }
  3806. /* We have instructions implementing the remaining cases. */
  3807. switch (CC) {
  3808. case ISD::SETEQ:
  3809. case ISD::SETOEQ:
  3810. if (VecVT == MVT::v4f32)
  3811. return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
  3812. else if (VecVT == MVT::v2f64)
  3813. return PPC::XVCMPEQDP;
  3814. break;
  3815. case ISD::SETGT:
  3816. case ISD::SETOGT:
  3817. if (VecVT == MVT::v4f32)
  3818. return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
  3819. else if (VecVT == MVT::v2f64)
  3820. return PPC::XVCMPGTDP;
  3821. break;
  3822. case ISD::SETGE:
  3823. case ISD::SETOGE:
  3824. if (VecVT == MVT::v4f32)
  3825. return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
  3826. else if (VecVT == MVT::v2f64)
  3827. return PPC::XVCMPGEDP;
  3828. break;
  3829. default:
  3830. break;
  3831. }
  3832. llvm_unreachable("Invalid floating-point vector compare condition");
  3833. } else {
  3834. /* Handle some cases by swapping input operands. */
  3835. switch (CC) {
  3836. case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
  3837. case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
  3838. case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
  3839. case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
  3840. default: break;
  3841. }
  3842. /* Handle some cases by negating the result. */
  3843. switch (CC) {
  3844. case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
  3845. case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
  3846. case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
  3847. case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
  3848. default: break;
  3849. }
  3850. /* We have instructions implementing the remaining cases. */
  3851. switch (CC) {
  3852. case ISD::SETEQ:
  3853. case ISD::SETUEQ:
  3854. if (VecVT == MVT::v16i8)
  3855. return PPC::VCMPEQUB;
  3856. else if (VecVT == MVT::v8i16)
  3857. return PPC::VCMPEQUH;
  3858. else if (VecVT == MVT::v4i32)
  3859. return PPC::VCMPEQUW;
  3860. else if (VecVT == MVT::v2i64)
  3861. return PPC::VCMPEQUD;
  3862. else if (VecVT == MVT::v1i128)
  3863. return PPC::VCMPEQUQ;
  3864. break;
  3865. case ISD::SETGT:
  3866. if (VecVT == MVT::v16i8)
  3867. return PPC::VCMPGTSB;
  3868. else if (VecVT == MVT::v8i16)
  3869. return PPC::VCMPGTSH;
  3870. else if (VecVT == MVT::v4i32)
  3871. return PPC::VCMPGTSW;
  3872. else if (VecVT == MVT::v2i64)
  3873. return PPC::VCMPGTSD;
  3874. else if (VecVT == MVT::v1i128)
  3875. return PPC::VCMPGTSQ;
  3876. break;
  3877. case ISD::SETUGT:
  3878. if (VecVT == MVT::v16i8)
  3879. return PPC::VCMPGTUB;
  3880. else if (VecVT == MVT::v8i16)
  3881. return PPC::VCMPGTUH;
  3882. else if (VecVT == MVT::v4i32)
  3883. return PPC::VCMPGTUW;
  3884. else if (VecVT == MVT::v2i64)
  3885. return PPC::VCMPGTUD;
  3886. else if (VecVT == MVT::v1i128)
  3887. return PPC::VCMPGTUQ;
  3888. break;
  3889. default:
  3890. break;
  3891. }
  3892. llvm_unreachable("Invalid integer vector compare condition");
  3893. }
  3894. }
  3895. bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  3896. SDLoc dl(N);
  3897. unsigned Imm;
  3898. bool IsStrict = N->isStrictFPOpcode();
  3899. ISD::CondCode CC =
  3900. cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  3901. EVT PtrVT =
  3902. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  3903. bool isPPC64 = (PtrVT == MVT::i64);
  3904. SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
  3905. SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  3906. SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
  3907. if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
  3908. // We can codegen setcc op, imm very efficiently compared to a brcond.
  3909. // Check for those cases here.
  3910. // setcc op, 0
  3911. if (Imm == 0) {
  3912. SDValue Op = LHS;
  3913. switch (CC) {
  3914. default: break;
  3915. case ISD::SETEQ: {
  3916. Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
  3917. SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
  3918. getI32Imm(31, dl) };
  3919. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3920. return true;
  3921. }
  3922. case ISD::SETNE: {
  3923. if (isPPC64) break;
  3924. SDValue AD =
  3925. SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3926. Op, getI32Imm(~0U, dl)), 0);
  3927. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
  3928. return true;
  3929. }
  3930. case ISD::SETLT: {
  3931. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3932. getI32Imm(31, dl) };
  3933. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3934. return true;
  3935. }
  3936. case ISD::SETGT: {
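// setcc op, 0, setgt: the sign bit of (-Op & ~Op) is set exactly when Op is
// strictly positive (negating a non-zero value other than INT_MIN flips the
// sign bit, and ~Op has its sign bit set only when Op is non-negative), so
// extracting that bit with rlwinm yields the 0/1 result.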
  3937. SDValue T =
  3938. SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
  3939. T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
  3940. SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
  3941. getI32Imm(31, dl) };
  3942. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3943. return true;
  3944. }
  3945. }
  3946. } else if (Imm == ~0U) { // setcc op, -1
  3947. SDValue Op = LHS;
  3948. switch (CC) {
  3949. default: break;
  3950. case ISD::SETEQ:
  3951. if (isPPC64) break;
  3952. Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3953. Op, getI32Imm(1, dl)), 0);
  3954. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
  3955. SDValue(CurDAG->getMachineNode(PPC::LI, dl,
  3956. MVT::i32,
  3957. getI32Imm(0, dl)),
  3958. 0), Op.getValue(1));
  3959. return true;
  3960. case ISD::SETNE: {
  3961. if (isPPC64) break;
  3962. Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
  3963. SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  3964. Op, getI32Imm(~0U, dl));
  3965. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
  3966. SDValue(AD, 1));
  3967. return true;
  3968. }
  3969. case ISD::SETLT: {
  3970. SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
  3971. getI32Imm(1, dl)), 0);
  3972. SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
  3973. Op), 0);
  3974. SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
  3975. getI32Imm(31, dl) };
  3976. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  3977. return true;
  3978. }
  3979. case ISD::SETGT: {
  3980. SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
  3981. getI32Imm(31, dl) };
  3982. Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  3983. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
  3984. return true;
  3985. }
  3986. }
  3987. }
  3988. }
  3989. // Altivec Vector compare instructions do not set any CR register by default and
  3990. // vector compare operations return the same type as the operands.
  3991. if (!IsStrict && LHS.getValueType().isVector()) {
  3992. if (Subtarget->hasSPE())
  3993. return false;
  3994. EVT VecVT = LHS.getValueType();
  3995. bool Swap, Negate;
  3996. unsigned int VCmpInst =
  3997. getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
  3998. if (Swap)
  3999. std::swap(LHS, RHS);
  4000. EVT ResVT = VecVT.changeVectorElementTypeToInteger();
  4001. if (Negate) {
  4002. SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
  4003. CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
  4004. ResVT, VCmp, VCmp);
  4005. return true;
  4006. }
  4007. CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
  4008. return true;
  4009. }
  4010. if (Subtarget->useCRBits())
  4011. return false;
  4012. bool Inv;
  4013. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  4014. SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  4015. if (IsStrict)
  4016. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  4017. SDValue IntCR;
4018. // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that here.
4019. // The correct compare instruction has already been selected by SelectCC().
  4020. if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
  4021. Idx = 1;
  4022. }
  4023. // Force the ccreg into CR7.
  4024. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
  4025. SDValue InFlag; // Null incoming flag value.
  4026. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
  4027. InFlag).getValue(1);
  4028. IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
  4029. CCReg), 0);
  4030. SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
  4031. getI32Imm(31, dl), getI32Imm(31, dl) };
  4032. if (!Inv) {
  4033. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4034. return true;
  4035. }
  4036. // Get the specified bit.
  4037. SDValue Tmp =
  4038. SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  4039. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  4040. return true;
  4041. }
  4042. /// Does this node represent a load/store node whose address can be represented
4043. /// with a register plus an immediate that's a multiple of \p Val?
  4044. bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
  4045. LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
  4046. StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
  4047. MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
  4048. SDValue AddrOp;
  4049. if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
  4050. AddrOp = N->getOperand(1);
  4051. else if (STN)
  4052. AddrOp = STN->getOperand(2);
4053. // If the address points to a frame object or a frame object with an offset,
  4054. // we need to check the object alignment.
  4055. short Imm = 0;
  4056. if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
  4057. AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
  4058. AddrOp)) {
4059. // If op0 is a frame index that is under-aligned, we can't do it either,
  4060. // because it is translated to r31 or r1 + slot + offset. We won't know the
  4061. // slot number until the stack frame is finalized.
  4062. const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
  4063. unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
  4064. if ((SlotAlign % Val) != 0)
  4065. return false;
  4066. // If we have an offset, we need further check on the offset.
  4067. if (AddrOp.getOpcode() != ISD::ADD)
  4068. return true;
  4069. }
  4070. if (AddrOp.getOpcode() == ISD::ADD)
  4071. return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
  4072. // If the address comes from the outside, the offset will be zero.
  4073. return AddrOp.getOpcode() == ISD::CopyFromReg;
  4074. }
  4075. void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  4076. // Transfer memoperands.
  4077. MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  4078. CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
  4079. }
  4080. static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
  4081. bool &NeedSwapOps, bool &IsUnCmp) {
  4082. assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
  4083. SDValue LHS = N->getOperand(0);
  4084. SDValue RHS = N->getOperand(1);
  4085. SDValue TrueRes = N->getOperand(2);
  4086. SDValue FalseRes = N->getOperand(3);
  4087. ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  4088. if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
  4089. N->getSimpleValueType(0) != MVT::i32))
  4090. return false;
  4091. // We are looking for any of:
  4092. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  4093. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  4094. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
  4095. // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
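// Each of these shapes computes a three-way comparison result of -1/0/+1,
// which is exactly what the ISA 3.0 setb instruction materializes from a CR
// field, so the whole nest can collapse to a single compare plus setb.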
  4096. int64_t TrueResVal = TrueConst->getSExtValue();
  4097. if ((TrueResVal < -1 || TrueResVal > 1) ||
  4098. (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
  4099. (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
  4100. (TrueResVal == 0 &&
  4101. (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
  4102. return false;
  4103. SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
  4104. ? FalseRes
  4105. : FalseRes.getOperand(0);
  4106. bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  4107. if (SetOrSelCC.getOpcode() != ISD::SETCC &&
  4108. SetOrSelCC.getOpcode() != ISD::SELECT_CC)
  4109. return false;
4110. // Without this setb optimization, the outer SELECT_CC is selected to a
4111. // SELECT_CC_I4/SELECT_CC_I8 pseudo, which the expand-isel-pseudos pass later
4112. // expands into an isel instruction. When the result has more than one use
4113. // (e.g. through zext/sext), this transformation would only replace the isel
4114. // with a setb without any significant gain. Since setb has a longer latency
4115. // than the original isel, we should avoid it in that case. Furthermore, setb
4116. // requires the comparison to be kept around, which can block future
4117. // opportunities to eliminate the comparison entirely.
  4118. if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
  4119. return false;
  4120. SDValue InnerLHS = SetOrSelCC.getOperand(0);
  4121. SDValue InnerRHS = SetOrSelCC.getOperand(1);
  4122. ISD::CondCode InnerCC =
  4123. cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  4124. // If the inner comparison is a select_cc, make sure the true/false values are
  4125. // 1/-1 and canonicalize it if needed.
  4126. if (InnerIsSel) {
  4127. ConstantSDNode *SelCCTrueConst =
  4128. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
  4129. ConstantSDNode *SelCCFalseConst =
  4130. dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
  4131. if (!SelCCTrueConst || !SelCCFalseConst)
  4132. return false;
  4133. int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
  4134. int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
  4135. // The values must be -1/1 (requiring a swap) or 1/-1.
  4136. if (SelCCTVal == -1 && SelCCFVal == 1) {
  4137. std::swap(InnerLHS, InnerRHS);
  4138. } else if (SelCCTVal != 1 || SelCCFVal != -1)
  4139. return false;
  4140. }
  4141. // Canonicalize unsigned case
  4142. if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
  4143. IsUnCmp = true;
  4144. InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  4145. }
  4146. bool InnerSwapped = false;
  4147. if (LHS == InnerRHS && RHS == InnerLHS)
  4148. InnerSwapped = true;
  4149. else if (LHS != InnerLHS || RHS != InnerRHS)
  4150. return false;
  4151. switch (CC) {
  4152. // (select_cc lhs, rhs, 0, \
  4153. // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  4154. case ISD::SETEQ:
  4155. if (!InnerIsSel)
  4156. return false;
  4157. if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
  4158. return false;
  4159. NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
  4160. break;
  4161. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  4162. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  4163. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  4164. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  4165. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  4166. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  4167. case ISD::SETULT:
  4168. if (!IsUnCmp && InnerCC != ISD::SETNE)
  4169. return false;
  4170. IsUnCmp = true;
  4171. LLVM_FALLTHROUGH;
  4172. case ISD::SETLT:
  4173. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
  4174. (InnerCC == ISD::SETLT && InnerSwapped))
  4175. NeedSwapOps = (TrueResVal == 1);
  4176. else
  4177. return false;
  4178. break;
  4179. // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  4180. // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  4181. // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  4182. // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  4183. // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  4184. // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  4185. case ISD::SETUGT:
  4186. if (!IsUnCmp && InnerCC != ISD::SETNE)
  4187. return false;
  4188. IsUnCmp = true;
  4189. LLVM_FALLTHROUGH;
  4190. case ISD::SETGT:
  4191. if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
  4192. (InnerCC == ISD::SETGT && InnerSwapped))
  4193. NeedSwapOps = (TrueResVal == -1);
  4194. else
  4195. return false;
  4196. break;
  4197. default:
  4198. return false;
  4199. }
  4200. LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  4201. LLVM_DEBUG(N->dump());
  4202. return true;
  4203. }
  4204. // Return true if it's a software square-root/divide operand.
  4205. static bool isSWTestOp(SDValue N) {
  4206. if (N.getOpcode() == PPCISD::FTSQRT)
  4207. return true;
  4208. if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
  4209. return false;
  4210. switch (N.getConstantOperandVal(0)) {
  4211. case Intrinsic::ppc_vsx_xvtdivdp:
  4212. case Intrinsic::ppc_vsx_xvtdivsp:
  4213. case Intrinsic::ppc_vsx_xvtsqrtdp:
  4214. case Intrinsic::ppc_vsx_xvtsqrtsp:
  4215. return true;
  4216. }
  4217. return false;
  4218. }
  4219. bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  4220. assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4221. // We are looking for the following patterns, where `truncate to i1` has the
4222. // same semantics as `and 1`.
  4223. // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  4224. // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  4225. // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  4226. // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  4227. // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  4228. // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  4229. // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  4230. // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
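// In all of these cases the branch can consume the CR field defined by the
// test instruction directly, instead of first moving the result to a GPR,
// masking out one bit and comparing it against zero.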
  4231. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  4232. if (CC != ISD::SETEQ && CC != ISD::SETNE)
  4233. return false;
  4234. SDValue CmpRHS = N->getOperand(3);
  4235. if (!isa<ConstantSDNode>(CmpRHS) ||
  4236. cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
  4237. return false;
  4238. SDValue CmpLHS = N->getOperand(2);
  4239. if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
  4240. return false;
  4241. unsigned PCC = 0;
  4242. bool IsCCNE = CC == ISD::SETNE;
  4243. if (CmpLHS.getOpcode() == ISD::AND &&
  4244. isa<ConstantSDNode>(CmpLHS.getOperand(1)))
  4245. switch (CmpLHS.getConstantOperandVal(1)) {
  4246. case 1:
  4247. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4248. break;
  4249. case 2:
  4250. PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
  4251. break;
  4252. case 4:
  4253. PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
  4254. break;
  4255. case 8:
  4256. PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
  4257. break;
  4258. default:
  4259. return false;
  4260. }
  4261. else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
  4262. CmpLHS.getValueType() == MVT::i1)
  4263. PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
  4264. if (PCC) {
  4265. SDLoc dl(N);
  4266. SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
  4267. N->getOperand(0)};
  4268. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  4269. return true;
  4270. }
  4271. return false;
  4272. }
  4273. bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  4274. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4275. unsigned Imm;
  4276. if (!isInt32Immediate(N->getOperand(1), Imm))
  4277. return false;
  4278. SDLoc dl(N);
  4279. SDValue Val = N->getOperand(0);
  4280. unsigned SH, MB, ME;
  4281. // If this is an and of a value rotated between 0 and 31 bits and then and'd
  4282. // with a mask, emit rlwinm
  4283. if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
  4284. Val = Val.getOperand(0);
  4285. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
  4286. getI32Imm(ME, dl)};
  4287. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4288. return true;
  4289. }
  4290. // If this is just a masked value where the input is not handled, and
  4291. // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
  4292. if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
  4293. SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
  4294. getI32Imm(ME, dl)};
  4295. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4296. return true;
  4297. }
  4298. // AND X, 0 -> 0, not "rlwinm 32".
  4299. if (Imm == 0) {
  4300. ReplaceUses(SDValue(N, 0), N->getOperand(1));
  4301. return true;
  4302. }
  4303. return false;
  4304. }
  4305. bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  4306. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4307. uint64_t Imm64;
  4308. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4309. return false;
  4310. unsigned MB, ME;
  4311. if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
  4312. // MB ME
  4313. // +----------------------+
  4314. // |xxxxxxxxxxx00011111000|
  4315. // +----------------------+
  4316. // 0 32 64
4317. // We can only do this if MB >= 32 and MB <= ME, because RLWINM replaces
4318. // the contents of bits [0, 32) with those of bits [32, 64) even when the
4319. // rotate amount is zero.
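// For example (illustrative only), Imm64 = 0x0000000000FFFF00 is a run of
// ones at bits 40..55, so MB = 40 and ME = 55; it is encoded below as
// RLWINM8 with SH = 0, MB = 8, ME = 23, i.e. the same mask applied to the
// low 32-bit word.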
  4320. SDLoc dl(N);
  4321. SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
  4322. getI64Imm(ME - 32, dl)};
  4323. CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
  4324. return true;
  4325. }
  4326. return false;
  4327. }
  4328. bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  4329. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4330. uint64_t Imm64;
  4331. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
  4332. return false;
4333. // Do nothing if it is a 16-bit immediate, as the pattern in the .td file
4334. // already handles it well with "andi.".
  4335. if (isUInt<16>(Imm64))
  4336. return false;
  4337. SDLoc Loc(N);
  4338. SDValue Val = N->getOperand(0);
  4339. // Optimized with two rldicl's as follows:
  4340. // Add missing bits on left to the mask and check that the mask is a
  4341. // wrapped run of ones, i.e.
  4342. // Change pattern |0001111100000011111111|
  4343. // to |1111111100000011111111|.
  4344. unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  4345. if (NumOfLeadingZeros != 0)
  4346. Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
  4347. unsigned MB, ME;
  4348. if (!isRunOfOnes64(Imm64, MB, ME))
  4349. return false;
  4350. // ME MB MB-ME+63
  4351. // +----------------------+ +----------------------+
  4352. // |1111111100000011111111| -> |0000001111111111111111|
  4353. // +----------------------+ +----------------------+
  4354. // 0 63 0 63
  4355. // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  4356. unsigned OnesOnLeft = ME + 1;
  4357. unsigned ZerosInBetween = (MB - ME + 63) & 63;
  4358. // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  4359. // on the left the bits that are already zeros in the mask.
  4360. Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
  4361. getI64Imm(OnesOnLeft, Loc),
  4362. getI64Imm(ZerosInBetween, Loc)),
  4363. 0);
  4364. // MB-ME+63 ME MB
  4365. // +----------------------+ +----------------------+
  4366. // |0000001111111111111111| -> |0001111100000011111111|
  4367. // +----------------------+ +----------------------+
  4368. // 0 63 0 63
  4369. // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  4370. // left the number of ones we previously added.
  4371. SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
  4372. getI64Imm(NumOfLeadingZeros, Loc)};
  4373. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4374. return true;
  4375. }
  4376. bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  4377. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4378. unsigned Imm;
  4379. if (!isInt32Immediate(N->getOperand(1), Imm))
  4380. return false;
  4381. SDValue Val = N->getOperand(0);
  4382. unsigned Imm2;
  4383. // ISD::OR doesn't get all the bitfield insertion fun.
  4384. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  4385. // bitfield insert.
  4386. if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
  4387. return false;
  4388. // The idea here is to check whether this is equivalent to:
  4389. // (c1 & m) | (x & ~m)
  4390. // where m is a run-of-ones mask. The logic here is that, for each bit in
  4391. // c1 and c2:
  4392. // - if both are 1, then the output will be 1.
  4393. // - if both are 0, then the output will be 0.
  4394. // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  4395. // come from x.
  4396. // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  4397. // be 0.
  4398. // If that last condition is never the case, then we can form m from the
  4399. // bits that are the same between c1 and c2.
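// For example (illustrative): (and (or x, 0x00FF0000), 0xFFFF00FF) is
// equivalent to (x & ~0x00FFFF00) | 0x00FF0000, where m = 0x00FFFF00 is a
// run of ones (MB = 8, ME = 23), so a single rlwimi can produce it.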
  4400. unsigned MB, ME;
  4401. if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
  4402. SDLoc dl(N);
  4403. SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
  4404. getI32Imm(MB, dl), getI32Imm(ME, dl)};
  4405. ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
  4406. return true;
  4407. }
  4408. return false;
  4409. }
  4410. bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  4411. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4412. uint64_t Imm64;
  4413. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
  4414. return false;
  4415. // If this is a 64-bit zero-extension mask, emit rldicl.
  4416. unsigned MB = 64 - countTrailingOnes(Imm64);
  4417. unsigned SH = 0;
  4418. unsigned Imm;
  4419. SDValue Val = N->getOperand(0);
  4420. SDLoc dl(N);
  4421. if (Val.getOpcode() == ISD::ANY_EXTEND) {
  4422. auto Op0 = Val.getOperand(0);
  4423. if (Op0.getOpcode() == ISD::SRL &&
  4424. isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4425. auto ResultType = Val.getNode()->getValueType(0);
  4426. auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
  4427. SDValue IDVal(ImDef, 0);
  4428. Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
  4429. IDVal, Op0.getOperand(0),
  4430. getI32Imm(1, dl)),
  4431. 0);
  4432. SH = 64 - Imm;
  4433. }
  4434. }
  4435. // If the operand is a logical right shift, we can fold it into this
  4436. // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  4437. // for n <= mb. The right shift is really a left rotate followed by a
  4438. // mask, and this mask is a more-restrictive sub-mask of the mask implied
  4439. // by the shift.
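// For example (illustrative): (and (srl x, 8), 0xFFFF) has MB = 48 and a
// shift amount of 8 <= 48, so it folds to a single rldicl(x, 56, 48).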
  4440. if (Val.getOpcode() == ISD::SRL &&
  4441. isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
  4442. assert(Imm < 64 && "Illegal shift amount");
  4443. Val = Val.getOperand(0);
  4444. SH = 64 - Imm;
  4445. }
  4446. SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4447. CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  4448. return true;
  4449. }
  4450. bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  4451. assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  4452. uint64_t Imm64;
  4453. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4454. !isMask_64(~Imm64))
  4455. return false;
// If this is a negated 64-bit zero-extension mask, i.e. the immediate is a
// sequence of ones from the most significant side and all zeros for the
// remainder, we should use rldicr.
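// For example (illustrative): a mask of 0xFFFFFFFF00000000 gives
// 63 - 32 = 31, so we emit rldicr(x, 0, 31), which keeps only the 32 most
// significant bits.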
  4459. unsigned MB = 63 - countTrailingOnes(~Imm64);
  4460. unsigned SH = 0;
  4461. SDLoc dl(N);
  4462. SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
  4463. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  4464. return true;
  4465. }
  4466. bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  4467. assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
  4468. uint64_t Imm64;
  4469. unsigned MB, ME;
  4470. SDValue N0 = N->getOperand(0);
// We won't get fewer instructions if the immediate is a 32-bit integer.
  4472. // rldimi requires the imm to have consecutive ones with both sides zero.
  4473. // Also, make sure the first Op has only one use, otherwise this may increase
  4474. // register pressure since rldimi is destructive.
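// For example (illustrative): (or x, 0x0000FFFF00000000) has MB = 16 and
// ME = 31, so we emit rldimi x, (li -1), 32, 16, which ORs that run of
// ones into x with a single (destructive) instruction.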
  4475. if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
  4476. isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
  4477. return false;
  4478. unsigned SH = 63 - ME;
  4479. SDLoc Dl(N);
// Use selectI64Imm to materialize the immediate with an LI instruction
// instead of putting Imm64 into the node directly.
  4481. SDValue Ops[] = {
  4482. N->getOperand(0),
  4483. SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
  4484. getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  4485. CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  4486. return true;
  4487. }
  4488. // Select - Convert the specified operand from a target-independent to a
  4489. // target-specific node if it hasn't already been changed.
  4490. void PPCDAGToDAGISel::Select(SDNode *N) {
  4491. SDLoc dl(N);
  4492. if (N->isMachineOpcode()) {
  4493. N->setNodeId(-1);
  4494. return; // Already selected.
  4495. }
  4496. // In case any misguided DAG-level optimizations form an ADD with a
  4497. // TargetConstant operand, crash here instead of miscompiling (by selecting
  4498. // an r+r add instead of some kind of r+i add).
  4499. if (N->getOpcode() == ISD::ADD &&
  4500. N->getOperand(1).getOpcode() == ISD::TargetConstant)
  4501. llvm_unreachable("Invalid ADD with TargetConstant operand");
  4502. // Try matching complex bit permutations before doing anything else.
  4503. if (tryBitPermutation(N))
  4504. return;
  4505. // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
  4506. if (tryIntCompareInGPR(N))
  4507. return;
  4508. switch (N->getOpcode()) {
  4509. default: break;
  4510. case ISD::Constant:
  4511. if (N->getValueType(0) == MVT::i64) {
  4512. ReplaceNode(N, selectI64Imm(CurDAG, N));
  4513. return;
  4514. }
  4515. break;
  4516. case ISD::INTRINSIC_VOID: {
  4517. auto IntrinsicID = N->getConstantOperandVal(1);
  4518. if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
  4519. unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
  4520. SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
  4521. int16_t SImmOperand2;
  4522. int16_t SImmOperand3;
  4523. int16_t SImmOperand4;
  4524. bool isOperand2IntS16Immediate =
  4525. isIntS16Immediate(N->getOperand(2), SImmOperand2);
  4526. bool isOperand3IntS16Immediate =
  4527. isIntS16Immediate(N->getOperand(3), SImmOperand3);
  4528. // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
  4529. // reg or imm + imm. The imm + imm form will be optimized to either an
  4530. // unconditional trap or a nop in a later pass.
  4531. if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
  4532. Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
  4533. else if (isOperand3IntS16Immediate)
  4534. // The 2nd and 3rd operands are reg + imm.
  4535. Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
  4536. else {
  4537. // The 2nd and 3rd operands are imm + reg.
  4538. bool isOperand4IntS16Immediate =
  4539. isIntS16Immediate(N->getOperand(4), SImmOperand4);
  4540. (void)isOperand4IntS16Immediate;
  4541. assert(isOperand4IntS16Immediate &&
  4542. "The 4th operand is not an Immediate");
  4543. // We need to flip the condition immediate TO.
  4544. int16_t TO = int(SImmOperand4) & 0x1F;
  4545. // We swap the first and second bit of TO if they are not same.
  4546. if ((TO & 0x1) != ((TO & 0x2) >> 1))
  4547. TO = (TO & 0x1) ? TO + 1 : TO - 1;
  4548. // We swap the fourth and fifth bit of TO if they are not same.
  4549. if ((TO & 0x8) != ((TO & 0x10) >> 1))
  4550. TO = (TO & 0x8) ? TO + 8 : TO - 8;
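// For example (illustrative): TO = 0b00010 becomes 0b00001 (bits 0 and 1
// swapped) and TO = 0b01000 becomes 0b10000 (bits 3 and 4 swapped).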
  4551. Ops[0] = getI32Imm(TO, dl);
  4552. Ops[1] = N->getOperand(3);
  4553. Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
  4554. }
  4555. CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
  4556. return;
  4557. }
  4558. break;
  4559. }
  4560. case ISD::INTRINSIC_WO_CHAIN: {
  4561. // We emit the PPC::FSELS instruction here because of type conflicts with
  4562. // the comparison operand. The FSELS instruction is defined to use an 8-byte
  4563. // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
  4564. // value for the comparison. When selecting through a .td file, a type
  4565. // error is raised. Must check this first so we never break on the
  4566. // !Subtarget->isISA3_1() check.
  4567. auto IntID = N->getConstantOperandVal(0);
  4568. if (IntID == Intrinsic::ppc_fsels) {
  4569. SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
  4570. CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
  4571. return;
  4572. }
  4573. if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
  4574. auto Pred = N->getConstantOperandVal(1);
  4575. unsigned Opcode =
  4576. IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
  4577. unsigned SubReg = 0;
  4578. unsigned ShiftVal = 0;
  4579. bool Reverse = false;
  4580. switch (Pred) {
  4581. case 0:
  4582. SubReg = PPC::sub_eq;
  4583. ShiftVal = 1;
  4584. break;
  4585. case 1:
  4586. SubReg = PPC::sub_eq;
  4587. ShiftVal = 1;
  4588. Reverse = true;
  4589. break;
  4590. case 2:
  4591. SubReg = PPC::sub_lt;
  4592. ShiftVal = 3;
  4593. break;
  4594. case 3:
  4595. SubReg = PPC::sub_lt;
  4596. ShiftVal = 3;
  4597. Reverse = true;
  4598. break;
  4599. case 4:
  4600. SubReg = PPC::sub_gt;
  4601. ShiftVal = 2;
  4602. break;
  4603. case 5:
  4604. SubReg = PPC::sub_gt;
  4605. ShiftVal = 2;
  4606. Reverse = true;
  4607. break;
  4608. case 6:
  4609. SubReg = PPC::sub_un;
  4610. break;
  4611. case 7:
  4612. SubReg = PPC::sub_un;
  4613. Reverse = true;
  4614. break;
  4615. }
  4616. EVT VTs[] = {MVT::v16i8, MVT::Glue};
  4617. SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
  4618. CurDAG->getTargetConstant(0, dl, MVT::i32)};
  4619. SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
  4620. SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
  4621. // On Power10, we can use SETBC[R]. On prior architectures, we have to use
  4622. // MFOCRF and shift/negate the value.
  4623. if (Subtarget->isISA3_1()) {
  4624. SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
  4625. SDValue CRBit = SDValue(
  4626. CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
  4627. CR6Reg, SubRegIdx, BCDOp.getValue(1)),
  4628. 0);
  4629. CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
  4630. CRBit);
  4631. } else {
  4632. SDValue Move =
  4633. SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
  4634. BCDOp.getValue(1)),
  4635. 0);
  4636. SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
  4637. getI32Imm(31, dl), getI32Imm(31, dl)};
  4638. if (!Reverse)
  4639. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4640. else {
  4641. SDValue Shift = SDValue(
  4642. CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  4643. CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
  4644. }
  4645. }
  4646. return;
  4647. }
  4648. if (!Subtarget->isISA3_1())
  4649. break;
  4650. unsigned Opcode = 0;
  4651. switch (IntID) {
  4652. default:
  4653. break;
  4654. case Intrinsic::ppc_altivec_vstribr_p:
  4655. Opcode = PPC::VSTRIBR_rec;
  4656. break;
  4657. case Intrinsic::ppc_altivec_vstribl_p:
  4658. Opcode = PPC::VSTRIBL_rec;
  4659. break;
  4660. case Intrinsic::ppc_altivec_vstrihr_p:
  4661. Opcode = PPC::VSTRIHR_rec;
  4662. break;
  4663. case Intrinsic::ppc_altivec_vstrihl_p:
  4664. Opcode = PPC::VSTRIHL_rec;
  4665. break;
  4666. }
  4667. if (!Opcode)
  4668. break;
  4669. // Generate the appropriate vector string isolate intrinsic to match.
  4670. EVT VTs[] = {MVT::v16i8, MVT::Glue};
  4671. SDValue VecStrOp =
  4672. SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
  4673. // Vector string isolate instructions update the EQ bit of CR6.
  4674. // Generate a SETBC instruction to extract the bit and place it in a GPR.
  4675. SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
  4676. SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
  4677. SDValue CRBit = SDValue(
  4678. CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
  4679. CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
  4680. 0);
  4681. CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
  4682. return;
  4683. }
  4684. case ISD::SETCC:
  4685. case ISD::STRICT_FSETCC:
  4686. case ISD::STRICT_FSETCCS:
  4687. if (trySETCC(N))
  4688. return;
  4689. break;
  4690. // These nodes will be transformed into GETtlsADDR32 node, which
  4691. // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  4692. case PPCISD::ADDI_TLSLD_L_ADDR:
  4693. case PPCISD::ADDI_TLSGD_L_ADDR: {
  4694. const Module *Mod = MF->getFunction().getParent();
  4695. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4696. !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
  4697. Mod->getPICLevel() == PICLevel::SmallPIC)
  4698. break;
  4699. // Attach global base pointer on GETtlsADDR32 node in order to
  4700. // generate secure plt code for TLS symbols.
  4701. getGlobalBaseReg();
  4702. } break;
  4703. case PPCISD::CALL: {
  4704. if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
  4705. !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
  4706. !Subtarget->isTargetELF())
  4707. break;
  4708. SDValue Op = N->getOperand(1);
  4709. if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
  4710. if (GA->getTargetFlags() == PPCII::MO_PLT)
  4711. getGlobalBaseReg();
  4712. }
  4713. else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
  4714. if (ES->getTargetFlags() == PPCII::MO_PLT)
  4715. getGlobalBaseReg();
  4716. }
  4717. }
  4718. break;
  4719. case PPCISD::GlobalBaseReg:
  4720. ReplaceNode(N, getGlobalBaseReg());
  4721. return;
  4722. case ISD::FrameIndex:
  4723. selectFrameIndex(N, N);
  4724. return;
  4725. case PPCISD::MFOCRF: {
  4726. SDValue InFlag = N->getOperand(1);
  4727. ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
  4728. N->getOperand(0), InFlag));
  4729. return;
  4730. }
  4731. case PPCISD::READ_TIME_BASE:
  4732. ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
  4733. MVT::Other, N->getOperand(0)));
  4734. return;
  4735. case PPCISD::SRA_ADDZE: {
  4736. SDValue N0 = N->getOperand(0);
  4737. SDValue ShiftAmt =
  4738. CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
  4739. getConstantIntValue(), dl,
  4740. N->getValueType(0));
  4741. if (N->getValueType(0) == MVT::i64) {
  4742. SDNode *Op =
  4743. CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
  4744. N0, ShiftAmt);
  4745. CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
  4746. SDValue(Op, 1));
  4747. return;
  4748. } else {
  4749. assert(N->getValueType(0) == MVT::i32 &&
  4750. "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
  4751. SDNode *Op =
  4752. CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
  4753. N0, ShiftAmt);
  4754. CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
  4755. SDValue(Op, 1));
  4756. return;
  4757. }
  4758. }
  4759. case ISD::STORE: {
  4760. // Change TLS initial-exec D-form stores to X-form stores.
  4761. StoreSDNode *ST = cast<StoreSDNode>(N);
  4762. if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
  4763. ST->getAddressingMode() != ISD::PRE_INC)
  4764. if (tryTLSXFormStore(ST))
  4765. return;
  4766. break;
  4767. }
  4768. case ISD::LOAD: {
  4769. // Handle preincrement loads.
  4770. LoadSDNode *LD = cast<LoadSDNode>(N);
  4771. EVT LoadedVT = LD->getMemoryVT();
  4772. // Normal loads are handled by code generated from the .td file.
  4773. if (LD->getAddressingMode() != ISD::PRE_INC) {
  4774. // Change TLS initial-exec D-form loads to X-form loads.
  4775. if (EnableTLSOpt && Subtarget->isELFv2ABI())
  4776. if (tryTLSXFormLoad(LD))
  4777. return;
  4778. break;
  4779. }
  4780. SDValue Offset = LD->getOffset();
  4781. if (Offset.getOpcode() == ISD::TargetConstant ||
  4782. Offset.getOpcode() == ISD::TargetGlobalAddress) {
  4783. unsigned Opcode;
  4784. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4785. if (LD->getValueType(0) != MVT::i64) {
  4786. // Handle PPC32 integer and normal FP loads.
  4787. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4788. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4789. default: llvm_unreachable("Invalid PPC load type!");
  4790. case MVT::f64: Opcode = PPC::LFDU; break;
  4791. case MVT::f32: Opcode = PPC::LFSU; break;
  4792. case MVT::i32: Opcode = PPC::LWZU; break;
  4793. case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
  4794. case MVT::i1:
  4795. case MVT::i8: Opcode = PPC::LBZU; break;
  4796. }
  4797. } else {
  4798. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4799. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4800. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4801. default: llvm_unreachable("Invalid PPC load type!");
  4802. case MVT::i64: Opcode = PPC::LDU; break;
  4803. case MVT::i32: Opcode = PPC::LWZU8; break;
  4804. case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
  4805. case MVT::i1:
  4806. case MVT::i8: Opcode = PPC::LBZU8; break;
  4807. }
  4808. }
  4809. SDValue Chain = LD->getChain();
  4810. SDValue Base = LD->getBasePtr();
  4811. SDValue Ops[] = { Offset, Base, Chain };
  4812. SDNode *MN = CurDAG->getMachineNode(
  4813. Opcode, dl, LD->getValueType(0),
  4814. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4815. transferMemOperands(N, MN);
  4816. ReplaceNode(N, MN);
  4817. return;
  4818. } else {
  4819. unsigned Opcode;
  4820. bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
  4821. if (LD->getValueType(0) != MVT::i64) {
  4822. // Handle PPC32 integer and normal FP loads.
  4823. assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
  4824. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4825. default: llvm_unreachable("Invalid PPC load type!");
  4826. case MVT::f64: Opcode = PPC::LFDUX; break;
  4827. case MVT::f32: Opcode = PPC::LFSUX; break;
  4828. case MVT::i32: Opcode = PPC::LWZUX; break;
  4829. case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
  4830. case MVT::i1:
  4831. case MVT::i8: Opcode = PPC::LBZUX; break;
  4832. }
  4833. } else {
  4834. assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
  4835. assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
  4836. "Invalid sext update load");
  4837. switch (LoadedVT.getSimpleVT().SimpleTy) {
  4838. default: llvm_unreachable("Invalid PPC load type!");
  4839. case MVT::i64: Opcode = PPC::LDUX; break;
  4840. case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
  4841. case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
  4842. case MVT::i1:
  4843. case MVT::i8: Opcode = PPC::LBZUX8; break;
  4844. }
  4845. }
  4846. SDValue Chain = LD->getChain();
  4847. SDValue Base = LD->getBasePtr();
  4848. SDValue Ops[] = { Base, Offset, Chain };
  4849. SDNode *MN = CurDAG->getMachineNode(
  4850. Opcode, dl, LD->getValueType(0),
  4851. PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
  4852. transferMemOperands(N, MN);
  4853. ReplaceNode(N, MN);
  4854. return;
  4855. }
  4856. }
  4857. case ISD::AND:
  4858. // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
  4859. if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
  4860. tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
  4861. return;
  4862. // Other cases are autogenerated.
  4863. break;
  4864. case ISD::OR: {
  4865. if (N->getValueType(0) == MVT::i32)
  4866. if (tryBitfieldInsert(N))
  4867. return;
  4868. int16_t Imm;
  4869. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  4870. isIntS16Immediate(N->getOperand(1), Imm)) {
  4871. KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
  4872. // If this is equivalent to an add, then we can fold it with the
  4873. // FrameIndex calculation.
  4874. if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
  4875. selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
  4876. return;
  4877. }
  4878. }
// If this is an 'or' against an immediate whose set bits form a single run
// of consecutive ones with zeros on both sides, try to emit rldimi.
  4881. if (tryAsSingleRLDIMI(N))
  4882. return;
  4883. // OR with a 32-bit immediate can be handled by ori + oris
  4884. // without creating an immediate in a GPR.
  4885. uint64_t Imm64 = 0;
  4886. bool IsPPC64 = Subtarget->isPPC64();
  4887. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4888. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
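// For example (illustrative): (or x, 0x12345678) becomes ori x, 0x5678
// followed by oris ..., 0x1234.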
  4890. uint64_t ImmHi = Imm64 >> 16;
  4891. uint64_t ImmLo = Imm64 & 0xFFFF;
  4892. if (ImmHi != 0 && ImmLo != 0) {
  4893. SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
  4894. N->getOperand(0),
  4895. getI16Imm(ImmLo, dl));
  4896. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  4897. CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
  4898. return;
  4899. }
  4900. }
  4901. // Other cases are autogenerated.
  4902. break;
  4903. }
  4904. case ISD::XOR: {
  4905. // XOR with a 32-bit immediate can be handled by xori + xoris
  4906. // without creating an immediate in a GPR.
  4907. uint64_t Imm64 = 0;
  4908. bool IsPPC64 = Subtarget->isPPC64();
  4909. if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
  4910. (Imm64 & ~0xFFFFFFFFuLL) == 0) {
// If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
  4912. uint64_t ImmHi = Imm64 >> 16;
  4913. uint64_t ImmLo = Imm64 & 0xFFFF;
  4914. if (ImmHi != 0 && ImmLo != 0) {
  4915. SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
  4916. N->getOperand(0),
  4917. getI16Imm(ImmLo, dl));
  4918. SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
  4919. CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
  4920. return;
  4921. }
  4922. }
  4923. break;
  4924. }
  4925. case ISD::ADD: {
  4926. int16_t Imm;
  4927. if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
  4928. isIntS16Immediate(N->getOperand(1), Imm)) {
  4929. selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
  4930. return;
  4931. }
  4932. break;
  4933. }
  4934. case ISD::SHL: {
  4935. unsigned Imm, SH, MB, ME;
  4936. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  4937. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  4938. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  4939. getI32Imm(SH, dl), getI32Imm(MB, dl),
  4940. getI32Imm(ME, dl) };
  4941. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4942. return;
  4943. }
  4944. // Other cases are autogenerated.
  4945. break;
  4946. }
  4947. case ISD::SRL: {
  4948. unsigned Imm, SH, MB, ME;
  4949. if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
  4950. isRotateAndMask(N, Imm, true, SH, MB, ME)) {
  4951. SDValue Ops[] = { N->getOperand(0).getOperand(0),
  4952. getI32Imm(SH, dl), getI32Imm(MB, dl),
  4953. getI32Imm(ME, dl) };
  4954. CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
  4955. return;
  4956. }
  4957. // Other cases are autogenerated.
  4958. break;
  4959. }
  4960. case ISD::MUL: {
  4961. SDValue Op1 = N->getOperand(1);
  4962. if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64)
  4963. break;
// If the multiplier fits in int16, we can handle it with mulli.
  4965. int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
  4966. unsigned Shift = countTrailingZeros<uint64_t>(Imm);
  4967. if (isInt<16>(Imm) || !Shift)
  4968. break;
// If the shifted value fits in int16, we can do this transformation:
// (mul X, c1 << c2) -> (rldicr (mulli X, c1), c2). We do this in ISel
// because DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
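// For example (illustrative): (mul X, 40000) = (mul X, 625 << 6) becomes
// (rldicr (mulli X, 625), 6, 57).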
  4972. uint64_t ImmSh = Imm >> Shift;
  4973. if (isInt<16>(ImmSh)) {
  4974. uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
  4975. SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
  4976. SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
  4977. N->getOperand(0), SDImm);
  4978. CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0),
  4979. getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl));
  4980. return;
  4981. }
  4982. break;
  4983. }
  4984. // FIXME: Remove this once the ANDI glue bug is fixed:
  4985. case PPCISD::ANDI_rec_1_EQ_BIT:
  4986. case PPCISD::ANDI_rec_1_GT_BIT: {
  4987. if (!ANDIGlueBug)
  4988. break;
  4989. EVT InVT = N->getOperand(0).getValueType();
  4990. assert((InVT == MVT::i64 || InVT == MVT::i32) &&
  4991. "Invalid input type for ANDI_rec_1_EQ_BIT");
  4992. unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
  4993. SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
  4994. N->getOperand(0),
  4995. CurDAG->getTargetConstant(1, dl, InVT)),
  4996. 0);
  4997. SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  4998. SDValue SRIdxVal = CurDAG->getTargetConstant(
  4999. N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
  5000. dl, MVT::i32);
  5001. CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
  5002. SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
  5003. return;
  5004. }
  5005. case ISD::SELECT_CC: {
  5006. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  5007. EVT PtrVT =
  5008. CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  5009. bool isPPC64 = (PtrVT == MVT::i64);
  5010. // If this is a select of i1 operands, we'll pattern match it.
  5011. if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
  5012. break;
  5013. if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
  5014. bool NeedSwapOps = false;
  5015. bool IsUnCmp = false;
  5016. if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
  5017. SDValue LHS = N->getOperand(0);
  5018. SDValue RHS = N->getOperand(1);
  5019. if (NeedSwapOps)
  5020. std::swap(LHS, RHS);
// Use SelectCC to generate the comparison that sets the CR bits. For
// equality comparisons with one literal operand, SelectCC may avoid
// materializing the whole literal and instead use xoris to check part of
// it first, in which case the resulting comparison cannot exactly
// represent the GT/LT relationship. To avoid this, we specify
// SETGT/SETUGT here instead of SETEQ.
  5027. SDValue GenCC =
  5028. SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
  5029. CurDAG->SelectNodeTo(
  5030. N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
  5031. N->getValueType(0), GenCC);
  5032. NumP9Setb++;
  5033. return;
  5034. }
  5035. }
  5036. // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
  5037. if (!isPPC64)
  5038. if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
  5039. if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
  5040. if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
  5041. if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
  5042. CC == ISD::SETNE &&
  5043. // FIXME: Implement this optzn for PPC64.
  5044. N->getValueType(0) == MVT::i32) {
  5045. SDNode *Tmp =
  5046. CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
  5047. N->getOperand(0), getI32Imm(~0U, dl));
  5048. CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
  5049. N->getOperand(0), SDValue(Tmp, 1));
  5050. return;
  5051. }
  5052. SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
  5053. if (N->getValueType(0) == MVT::i1) {
  5054. // An i1 select is: (c & t) | (!c & f).
  5055. bool Inv;
  5056. unsigned Idx = getCRIdxForSetCC(CC, Inv);
  5057. unsigned SRI;
  5058. switch (Idx) {
  5059. default: llvm_unreachable("Invalid CC index");
  5060. case 0: SRI = PPC::sub_lt; break;
  5061. case 1: SRI = PPC::sub_gt; break;
  5062. case 2: SRI = PPC::sub_eq; break;
  5063. case 3: SRI = PPC::sub_un; break;
  5064. }
  5065. SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
  5066. SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
  5067. CCBit, CCBit), 0);
  5068. SDValue C = Inv ? NotCCBit : CCBit,
  5069. NotC = Inv ? CCBit : NotCCBit;
  5070. SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  5071. C, N->getOperand(2)), 0);
  5072. SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
  5073. NotC, N->getOperand(3)), 0);
  5074. CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
  5075. return;
  5076. }
  5077. unsigned BROpc =
  5078. getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
  5079. unsigned SelectCCOp;
  5080. if (N->getValueType(0) == MVT::i32)
  5081. SelectCCOp = PPC::SELECT_CC_I4;
  5082. else if (N->getValueType(0) == MVT::i64)
  5083. SelectCCOp = PPC::SELECT_CC_I8;
  5084. else if (N->getValueType(0) == MVT::f32) {
  5085. if (Subtarget->hasP8Vector())
  5086. SelectCCOp = PPC::SELECT_CC_VSSRC;
  5087. else if (Subtarget->hasSPE())
  5088. SelectCCOp = PPC::SELECT_CC_SPE4;
  5089. else
  5090. SelectCCOp = PPC::SELECT_CC_F4;
  5091. } else if (N->getValueType(0) == MVT::f64) {
  5092. if (Subtarget->hasVSX())
  5093. SelectCCOp = PPC::SELECT_CC_VSFRC;
  5094. else if (Subtarget->hasSPE())
  5095. SelectCCOp = PPC::SELECT_CC_SPE;
  5096. else
  5097. SelectCCOp = PPC::SELECT_CC_F8;
  5098. } else if (N->getValueType(0) == MVT::f128)
  5099. SelectCCOp = PPC::SELECT_CC_F16;
  5100. else if (Subtarget->hasSPE())
  5101. SelectCCOp = PPC::SELECT_CC_SPE;
  5102. else if (N->getValueType(0) == MVT::v2f64 ||
  5103. N->getValueType(0) == MVT::v2i64)
  5104. SelectCCOp = PPC::SELECT_CC_VSRC;
  5105. else
  5106. SelectCCOp = PPC::SELECT_CC_VRRC;
  5107. SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
  5108. getI32Imm(BROpc, dl) };
  5109. CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
  5110. return;
  5111. }
  5112. case ISD::VECTOR_SHUFFLE:
  5113. if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
  5114. N->getValueType(0) == MVT::v2i64)) {
  5115. ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  5116. SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
  5117. Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
  5118. unsigned DM[2];
  5119. for (int i = 0; i < 2; ++i)
  5120. if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
  5121. DM[i] = 0;
  5122. else
  5123. DM[i] = 1;
  5124. if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
  5125. Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
  5126. isa<LoadSDNode>(Op1.getOperand(0))) {
  5127. LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
  5128. SDValue Base, Offset;
  5129. if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
  5130. (LD->getMemoryVT() == MVT::f64 ||
  5131. LD->getMemoryVT() == MVT::i64) &&
  5132. SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
  5133. SDValue Chain = LD->getChain();
  5134. SDValue Ops[] = { Base, Offset, Chain };
  5135. MachineMemOperand *MemOp = LD->getMemOperand();
  5136. SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
  5137. N->getValueType(0), Ops);
  5138. CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
  5139. return;
  5140. }
  5141. }
  5142. // For little endian, we must swap the input operands and adjust
  5143. // the mask elements (reverse and invert them).
  5144. if (Subtarget->isLittleEndian()) {
  5145. std::swap(Op1, Op2);
  5146. unsigned tmp = DM[0];
  5147. DM[0] = 1 - DM[1];
  5148. DM[1] = 1 - tmp;
  5149. }
  5150. SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
  5151. MVT::i32);
  5152. SDValue Ops[] = { Op1, Op2, DMV };
  5153. CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
  5154. return;
  5155. }
  5156. break;
  5157. case PPCISD::BDNZ:
  5158. case PPCISD::BDZ: {
  5159. bool IsPPC64 = Subtarget->isPPC64();
  5160. SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
  5161. CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
  5162. ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
  5163. : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
  5164. MVT::Other, Ops);
  5165. return;
  5166. }
  5167. case PPCISD::COND_BRANCH: {
  5168. // Op #0 is the Chain.
  5169. // Op #1 is the PPC::PRED_* number.
  5170. // Op #2 is the CR#
  5171. // Op #3 is the Dest MBB
  5172. // Op #4 is the Flag.
  5173. // Prevent PPC::PRED_* from being selected into LI.
  5174. unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  5175. if (EnableBranchHint)
  5176. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
  5177. SDValue Pred = getI32Imm(PCC, dl);
  5178. SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
  5179. N->getOperand(0), N->getOperand(4) };
  5180. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  5181. return;
  5182. }
  5183. case ISD::BR_CC: {
  5184. if (tryFoldSWTestBRCC(N))
  5185. return;
  5186. ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  5187. unsigned PCC =
  5188. getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
  5189. if (N->getOperand(2).getValueType() == MVT::i1) {
  5190. unsigned Opc;
  5191. bool Swap;
  5192. switch (PCC) {
  5193. default: llvm_unreachable("Unexpected Boolean-operand predicate");
  5194. case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
  5195. case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
  5196. case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
  5197. case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
  5198. case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
  5199. case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
  5200. }
  5201. // A signed comparison of i1 values produces the opposite result to an
  5202. // unsigned one if the condition code includes less-than or greater-than.
  5203. // This is because 1 is the most negative signed i1 number and the most
  5204. // positive unsigned i1 number. The CR-logical operations used for such
  5205. // comparisons are non-commutative so for signed comparisons vs. unsigned
  5206. // ones, the input operands just need to be swapped.
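// For example (illustrative): as an i1 value, 1 is -1 when interpreted as
// signed, so 1 < 0 signed while 1 > 0 unsigned.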
  5207. if (ISD::isSignedIntSetCC(CC))
  5208. Swap = !Swap;
  5209. SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
  5210. N->getOperand(Swap ? 3 : 2),
  5211. N->getOperand(Swap ? 2 : 3)), 0);
  5212. CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
  5213. N->getOperand(0));
  5214. return;
  5215. }
  5216. if (EnableBranchHint)
  5217. PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
  5218. SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
  5219. SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
  5220. N->getOperand(4), N->getOperand(0) };
  5221. CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
  5222. return;
  5223. }
  5224. case ISD::BRIND: {
  5225. // FIXME: Should custom lower this.
  5226. SDValue Chain = N->getOperand(0);
  5227. SDValue Target = N->getOperand(1);
  5228. unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
  5229. unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
  5230. Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
  5231. Chain), 0);
  5232. CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
  5233. return;
  5234. }
  5235. case PPCISD::TOC_ENTRY: {
  5236. const bool isPPC64 = Subtarget->isPPC64();
  5237. const bool isELFABI = Subtarget->isSVR4ABI();
  5238. const bool isAIXABI = Subtarget->isAIXABI();
// PowerPC only supports the small, medium, and large code models.
  5240. const CodeModel::Model CModel = TM.getCodeModel();
  5241. assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
  5242. "PowerPC doesn't support tiny or kernel code models.");
  5243. if (isAIXABI && CModel == CodeModel::Medium)
  5244. report_fatal_error("Medium code model is not supported on AIX.");
  5245. // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
  5246. // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
  5247. // small code model, we need to check for a toc-data attribute.
  5248. if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
  5249. break;
  5250. auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
  5251. EVT OperandTy) {
  5252. SDValue GA = TocEntry->getOperand(0);
  5253. SDValue TocBase = TocEntry->getOperand(1);
  5254. SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
  5255. transferMemOperands(TocEntry, MN);
  5256. ReplaceNode(TocEntry, MN);
  5257. };
  5258. // Handle 32-bit small code model.
  5259. if (!isPPC64 && CModel == CodeModel::Small) {
// Transform the ISD::TOC_ENTRY node into the passed-in opcode, either
// PPC::ADDItoc or PPC::LWZtoc.
  5262. if (isELFABI) {
  5263. assert(TM.isPositionIndependent() &&
  5264. "32-bit ELF can only have TOC entries in position independent"
  5265. " code.");
  5266. // 32-bit ELF always uses a small code model toc access.
  5267. replaceWith(PPC::LWZtoc, N, MVT::i32);
  5268. return;
  5269. }
  5270. assert(isAIXABI && "ELF ABI already handled");
  5271. if (hasTocDataAttr(N->getOperand(0),
  5272. CurDAG->getDataLayout().getPointerSize())) {
  5273. replaceWith(PPC::ADDItoc, N, MVT::i32);
  5274. return;
  5275. }
  5276. replaceWith(PPC::LWZtoc, N, MVT::i32);
  5277. return;
  5278. }
  5279. if (isPPC64 && CModel == CodeModel::Small) {
  5280. assert(isAIXABI && "ELF ABI handled in common SelectCode");
  5281. if (hasTocDataAttr(N->getOperand(0),
  5282. CurDAG->getDataLayout().getPointerSize())) {
  5283. replaceWith(PPC::ADDItoc8, N, MVT::i64);
  5284. return;
  5285. }
// Break if it doesn't have the toc-data attribute, and proceed with the
// common SelectCode.
  5288. break;
  5289. }
  5290. assert(CModel != CodeModel::Small && "All small code models handled.");
  5291. assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
  5292. " ELF/AIX or 32-bit AIX in the following.");
  5293. // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
  5294. // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
  5295. // generate two instructions as described below. The first source operand
  5296. // is a symbol reference. If it must be toc-referenced according to
  5297. // Subtarget, we generate:
  5298. // [32-bit AIX]
  5299. // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
  5300. // [64-bit ELF/AIX]
  5301. // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
  5302. // Otherwise we generate:
  5303. // ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
  5304. SDValue GA = N->getOperand(0);
  5305. SDValue TOCbase = N->getOperand(1);
  5306. EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  5307. SDNode *Tmp = CurDAG->getMachineNode(
  5308. isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
  5309. if (PPCLowering->isAccessedAsGotIndirect(GA)) {
  5310. // If it is accessed as got-indirect, we need an extra LWZ/LD to load
  5311. // the address.
  5312. SDNode *MN = CurDAG->getMachineNode(
  5313. isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
  5314. transferMemOperands(N, MN);
  5315. ReplaceNode(N, MN);
  5316. return;
  5317. }
  5318. // Build the address relative to the TOC-pointer.
  5319. ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
  5320. SDValue(Tmp, 0), GA));
  5321. return;
  5322. }
  5323. case PPCISD::PPC32_PICGOT:
  5324. // Generate a PIC-safe GOT reference.
  5325. assert(Subtarget->is32BitELFABI() &&
  5326. "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
  5327. CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
  5328. PPCLowering->getPointerTy(CurDAG->getDataLayout()),
  5329. MVT::i32);
  5330. return;
  5331. case PPCISD::VADD_SPLAT: {
  5332. // This expands into one of three sequences, depending on whether
  5333. // the first operand is odd or even, positive or negative.
  5334. assert(isa<ConstantSDNode>(N->getOperand(0)) &&
  5335. isa<ConstantSDNode>(N->getOperand(1)) &&
  5336. "Invalid operand on VADD_SPLAT!");
  5337. int Elt = N->getConstantOperandVal(0);
  5338. int EltSize = N->getConstantOperandVal(1);
  5339. unsigned Opc1, Opc2, Opc3;
  5340. EVT VT;
  5341. if (EltSize == 1) {
  5342. Opc1 = PPC::VSPLTISB;
  5343. Opc2 = PPC::VADDUBM;
  5344. Opc3 = PPC::VSUBUBM;
  5345. VT = MVT::v16i8;
  5346. } else if (EltSize == 2) {
  5347. Opc1 = PPC::VSPLTISH;
  5348. Opc2 = PPC::VADDUHM;
  5349. Opc3 = PPC::VSUBUHM;
  5350. VT = MVT::v8i16;
  5351. } else {
  5352. assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
  5353. Opc1 = PPC::VSPLTISW;
  5354. Opc2 = PPC::VADDUWM;
  5355. Opc3 = PPC::VSUBUWM;
  5356. VT = MVT::v4i32;
  5357. }
  5358. if ((Elt & 1) == 0) {
  5359. // Elt is even, in the range [-32,-18] + [16,30].
  5360. //
  5361. // Convert: VADD_SPLAT elt, size
  5362. // Into: tmp = VSPLTIS[BHW] elt
  5363. // VADDU[BHW]M tmp, tmp
  5364. // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
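// For example (illustrative): elt = 20 with size = 4 becomes
// vspltisw tmp, 10 followed by vadduwm tmp, tmp (10 + 10 = 20 per word).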
  5365. SDValue EltVal = getI32Imm(Elt >> 1, dl);
  5366. SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5367. SDValue TmpVal = SDValue(Tmp, 0);
  5368. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
  5369. return;
  5370. } else if (Elt > 0) {
  5371. // Elt is odd and positive, in the range [17,31].
  5372. //
  5373. // Convert: VADD_SPLAT elt, size
  5374. // Into: tmp1 = VSPLTIS[BHW] elt-16
  5375. // tmp2 = VSPLTIS[BHW] -16
  5376. // VSUBU[BHW]M tmp1, tmp2
  5377. SDValue EltVal = getI32Imm(Elt - 16, dl);
  5378. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5379. EltVal = getI32Imm(-16, dl);
  5380. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5381. ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
  5382. SDValue(Tmp2, 0)));
  5383. return;
  5384. } else {
  5385. // Elt is odd and negative, in the range [-31,-17].
  5386. //
  5387. // Convert: VADD_SPLAT elt, size
  5388. // Into: tmp1 = VSPLTIS[BHW] elt+16
  5389. // tmp2 = VSPLTIS[BHW] -16
  5390. // VADDU[BHW]M tmp1, tmp2
  5391. SDValue EltVal = getI32Imm(Elt + 16, dl);
  5392. SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5393. EltVal = getI32Imm(-16, dl);
  5394. SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
  5395. ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
  5396. SDValue(Tmp2, 0)));
  5397. return;
  5398. }
  5399. }
  5400. case PPCISD::LD_SPLAT: {
// Here we handle splat loads of type v16i8 and v8i16 when there is no
// direct move; we don't need to go through the stack in this case. If the
// target has direct move, we should get the best selection from the .td
// file.
  5404. if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
  5405. break;
  5406. EVT Type = N->getValueType(0);
  5407. if (Type != MVT::v16i8 && Type != MVT::v8i16)
  5408. break;
// If the alignment of the load is 16 or greater, we don't need the
// permuted mask to get the required value. The value is element 0 on a
// big-endian target, or element 7/15 on a little-endian target, in the
// result register of the lvx instruction.
// Select the instruction in the .td file.
  5414. if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
  5415. isOffsetMultipleOf(N, 16))
  5416. break;
  5417. SDValue ZeroReg =
  5418. CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
  5419. Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
  5420. unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
  5421. // v16i8 LD_SPLAT addr
  5422. // ======>
  5423. // Mask = LVSR/LVSL 0, addr
  5424. // LoadLow = LVX 0, addr
  5425. // Perm = VPERM LoadLow, LoadLow, Mask
  5426. // Splat = VSPLTB 15/0, Perm
  5427. //
  5428. // v8i16 LD_SPLAT addr
  5429. // ======>
  5430. // Mask = LVSR/LVSL 0, addr
  5431. // LoadLow = LVX 0, addr
  5432. // LoadHigh = LVX (LI, 1), addr
  5433. // Perm = VPERM LoadLow, LoadHigh, Mask
  5434. // Splat = VSPLTH 7/0, Perm
  5435. unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
  5436. unsigned SplatElemIndex =
  5437. Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
  5438. SDNode *Mask = CurDAG->getMachineNode(
  5439. Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
  5440. N->getOperand(1));
  5441. SDNode *LoadLow =
  5442. CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
  5443. {ZeroReg, N->getOperand(1), N->getOperand(0)});
  5444. SDNode *LoadHigh = LoadLow;
  5445. if (Type == MVT::v8i16) {
  5446. LoadHigh = CurDAG->getMachineNode(
  5447. PPC::LVX, dl, MVT::v16i8, MVT::Other,
  5448. {SDValue(CurDAG->getMachineNode(
  5449. LIOpcode, dl, MVT::i32,
  5450. CurDAG->getTargetConstant(1, dl, MVT::i8)),
  5451. 0),
  5452. N->getOperand(1), SDValue(LoadLow, 1)});
  5453. }
  5454. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
  5455. transferMemOperands(N, LoadHigh);
  5456. SDNode *Perm =
  5457. CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
  5458. SDValue(LoadHigh, 0), SDValue(Mask, 0));
  5459. CurDAG->SelectNodeTo(N, SplatOp, Type,
  5460. CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
  5461. SDValue(Perm, 0));
  5462. return;
  5463. }
  5464. }
  5465. SelectCode(N);
  5466. }
  5467. // If the target supports the cmpb instruction, do the idiom recognition here.
  5468. // We don't do this as a DAG combine because we don't want to do it as nodes
  5469. // are being combined (because we might miss part of the eventual idiom). We
  5470. // don't want to do it during instruction selection because we want to reuse
  5471. // the logic for lowering the masking operations already part of the
  5472. // instruction selector.
  5473. SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
  5474. SDLoc dl(N);
  5475. assert(N->getOpcode() == ISD::OR &&
  5476. "Only OR nodes are supported for CMPB");
  5477. SDValue Res;
  5478. if (!Subtarget->hasCMPB())
  5479. return Res;
  5480. if (N->getValueType(0) != MVT::i32 &&
  5481. N->getValueType(0) != MVT::i64)
  5482. return Res;
  5483. EVT VT = N->getValueType(0);
  5484. SDValue RHS, LHS;
  5485. bool BytesFound[8] = {false, false, false, false, false, false, false, false};
  5486. uint64_t Mask = 0, Alt = 0;
  5487. auto IsByteSelectCC = [this](SDValue O, unsigned &b,
  5488. uint64_t &Mask, uint64_t &Alt,
  5489. SDValue &LHS, SDValue &RHS) {
  5490. if (O.getOpcode() != ISD::SELECT_CC)
  5491. return false;
  5492. ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
  5493. if (!isa<ConstantSDNode>(O.getOperand(2)) ||
  5494. !isa<ConstantSDNode>(O.getOperand(3)))
  5495. return false;
  5496. uint64_t PM = O.getConstantOperandVal(2);
  5497. uint64_t PAlt = O.getConstantOperandVal(3);
  5498. for (b = 0; b < 8; ++b) {
  5499. uint64_t Mask = UINT64_C(0xFF) << (8*b);
  5500. if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
  5501. break;
  5502. }
  5503. if (b == 8)
  5504. return false;
  5505. Mask |= PM;
  5506. Alt |= PAlt;
  5507. if (!isa<ConstantSDNode>(O.getOperand(1)) ||
  5508. O.getConstantOperandVal(1) != 0) {
  5509. SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
  5510. if (Op0.getOpcode() == ISD::TRUNCATE)
  5511. Op0 = Op0.getOperand(0);
  5512. if (Op1.getOpcode() == ISD::TRUNCATE)
  5513. Op1 = Op1.getOperand(0);
  5514. if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
  5515. Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
  5516. isa<ConstantSDNode>(Op0.getOperand(1))) {
  5517. unsigned Bits = Op0.getValueSizeInBits();
  5518. if (b != Bits/8-1)
  5519. return false;
  5520. if (Op0.getConstantOperandVal(1) != Bits-8)
  5521. return false;
  5522. LHS = Op0.getOperand(0);
  5523. RHS = Op1.getOperand(0);
  5524. return true;
  5525. }
  5526. // When we have small integers (i16 to be specific), the form present
  5527. // post-legalization uses SETULT in the SELECT_CC for the
// higher-order byte, relying on the fact that the
  5529. // even-higher-order bytes are known to all be zero, for example:
  5530. // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
  5531. // (so when the second byte is the same, because all higher-order
  5532. // bits from bytes 3 and 4 are known to be zero, the result of the
  5533. // xor can be at most 255)
  5534. if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
  5535. isa<ConstantSDNode>(O.getOperand(1))) {
  5536. uint64_t ULim = O.getConstantOperandVal(1);
  5537. if (ULim != (UINT64_C(1) << b*8))
  5538. return false;
  5539. // Now we need to make sure that the upper bytes are known to be
  5540. // zero.
  5541. unsigned Bits = Op0.getValueSizeInBits();
  5542. if (!CurDAG->MaskedValueIsZero(
  5543. Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
  5544. return false;
  5545. LHS = Op0.getOperand(0);
  5546. RHS = Op0.getOperand(1);
  5547. return true;
  5548. }
  5549. return false;
  5550. }
  5551. if (CC != ISD::SETEQ)
  5552. return false;
  5553. SDValue Op = O.getOperand(0);
  5554. if (Op.getOpcode() == ISD::AND) {
  5555. if (!isa<ConstantSDNode>(Op.getOperand(1)))
  5556. return false;
  5557. if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
  5558. return false;
  5559. SDValue XOR = Op.getOperand(0);
  5560. if (XOR.getOpcode() == ISD::TRUNCATE)
  5561. XOR = XOR.getOperand(0);
  5562. if (XOR.getOpcode() != ISD::XOR)
  5563. return false;
  5564. LHS = XOR.getOperand(0);
  5565. RHS = XOR.getOperand(1);
  5566. return true;
  5567. } else if (Op.getOpcode() == ISD::SRL) {
  5568. if (!isa<ConstantSDNode>(Op.getOperand(1)))
  5569. return false;
  5570. unsigned Bits = Op.getValueSizeInBits();
  5571. if (b != Bits/8-1)
  5572. return false;
  5573. if (Op.getConstantOperandVal(1) != Bits-8)
  5574. return false;
  5575. SDValue XOR = Op.getOperand(0);
  5576. if (XOR.getOpcode() == ISD::TRUNCATE)
  5577. XOR = XOR.getOperand(0);
  5578. if (XOR.getOpcode() != ISD::XOR)
  5579. return false;
  5580. LHS = XOR.getOperand(0);
  5581. RHS = XOR.getOperand(1);
  5582. return true;
  5583. }
  5584. return false;
  5585. };
  5586. SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
  5587. while (!Queue.empty()) {
  5588. SDValue V = Queue.pop_back_val();
  5589. for (const SDValue &O : V.getNode()->ops()) {
  5590. unsigned b = 0;
  5591. uint64_t M = 0, A = 0;
  5592. SDValue OLHS, ORHS;
  5593. if (O.getOpcode() == ISD::OR) {
  5594. Queue.push_back(O);
  5595. } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
  5596. if (!LHS) {
  5597. LHS = OLHS;
  5598. RHS = ORHS;
  5599. BytesFound[b] = true;
  5600. Mask |= M;
  5601. Alt |= A;
  5602. } else if ((LHS == ORHS && RHS == OLHS) ||
  5603. (RHS == ORHS && LHS == OLHS)) {
  5604. BytesFound[b] = true;
  5605. Mask |= M;
  5606. Alt |= A;
  5607. } else {
  5608. return Res;
  5609. }
  5610. } else {
  5611. return Res;
  5612. }
  5613. }
  5614. }
  5615. unsigned LastB = 0, BCnt = 0;
  5616. for (unsigned i = 0; i < 8; ++i)
  5617. if (BytesFound[LastB]) {
  5618. ++BCnt;
  5619. LastB = i;
  5620. }
  5621. if (!LastB || BCnt < 2)
  5622. return Res;
// Because we'll be zero-extending the output anyway if we don't have a
// specific value for each input byte (via the Mask), we can 'anyext' the
// inputs.
  5625. if (LHS.getValueType() != VT) {
  5626. LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
  5627. RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
  5628. }
  5629. Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
  5630. bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
  5631. if (NonTrivialMask && !Alt) {
  5632. // Res = Mask & CMPB
  5633. Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
  5634. CurDAG->getConstant(Mask, dl, VT));
  5635. } else if (Alt) {
  5636. // Res = (CMPB & Mask) | (~CMPB & Alt)
  5637. // Which, as suggested here:
  5638. // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  5639. // can be written as:
  5640. // Res = Alt ^ ((Alt ^ Mask) & CMPB)
  5641. // useful because the (Alt ^ Mask) can be pre-computed.
  5642. Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
  5643. CurDAG->getConstant(Mask ^ Alt, dl, VT));
  5644. Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
  5645. CurDAG->getConstant(Alt, dl, VT));
  5646. }
  5647. return Res;
  5648. }
// When CR bit registers are enabled, an extension of an i1 variable to an
// i32 or i64 value is lowered in terms of a SELECT_I[48] operation, and
// thus involves constant materialization of a 0 or a 1 or both. If the
// result of the extension is then operated upon by some operator that can
// be constant folded with a constant 0 or 1, and if that constant can be
// materialized using only one instruction (like a zero or one), then we
// should fold those operations into the select.
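// For example (illustrative): (add (zext i1 %c to i32), 7) becomes
// (select %c, 8, 7), so the add is folded away and only the select
// remains.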
  5656. void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  5657. if (!Subtarget->useCRBits())
  5658. return;
  5659. if (N->getOpcode() != ISD::ZERO_EXTEND &&
  5660. N->getOpcode() != ISD::SIGN_EXTEND &&
  5661. N->getOpcode() != ISD::ANY_EXTEND)
  5662. return;
  5663. if (N->getOperand(0).getValueType() != MVT::i1)
  5664. return;
  5665. if (!N->hasOneUse())
  5666. return;
  5667. SDLoc dl(N);
  5668. EVT VT = N->getValueType(0);
  5669. SDValue Cond = N->getOperand(0);
  5670. SDValue ConstTrue =
  5671. CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
  5672. SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
  5673. do {
  5674. SDNode *User = *N->use_begin();
  5675. if (User->getNumOperands() != 2)
  5676. break;
  5677. auto TryFold = [this, N, User, dl](SDValue Val) {
  5678. SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
  5679. SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
  5680. SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
  5681. return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
  5682. User->getValueType(0), {O0, O1});
  5683. };
  5684. // FIXME: When the semantics of the interaction between select and undef
  5685. // are clearly defined, it may turn out to be unnecessary to break here.
  5686. SDValue TrueRes = TryFold(ConstTrue);
  5687. if (!TrueRes || TrueRes.isUndef())
  5688. break;
  5689. SDValue FalseRes = TryFold(ConstFalse);
  5690. if (!FalseRes || FalseRes.isUndef())
  5691. break;
  5692. // For us to materialize these using one instruction, we must be able to
  5693. // represent them as signed 16-bit integers.
  5694. uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
  5695. False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
  5696. if (!isInt<16>(True) || !isInt<16>(False))
  5697. break;
  5698. // We can replace User with a new SELECT node, and try again to see if we
  5699. // can fold the select with its user.
  5700. Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
  5701. N = User;
  5702. ConstTrue = TrueRes;
  5703. ConstFalse = FalseRes;
  5704. } while (N->hasOneUse());
  5705. }
void PPCDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Res;
    switch (N->getOpcode()) {
    default: break;
    case ISD::OR:
      Res = combineToCMPB(N);
      break;
    }

    if (!Res)
      foldBoolExts(Res, N);

    if (Res) {
      LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Res.getNode()->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  PeepholePPC64();
  PeepholeCROps();
  PeepholePPC64ZExt();
}

// Check if all users of this node will become isel where the second operand
// is the constant zero. If this is so, and if we can negate the condition,
// then we can flip the true and false operands. This will allow the zero to
// be folded with the isel so that we don't need to materialize a register
// containing zero.
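//
// Illustrative note (added, not from the original source): isel can encode a
// zero directly only in its first register operand (an RA field of 0 reads as
// the value 0), so a select whose false arm is (LI 0) would otherwise need a
// register holding zero. Negating the condition and swapping the arms moves
// the zero into the encodable position.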
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  for (const SDNode *User : N->uses()) {
    if (!User->isMachineOpcode())
      return false;
    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
        User->getMachineOpcode() != PPC::SELECT_I8)
      return false;

    SDNode *Op1 = User->getOperand(1).getNode();
    SDNode *Op2 = User->getOperand(2).getNode();
    // If we have a degenerate select with two equal operands, swapping will
    // not do anything, and we may run into an infinite loop.
    if (Op1 == Op2)
      return false;

    if (!Op2->isMachineOpcode())
      return false;

    if (Op2->getMachineOpcode() != PPC::LI &&
        Op2->getMachineOpcode() != PPC::LI8)
      return false;

    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
    if (!C)
      return false;

    if (!C->isZero())
      return false;
  }

  return true;
}

void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  SmallVector<SDNode *, 4> ToReplace;
  for (SDNode *User : N->uses()) {
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    ToReplace.push_back(User);
  }

  for (SDNode *User : ToReplace) {
    SDNode *ResNode =
      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
                             User->getValueType(0), User->getOperand(0),
                             User->getOperand(2),
                             User->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
    LLVM_DEBUG(User->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(ResNode->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(User, ResNode);
  }
}

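// Descriptive note (added, not from the original source): PeepholeCROps
// iteratively simplifies machine-level CR-bit logic (CRAND, CROR, CRXOR, and
// friends) using constant and negated inputs until no more changes are made.
// For example, crand(x, crset) reduces to x, and crand(crnor(y, y), z)
// becomes crandc(z, y).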
void PPCDAGToDAGISel::PeepholeCROps() {
  bool IsModified;
  do {
    IsModified = false;
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode || MachineNode->use_empty())
        continue;
      SDNode *ResNode = MachineNode;

      bool Op1Set = false, Op1Unset = false,
           Op1Not = false,
           Op2Set = false, Op2Unset = false,
           Op2Not = false;

      unsigned Opcode = MachineNode->getMachineOpcode();
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
      case PPC::CRNAND:
      case PPC::CROR:
      case PPC::CRXOR:
      case PPC::CRNOR:
      case PPC::CREQV:
      case PPC::CRANDC:
      case PPC::CRORC: {
        SDValue Op = MachineNode->getOperand(1);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op2Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op2Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op2Not = true;
        }
        LLVM_FALLTHROUGH;
      }
      case PPC::BC:
      case PPC::BCn:
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC: {
        SDValue Op = MachineNode->getOperand(0);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op1Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op1Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op1Not = true;
        }
      }
        break;
      }
      bool SelectSwap = false;
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x & x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set)
          // 1 & y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // x & 1 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset || Op2Unset)
          // x & 0 = 0 & y = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x & ~y = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // nand(x, x) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Set)
          // nand(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // nand(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset || Op2Unset)
          // nand(x, 0) = nand(0, y) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nand(x, ~y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CROR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x | x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set || Op2Set)
          // x | 1 = 1 | y = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // 0 | y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // x | 0 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRXOR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // xor(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // xor(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // xor(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset)
          // xor(0, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // xor(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // xor(~x, y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // xor(x, ~y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNOR:
        if (Op1Set || Op2Set)
          // nor(1, y) -> 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // nor(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // nor(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // nor(~x, y) = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nor(x, ~y) = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CREQV:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // eqv(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // eqv(1, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // eqv(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // eqv(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // eqv(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // eqv(~x, y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // eqv(x, ~y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRANDC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // andc(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // andc(1, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Unset || Op2Set)
          // andc(0, y) = andc(x, 1) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Unset)
          // andc(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // andc(~x, y) = ~(x | y) = nor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // andc(x, ~y) = x & y
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::CRORC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // orc(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set || Op2Unset)
          // orc(1, y) = orc(x, 0) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Set)
          // orc(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // orc(0, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Not)
          // orc(~x, y) = ~(x & y) = nand(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // orc(x, ~y) = x | y
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC:
        if (Op1Set)
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op1Unset)
          ResNode = MachineNode->getOperand(2).getNode();
        else if (Op1Not)
          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
                                           SDLoc(MachineNode),
                                           MachineNode->getValueType(0),
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(2),
                                           MachineNode->getOperand(1));
        break;
      case PPC::BC:
      case PPC::BCn:
        if (Op1Not)
          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
                                                               PPC::BC,
                                           SDLoc(MachineNode),
                                           MVT::Other,
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(1),
                                           MachineNode->getOperand(2));
        // FIXME: Handle Op1Set, Op1Unset here too.
        break;
      }

      // If we're inverting this node because it is used only by selects that
      // we'd like to swap, then swap the selects before the node replacement.
      if (SelectSwap)
        SwapAllSelectUsers(MachineNode);

      if (ResNode != MachineNode) {
        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
        LLVM_DEBUG(MachineNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\nNew: ");
        LLVM_DEBUG(ResNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\n");

        ReplaceUses(MachineNode, ResNode);
        IsModified = true;
      }
    }
    if (IsModified)
      CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
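//
// Illustrative example (added, not from the original source): for
//   (ORI (SLW $x, $y), 7)
// SLW is a "frontier" instruction that clears the high-order 32 bits, and the
// ORI immediate is a positive 15-bit value, so both nodes end up in ToPromote.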
static bool PeepholePPC64ZExtGather(SDValue Op32,
                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
  if (!Op32.isMachineOpcode())
    return false;

  // First, check for the "frontier" instructions (those that will clear the
  // higher-order 32 bits).

  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  // around. If it does not, then these instructions will clear the
  // higher-order bits.
  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
       Op32.getMachineOpcode() == PPC::RLWNM) &&
      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
    ToPromote.insert(Op32.getNode());
    return true;
  }
  // SLW and SRW always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::SLW ||
      Op32.getMachineOpcode() == PPC::SRW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // For LI and LIS, we need the immediate to be positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::LI ||
      Op32.getMachineOpcode() == PPC::LIS) {
    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
      return false;

    ToPromote.insert(Op32.getNode());
    return true;
  }

  // LHBRX and LWBRX always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::LHBRX ||
      Op32.getMachineOpcode() == PPC::LWBRX) {
    ToPromote.insert(Op32.getNode());
    return true;
  }
  // CNTLZW and CNTTZW always produce a value in [0,32], and so the result is
  // zero extended.
  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
      Op32.getMachineOpcode() == PPC::CNTTZW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }
  // Next, check for those instructions we can look through.

  // Assuming the mask does not wrap around, then the higher-order bits are
  // taken directly from the first operand.
  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For OR, the higher-order bits are zero if that is true for both operands.
  // For SELECT_I4, the same is true (but the relevant operand numbers are
  // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());
    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());
    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  return false;
}

void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!Subtarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.
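  //
  // Illustrative example (added, not from the original source): the selected
  // DAG for
  //   %z = zext i32 (shl i32 %a, %b) to i64
  // looks like
  //   (RLDICL (INSERT_SUBREG (IMPLICIT_DEF), (SLW %a, %b), sub_32), 0, 32)
  // and is rewritten here to just (SLW8 %a, %b), since SLW already clears the
  // upper 32 bits.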
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;
    // We now know that this zero extension can be removed by promoting the
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDI_rec:
        NewOpcode = PPC::ANDI8_rec;
        break;
      case PPC::ANDIS_rec:
        NewOpcode = PPC::ANDIS8_rec;
        break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                   ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
      LLVM_DEBUG(PN->dump(CurDAG));

      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static bool isVSXSwap(SDValue N) {
  if (!N->isMachineOpcode())
    return false;
  unsigned Opc = N->getMachineOpcode();

  // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the
  // immediate operand is 2.
  if (Opc == PPC::XXPERMDIs) {
    return isa<ConstantSDNode>(N->getOperand(1)) &&
           N->getConstantOperandVal(1) == 2;
  } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
    return N->getOperand(0) == N->getOperand(1) &&
           isa<ConstantSDNode>(N->getOperand(2)) &&
           N->getConstantOperandVal(2) == 2;
  }

  return false;
}

// TODO: Make this complete and replace with a table-gen bit.
static bool isLaneInsensitive(SDValue N) {
  if (!N->isMachineOpcode())
    return false;

  unsigned Opc = N->getMachineOpcode();
  switch (Opc) {
  default:
    return false;
  case PPC::VAVGSB:
  case PPC::VAVGUB:
  case PPC::VAVGSH:
  case PPC::VAVGUH:
  case PPC::VAVGSW:
  case PPC::VAVGUW:
  case PPC::VMAXFP:
  case PPC::VMAXSB:
  case PPC::VMAXUB:
  case PPC::VMAXSH:
  case PPC::VMAXUH:
  case PPC::VMAXSW:
  case PPC::VMAXUW:
  case PPC::VMINFP:
  case PPC::VMINSB:
  case PPC::VMINUB:
  case PPC::VMINSH:
  case PPC::VMINUH:
  case PPC::VMINSW:
  case PPC::VMINUW:
  case PPC::VADDFP:
  case PPC::VADDUBM:
  case PPC::VADDUHM:
  case PPC::VADDUWM:
  case PPC::VSUBFP:
  case PPC::VSUBUBM:
  case PPC::VSUBUHM:
  case PPC::VSUBUWM:
  case PPC::VAND:
  case PPC::VANDC:
  case PPC::VOR:
  case PPC::VORC:
  case PPC::VXOR:
  case PPC::VNOR:
  case PPC::VMULUWM:
    return true;
  }
}

// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
// lane-insensitive.
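//
// Illustrative example (added, not from the original source):
//   xxswapd (vadduwm (xxswapd $a), (xxswapd $b))
// can be replaced by
//   vadduwm $a, $b
// because vadduwm operates element-wise and does not care which lane each
// element sits in.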
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
  // Our desired xxswap might be the source of a COPY_TO_REGCLASS.
  // TODO: Can we put this in a common method for the DAG?
  auto SkipRCCopy = [](SDValue V) {
    while (V->isMachineOpcode() &&
           V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
      // All values in the chain should have a single use.
      if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
        return SDValue();
      V = V->getOperand(0);
    }
    return V.hasOneUse() ? V : SDValue();
  };

  SDValue VecOp = SkipRCCopy(N->getOperand(0));
  if (!VecOp || !isLaneInsensitive(VecOp))
    return;

  SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
          RHS = SkipRCCopy(VecOp.getOperand(1));
  if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
    return;

  // These swaps may still have chain-uses here; count on dead code elimination
  // in following passes to remove them.
  DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
  DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
  DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
}

void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (isVSXSwap(SDValue(N, 0)))
      reduceVSXSwap(N, CurDAG);

    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;

    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }
    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N.
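    //
    // Illustrative example (added, not from the original source):
    //   (LWZ 0, (ADDI8 $base, 16))  -->  (LWZ 16, $base)
    // i.e. the immediate from the feeding ADDI becomes the displacement of the
    // memory operation, and the ADDI can then be removed if it has no other
    // uses.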
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions. (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset, we just need to also
        // update the offset (i.e. the symbol addend) on the addis.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset,
                                                 Flags);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
                                                Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new PPCDAGToDAGISel(TM, OptLevel);
}