AArch64InstructionSelector.cpp 243 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
7627862796280628162826283628462856286628762886289629062916292629362946295629662976298629963006301630263036304630563066307630863096310631163126313631463156316631763186319632063216322632363246325632663276328632963306331633263336334633563366337633863396340634163426343634463456346634763486349635063516352635363546355635663576358635963606361636263636364636563666367636863696370637163726373637463756376637763786379638063816382638363846385638663876388638963906391639263936394639563966397639863996400640164026403640464056406640764086409641064116412641364146415641664176418641964206421642264236424642564266427642864296430643164326433643464356436643764386439644064416442644364446445644664476448644964506451645264536454645564566457645864596460646164626463646464656466646764686469647064716472647364746475647664776478647964806481648264836484648564866487648864896490649164926493649464956496649764986499650065016502650365046505650665076508650965106511651265136514651565166517651865196520652165226523652465256526652765286529653065316532653365346535653665376538653965406541654265436544654565466547654865496550655165526553655465556556655765586559656065616562656365646565656665676568656965706571657265736574657565766577657865796580658165826583658465856586658765886589659065916592659365946595659665976598659966006601660266036604660566066607660866096610661166126613661466156616661766186619662066216622662366246625662666276628662966306631
  1. //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. /// This file implements the targeting of the InstructionSelector class for
  10. /// AArch64.
  11. /// \todo This should be generated by TableGen.
  12. //===----------------------------------------------------------------------===//
  13. #include "AArch64GlobalISelUtils.h"
  14. #include "AArch64InstrInfo.h"
  15. #include "AArch64MachineFunctionInfo.h"
  16. #include "AArch64RegisterBankInfo.h"
  17. #include "AArch64RegisterInfo.h"
  18. #include "AArch64Subtarget.h"
  19. #include "AArch64TargetMachine.h"
  20. #include "MCTargetDesc/AArch64AddressingModes.h"
  21. #include "MCTargetDesc/AArch64MCTargetDesc.h"
  22. #include "llvm/ADT/Optional.h"
  23. #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
  24. #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  25. #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
  26. #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  27. #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  28. #include "llvm/CodeGen/MachineBasicBlock.h"
  29. #include "llvm/CodeGen/MachineConstantPool.h"
  30. #include "llvm/CodeGen/MachineFunction.h"
  31. #include "llvm/CodeGen/MachineInstr.h"
  32. #include "llvm/CodeGen/MachineInstrBuilder.h"
  33. #include "llvm/CodeGen/MachineMemOperand.h"
  34. #include "llvm/CodeGen/MachineOperand.h"
  35. #include "llvm/CodeGen/MachineRegisterInfo.h"
  36. #include "llvm/CodeGen/TargetOpcodes.h"
  37. #include "llvm/IR/Constants.h"
  38. #include "llvm/IR/DerivedTypes.h"
  39. #include "llvm/IR/Instructions.h"
  40. #include "llvm/IR/PatternMatch.h"
  41. #include "llvm/IR/Type.h"
  42. #include "llvm/IR/IntrinsicsAArch64.h"
  43. #include "llvm/Pass.h"
  44. #include "llvm/Support/Debug.h"
  45. #include "llvm/Support/raw_ostream.h"
  46. #define DEBUG_TYPE "aarch64-isel"
  47. using namespace llvm;
  48. using namespace MIPatternMatch;
  49. using namespace AArch64GISelUtils;
  // Forward declarations. Within the visible part of this file these two types
  // are only named as pointer parameters of setupMF(), which hands them on to
  // InstructionSelector::setupMF().
  50. namespace llvm {
  51. class BlockFrequencyInfo;
  52. class ProfileSummaryInfo;
  53. } // end namespace llvm
  54. namespace {
  // The macro is defined only for the duration of this one #include and is
  // undefined again right after it.
  // NOTE(review): presumably it selects just the predicate-bitset section of
  // the generated AArch64GenGlobalISel.inc -- confirm against that file.
  55. #define GET_GLOBALISEL_PREDICATE_BITSET
  56. #include "AArch64GenGlobalISel.inc"
  57. #undef GET_GLOBALISEL_PREDICATE_BITSET
  58. class AArch64InstructionSelector : public InstructionSelector {
  59. public:
  60. AArch64InstructionSelector(const AArch64TargetMachine &TM,
  61. const AArch64Subtarget &STI,
  62. const AArch64RegisterBankInfo &RBI);
  63. bool select(MachineInstr &I) override;
  /// \returns DEBUG_TYPE, i.e. the string "aarch64-isel" defined near the top
  /// of this file.
  64. static const char *getName() { return DEBUG_TYPE; }
  /// Per-MachineFunction setup for this selector.
  ///
  /// Forwards all arguments to InstructionSelector::setupMF(), points MIB at
  /// \p MF, refreshes the cached ProduceNonFlagSettingCondBr flag, resets
  /// MFReturnAddr to a default-constructed Register, and finally runs
  /// processPHIs() on \p MF.
  65. void setupMF(MachineFunction &MF, GISelKnownBits *KB,
  66. CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
  67. BlockFrequencyInfo *BFI) override {
  // Let the base class do its own setup first, then bind the builder to MF.
  68. InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
  69. MIB.setMF(MF);
  70. // hasFnAttribute() is expensive to call on every BRCOND selection, so
  71. // cache it here for each run of the selector.
  // The cached flag is true exactly when the function does NOT carry the
  // SpeculativeLoadHardening attribute.
  72. ProduceNonFlagSettingCondBr =
  73. !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  // Overwrite MFReturnAddr with a default-constructed Register.
  // NOTE(review): presumably so a value from a previously processed function
  // is not reused -- confirm against the users of MFReturnAddr.
  74. MFReturnAddr = Register();
  // Preprocess G_PHIs before selection begins (see processPHIs()).
  75. processPHIs(MF);
  76. }
  77. private:
  78. /// tblgen-erated 'select' implementation, used as the initial selector for
  79. /// the patterns that don't require complex C++.
  80. bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
  81. // A lowering phase that runs before any selection attempts.
  82. // Returns true if the instruction was modified.
  83. bool preISelLower(MachineInstr &I);
  84. // An early selection function that runs before the selectImpl() call.
  85. bool earlySelect(MachineInstr &I);
  86. // Do some preprocessing of G_PHIs before we begin selection.
  87. void processPHIs(MachineFunction &MF);
  88. bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
  89. /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  90. bool contractCrossBankCopyIntoStore(MachineInstr &I,
  91. MachineRegisterInfo &MRI);
  92. bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
  93. bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
  94. MachineRegisterInfo &MRI) const;
  95. bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
  96. MachineRegisterInfo &MRI) const;
  97. ///@{
  98. /// Helper functions for selectCompareBranch.
  99. bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
  100. MachineIRBuilder &MIB) const;
  101. bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
  102. MachineIRBuilder &MIB) const;
  103. bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
  104. MachineIRBuilder &MIB) const;
  105. bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
  106. MachineBasicBlock *DstMBB,
  107. MachineIRBuilder &MIB) const;
  108. ///@}
  109. bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
  110. MachineRegisterInfo &MRI);
  111. bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  112. bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
  113. // Helper to generate an equivalent of scalar_to_vector into a new register.
  114. // NOTE(review): this declaration has no 'Dst' parameter (only DstRC) and
  // returns a MachineInstr *; presumably the new register is the def of the
  // returned instruction -- confirm against the definition.
  115. MachineInstr *emitScalarToVector(unsigned EltSize,
  116. const TargetRegisterClass *DstRC,
  117. Register Scalar,
  118. MachineIRBuilder &MIRBuilder) const;
  119. /// Emit a lane insert into \p DstReg, or a new vector register if None is
  120. /// provided.
  121. ///
  122. /// The lane inserted into is defined by \p LaneIdx. The vector source
  123. /// register is given by \p SrcReg. The register containing the element is
  124. /// given by \p EltReg.
  125. MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
  126. Register EltReg, unsigned LaneIdx,
  127. const RegisterBank &RB,
  128. MachineIRBuilder &MIRBuilder) const;
  129. /// Emit a sequence of instructions representing a constant \p CV for a
  130. /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  131. ///
  132. /// \returns the last instruction in the sequence on success, and nullptr
  133. /// otherwise.
  134. MachineInstr *emitConstantVector(Register Dst, Constant *CV,
  135. MachineIRBuilder &MIRBuilder,
  136. MachineRegisterInfo &MRI);
  137. bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
  138. bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
  139. MachineRegisterInfo &MRI);
  140. /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  141. /// SUBREG_TO_REG.
  142. bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  143. bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  144. bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  145. bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  146. bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  147. bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  148. bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  149. bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
  150. /// Helper function to select vector load intrinsics like
  151. /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  152. /// \p Opc is the opcode that the selected instruction should use.
  153. /// \p NumVecs is the number of vector destinations for the instruction.
  154. /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  155. bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
  156. MachineInstr &I);
  157. bool selectIntrinsicWithSideEffects(MachineInstr &I,
  158. MachineRegisterInfo &MRI);
  159. bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  160. bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  161. bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  162. bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  163. bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  164. bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  165. bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  166. bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  167. bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
  168. bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
  169. unsigned emitConstantPoolEntry(const Constant *CPVal,
  170. MachineFunction &MF) const;
  171. MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
  172. MachineIRBuilder &MIRBuilder) const;
  173. // Emit a vector concat operation.
  174. MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
  175. Register Op2,
  176. MachineIRBuilder &MIRBuilder) const;
  177. // Emit an integer compare between LHS and RHS, which checks for Predicate.
  178. MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
  179. MachineOperand &Predicate,
  180. MachineIRBuilder &MIRBuilder) const;
  181. /// Emit a floating point comparison between \p LHS and \p RHS.
  182. /// \p Pred if given is the intended predicate to use.
  183. MachineInstr *emitFPCompare(Register LHS, Register RHS,
  184. MachineIRBuilder &MIRBuilder,
  185. Optional<CmpInst::Predicate> = None) const;
  186. MachineInstr *emitInstr(unsigned Opcode,
  187. std::initializer_list<llvm::DstOp> DstOps,
  188. std::initializer_list<llvm::SrcOp> SrcOps,
  189. MachineIRBuilder &MIRBuilder,
  190. const ComplexRendererFns &RenderFns = None) const;
  191. /// Helper function to emit an add or sub instruction.
  192. ///
  193. /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants below
  194. /// in a specific order.
  195. ///
  196. /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  197. ///
  198. /// \code
  199. /// const std::array<std::array<unsigned, 2>, 5> Table {
  200. /// {{AArch64::ADDXri, AArch64::ADDWri},
  201. /// {AArch64::ADDXrs, AArch64::ADDWrs},
  202. /// {AArch64::ADDXrr, AArch64::ADDWrr},
  203. /// {AArch64::SUBXri, AArch64::SUBWri},
  204. /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
  205. /// \endcode
  206. ///
  207. /// Each row in the table corresponds to a different addressing mode. Each
  208. /// column corresponds to a different register size.
  209. ///
  210. /// \attention Rows must be structured as follows:
  211. /// - Row 0: The ri opcode variants
  212. /// - Row 1: The rs opcode variants
  213. /// - Row 2: The rr opcode variants
  214. /// - Row 3: The ri opcode variants for negative immediates
  215. /// - Row 4: The rx opcode variants
  216. ///
  217. /// \attention Columns must be structured as follows:
  218. /// - Column 0: The 64-bit opcode variants
  219. /// - Column 1: The 32-bit opcode variants
  220. ///
  221. /// \p Dst is the destination register of the binop to emit.
  222. /// \p LHS is the left-hand operand of the binop to emit.
  223. /// \p RHS is the right-hand operand of the binop to emit.
  224. MachineInstr *emitAddSub(
  225. const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
  226. Register Dst, MachineOperand &LHS, MachineOperand &RHS,
  227. MachineIRBuilder &MIRBuilder) const;
  228. MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
  229. MachineOperand &RHS,
  230. MachineIRBuilder &MIRBuilder) const;
  231. MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
  232. MachineIRBuilder &MIRBuilder) const;
  233. MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
  234. MachineIRBuilder &MIRBuilder) const;
  235. MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
  236. MachineIRBuilder &MIRBuilder) const;
  237. MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
  238. MachineIRBuilder &MIRBuilder) const;
  239. MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
  240. AArch64CC::CondCode CC,
  241. MachineIRBuilder &MIRBuilder) const;
  242. MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
  243. const RegisterBank &DstRB, LLT ScalarTy,
  244. Register VecReg, unsigned LaneIdx,
  245. MachineIRBuilder &MIRBuilder) const;
  246. MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
  247. AArch64CC::CondCode Pred,
  248. MachineIRBuilder &MIRBuilder) const;
  249. /// Emit a CSet for a FP compare.
  250. ///
  251. /// \p Dst is expected to be a 32-bit scalar register.
  252. MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
  253. MachineIRBuilder &MIRBuilder) const;
  254. /// Emit the overflow op for \p Opcode.
  255. ///
  256. /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  257. /// G_USUBO, etc.
  258. std::pair<MachineInstr *, AArch64CC::CondCode>
  259. emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
  260. MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
  261. /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  262. /// \p IsNegative is true if the test should be "not zero".
  263. /// This will also optimize the test bit instruction when possible.
  264. MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
  265. MachineBasicBlock *DstMBB,
  266. MachineIRBuilder &MIB) const;
  267. /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  268. MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
  269. MachineBasicBlock *DestMBB,
  270. MachineIRBuilder &MIB) const;
  271. // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  272. // We use these manually instead of using the importer since it doesn't
  273. // support SDNodeXForm.
  274. ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  275. ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  276. ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  277. ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
  278. ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  279. ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  280. ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
  281. ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
  282. unsigned Size) const;
  283. ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
  284. return selectAddrModeUnscaled(Root, 1);
  285. }
  286. ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
  287. return selectAddrModeUnscaled(Root, 2);
  288. }
  289. ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
  290. return selectAddrModeUnscaled(Root, 4);
  291. }
  292. ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
  293. return selectAddrModeUnscaled(Root, 8);
  294. }
  295. ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
  296. return selectAddrModeUnscaled(Root, 16);
  297. }
  298. /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  299. /// from complex pattern matchers like selectAddrModeIndexed().
  300. ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
  301. MachineRegisterInfo &MRI) const;
  302. ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
  303. unsigned Size) const;
  304. template <int Width>
  305. ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
  306. return selectAddrModeIndexed(Root, Width / 8);
  307. }
  308. bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
  309. const MachineRegisterInfo &MRI) const;
  310. ComplexRendererFns
  311. selectAddrModeShiftedExtendXReg(MachineOperand &Root,
  312. unsigned SizeInBytes) const;
  313. /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  314. /// or not a shift + extend should be folded into an addressing mode. Returns
  315. /// None when this is not profitable or possible.
  316. ComplexRendererFns
  317. selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
  318. MachineOperand &Offset, unsigned SizeInBytes,
  319. bool WantsExt) const;
  320. ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  321. ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
  322. unsigned SizeInBytes) const;
  323. template <int Width>
  324. ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
  325. return selectAddrModeXRO(Root, Width / 8);
  326. }
  327. ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
  328. unsigned SizeInBytes) const;
  329. template <int Width>
  330. ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
  331. return selectAddrModeWRO(Root, Width / 8);
  332. }
  333. ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
  334. bool AllowROR = false) const;
  335. ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
  336. return selectShiftedRegister(Root);
  337. }
  338. ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
  339. return selectShiftedRegister(Root, true);
  340. }
  341. /// Given an extend instruction, determine the correct shift-extend type for
  342. /// that instruction.
  343. ///
  344. /// If the instruction is going to be used in a load or store, pass
  345. /// \p IsLoadStore = true.
  346. AArch64_AM::ShiftExtendType
  347. getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
  348. bool IsLoadStore = false) const;
  349. /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  350. ///
  351. /// \returns Either \p Reg if no change was necessary, or the new register
  352. /// created by moving \p Reg.
  353. ///
  354. /// Note: This uses emitCopy right now.
  355. Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
  356. MachineIRBuilder &MIB) const;
  357. ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
  358. void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
  359. int OpIdx = -1) const;
  360. void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
  361. int OpIdx = -1) const;
  362. void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
  363. int OpIdx = -1) const;
  364. void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
  365. int OpIdx = -1) const;
  366. void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
  367. int OpIdx = -1) const;
  368. void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
  369. int OpIdx = -1) const;
  370. // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  371. void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
  372. // Optimization methods.
  373. bool tryOptSelect(MachineInstr &MI);
  374. MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
  375. MachineOperand &Predicate,
  376. MachineIRBuilder &MIRBuilder) const;
  377. /// Return true if \p MI is a load or store of \p NumBytes bytes.
  378. bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
  379. /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  380. /// register zeroed out. In other words, the result of MI has been explicitly
  381. /// zero extended.
  382. bool isDef32(const MachineInstr &MI) const;
  383. const AArch64TargetMachine &TM;
  384. const AArch64Subtarget &STI;
  385. const AArch64InstrInfo &TII;
  386. const AArch64RegisterInfo &TRI;
  387. const AArch64RegisterBankInfo &RBI;
  388. bool ProduceNonFlagSettingCondBr = false;
  389. // Some cached values used during selection.
  390. // We use LR as a live-in register, and we keep track of it here as it can be
  391. // clobbered by calls.
  392. Register MFReturnAddr;
  393. MachineIRBuilder MIB;
  394. #define GET_GLOBALISEL_PREDICATES_DECL
  395. #include "AArch64GenGlobalISel.inc"
  396. #undef GET_GLOBALISEL_PREDICATES_DECL
  397. // We declare the temporaries used by selectImpl() in the class to minimize the
  398. // cost of constructing placeholder values.
  399. #define GET_GLOBALISEL_TEMPORARIES_DECL
  400. #include "AArch64GenGlobalISel.inc"
  401. #undef GET_GLOBALISEL_TEMPORARIES_DECL
  402. };
  403. } // end anonymous namespace
  404. #define GET_GLOBALISEL_IMPL
  405. #include "AArch64GenGlobalISel.inc"
  406. #undef GET_GLOBALISEL_IMPL
  // Out-of-line constructor. Binds the target machine, subtarget and register
  // bank info references, and takes the instruction info and register info
  // from \p STI. The remaining initializers are pulled in from
  // AArch64GenGlobalISel.inc under the GET_GLOBALISEL_PREDICATES_INIT and
  // GET_GLOBALISEL_TEMPORARIES_INIT guards, so those includes have to stay
  // inside the member-initializer list, after the trailing comma of RBI(RBI).
  407. AArch64InstructionSelector::AArch64InstructionSelector(
  408. const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
  409. const AArch64RegisterBankInfo &RBI)
  410. : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
  411. RBI(RBI),
  412. #define GET_GLOBALISEL_PREDICATES_INIT
  413. #include "AArch64GenGlobalISel.inc"
  414. #undef GET_GLOBALISEL_PREDICATES_INIT
  415. #define GET_GLOBALISEL_TEMPORARIES_INIT
  416. #include "AArch64GenGlobalISel.inc"
  417. #undef GET_GLOBALISEL_TEMPORARIES_INIT
  418. {
  // Nothing to do in the body: all state is set up by the initializers above.
  419. }
  420. // FIXME: This should be target-independent, inferred from the types declared
  421. // for each class in the bank.
  422. static const TargetRegisterClass *
  423. getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
  424. const RegisterBankInfo &RBI,
  425. bool GetAllRegSet = false) {
  426. if (RB.getID() == AArch64::GPRRegBankID) {
  427. if (Ty.getSizeInBits() <= 32)
  428. return GetAllRegSet ? &AArch64::GPR32allRegClass
  429. : &AArch64::GPR32RegClass;
  430. if (Ty.getSizeInBits() == 64)
  431. return GetAllRegSet ? &AArch64::GPR64allRegClass
  432. : &AArch64::GPR64RegClass;
  433. if (Ty.getSizeInBits() == 128)
  434. return &AArch64::XSeqPairsClassRegClass;
  435. return nullptr;
  436. }
  437. if (RB.getID() == AArch64::FPRRegBankID) {
  438. switch (Ty.getSizeInBits()) {
  439. case 8:
  440. return &AArch64::FPR8RegClass;
  441. case 16:
  442. return &AArch64::FPR16RegClass;
  443. case 32:
  444. return &AArch64::FPR32RegClass;
  445. case 64:
  446. return &AArch64::FPR64RegClass;
  447. case 128:
  448. return &AArch64::FPR128RegClass;
  449. }
  450. return nullptr;
  451. }
  452. return nullptr;
  453. }
  454. /// Given a register bank, and size in bits, return the smallest register class
  455. /// that can represent that combination.
  456. static const TargetRegisterClass *
  457. getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
  458. bool GetAllRegSet = false) {
  459. unsigned RegBankID = RB.getID();
  460. if (RegBankID == AArch64::GPRRegBankID) {
  461. if (SizeInBits <= 32)
  462. return GetAllRegSet ? &AArch64::GPR32allRegClass
  463. : &AArch64::GPR32RegClass;
  464. if (SizeInBits == 64)
  465. return GetAllRegSet ? &AArch64::GPR64allRegClass
  466. : &AArch64::GPR64RegClass;
  467. if (SizeInBits == 128)
  468. return &AArch64::XSeqPairsClassRegClass;
  469. }
  470. if (RegBankID == AArch64::FPRRegBankID) {
  471. switch (SizeInBits) {
  472. default:
  473. return nullptr;
  474. case 8:
  475. return &AArch64::FPR8RegClass;
  476. case 16:
  477. return &AArch64::FPR16RegClass;
  478. case 32:
  479. return &AArch64::FPR32RegClass;
  480. case 64:
  481. return &AArch64::FPR64RegClass;
  482. case 128:
  483. return &AArch64::FPR128RegClass;
  484. }
  485. }
  486. return nullptr;
  487. }
  488. /// Returns the correct subregister to use for a given register class.
  489. static bool getSubRegForClass(const TargetRegisterClass *RC,
  490. const TargetRegisterInfo &TRI, unsigned &SubReg) {
  491. switch (TRI.getRegSizeInBits(*RC)) {
  492. case 8:
  493. SubReg = AArch64::bsub;
  494. break;
  495. case 16:
  496. SubReg = AArch64::hsub;
  497. break;
  498. case 32:
  499. if (RC != &AArch64::FPR32RegClass)
  500. SubReg = AArch64::sub_32;
  501. else
  502. SubReg = AArch64::ssub;
  503. break;
  504. case 64:
  505. SubReg = AArch64::dsub;
  506. break;
  507. default:
  508. LLVM_DEBUG(
  509. dbgs() << "Couldn't find appropriate subregister for register class.");
  510. return false;
  511. }
  512. return true;
  513. }
  514. /// Returns the minimum size the given register bank can hold.
  515. static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  516. switch (RB.getID()) {
  517. case AArch64::GPRRegBankID:
  518. return 32;
  519. case AArch64::FPRRegBankID:
  520. return 8;
  521. default:
  522. llvm_unreachable("Tried to get minimum size for unknown register bank.");
  523. }
  524. }
  525. /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
  526. /// Helper function for functions like createDTuple and createQTuple.
  527. ///
  528. /// \p RegClassIDs - The list of register class IDs available for some tuple of
  529. /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
  530. /// expected to contain between 2 and 4 tuple classes.
  531. ///
  532. /// \p SubRegs - The list of subregister classes associated with each register
  533. /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
  534. /// subregister class. The index of each subregister class is expected to
  535. /// correspond with the index of each register class.
  536. ///
  537. /// \returns Either the destination register of REG_SEQUENCE instruction that
  538. /// was created, or the 0th element of \p Regs if \p Regs contains a single
  539. /// element.
  540. static Register createTuple(ArrayRef<Register> Regs,
  541. const unsigned RegClassIDs[],
  542. const unsigned SubRegs[], MachineIRBuilder &MIB) {
  543. unsigned NumRegs = Regs.size();
  544. if (NumRegs == 1)
  545. return Regs[0];
  546. assert(NumRegs >= 2 && NumRegs <= 4 &&
  547. "Only support between two and 4 registers in a tuple!");
  548. const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  549. auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  550. auto RegSequence =
  551. MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  552. for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
  553. RegSequence.addUse(Regs[I]);
  554. RegSequence.addImm(SubRegs[I]);
  555. }
  556. return RegSequence.getReg(0);
  557. }
  558. /// Create a tuple of D-registers using the registers in \p Regs.
  559. static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  560. static const unsigned RegClassIDs[] = {
  561. AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  562. static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
  563. AArch64::dsub2, AArch64::dsub3};
  564. return createTuple(Regs, RegClassIDs, SubRegs, MIB);
  565. }
  566. /// Create a tuple of Q-registers using the registers in \p Regs.
  567. static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  568. static const unsigned RegClassIDs[] = {
  569. AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  570. static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
  571. AArch64::qsub2, AArch64::qsub3};
  572. return createTuple(Regs, RegClassIDs, SubRegs, MIB);
  573. }
  574. static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  575. auto &MI = *Root.getParent();
  576. auto &MBB = *MI.getParent();
  577. auto &MF = *MBB.getParent();
  578. auto &MRI = MF.getRegInfo();
  579. uint64_t Immed;
  580. if (Root.isImm())
  581. Immed = Root.getImm();
  582. else if (Root.isCImm())
  583. Immed = Root.getCImm()->getZExtValue();
  584. else if (Root.isReg()) {
  585. auto ValAndVReg =
  586. getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
  587. if (!ValAndVReg)
  588. return None;
  589. Immed = ValAndVReg->Value.getSExtValue();
  590. } else
  591. return None;
  592. return Immed;
  593. }
  594. /// Check whether \p I is a currently unsupported binary operation:
  595. /// - it has an unsized type
  596. /// - an operand is not a vreg
  597. /// - all operands are not in the same bank
  598. /// These are checks that should someday live in the verifier, but right now,
  599. /// these are mostly limitations of the aarch64 selector.
  600. static bool unsupportedBinOp(const MachineInstr &I,
  601. const AArch64RegisterBankInfo &RBI,
  602. const MachineRegisterInfo &MRI,
  603. const AArch64RegisterInfo &TRI) {
  604. LLT Ty = MRI.getType(I.getOperand(0).getReg());
  605. if (!Ty.isValid()) {
  606. LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
  607. return true;
  608. }
  609. const RegisterBank *PrevOpBank = nullptr;
  610. for (auto &MO : I.operands()) {
  611. // FIXME: Support non-register operands.
  612. if (!MO.isReg()) {
  613. LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
  614. return true;
  615. }
  616. // FIXME: Can generic operations have physical registers operands? If
  617. // so, this will need to be taught about that, and we'll need to get the
  618. // bank out of the minimal class for the register.
  619. // Either way, this needs to be documented (and possibly verified).
  620. if (!Register::isVirtualRegister(MO.getReg())) {
  621. LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
  622. return true;
  623. }
  624. const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
  625. if (!OpBank) {
  626. LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
  627. return true;
  628. }
  629. if (PrevOpBank && OpBank != PrevOpBank) {
  630. LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
  631. return true;
  632. }
  633. PrevOpBank = OpBank;
  634. }
  635. return false;
  636. }
  637. /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
  638. /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
  639. /// and of size \p OpSize.
  640. /// \returns \p GenericOpc if the combination is unsupported.
  641. static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
  642. unsigned OpSize) {
  643. switch (RegBankID) {
  644. case AArch64::GPRRegBankID:
  645. if (OpSize == 32) {
  646. switch (GenericOpc) {
  647. case TargetOpcode::G_SHL:
  648. return AArch64::LSLVWr;
  649. case TargetOpcode::G_LSHR:
  650. return AArch64::LSRVWr;
  651. case TargetOpcode::G_ASHR:
  652. return AArch64::ASRVWr;
  653. default:
  654. return GenericOpc;
  655. }
  656. } else if (OpSize == 64) {
  657. switch (GenericOpc) {
  658. case TargetOpcode::G_PTR_ADD:
  659. return AArch64::ADDXrr;
  660. case TargetOpcode::G_SHL:
  661. return AArch64::LSLVXr;
  662. case TargetOpcode::G_LSHR:
  663. return AArch64::LSRVXr;
  664. case TargetOpcode::G_ASHR:
  665. return AArch64::ASRVXr;
  666. default:
  667. return GenericOpc;
  668. }
  669. }
  670. break;
  671. case AArch64::FPRRegBankID:
  672. switch (OpSize) {
  673. case 32:
  674. switch (GenericOpc) {
  675. case TargetOpcode::G_FADD:
  676. return AArch64::FADDSrr;
  677. case TargetOpcode::G_FSUB:
  678. return AArch64::FSUBSrr;
  679. case TargetOpcode::G_FMUL:
  680. return AArch64::FMULSrr;
  681. case TargetOpcode::G_FDIV:
  682. return AArch64::FDIVSrr;
  683. default:
  684. return GenericOpc;
  685. }
  686. case 64:
  687. switch (GenericOpc) {
  688. case TargetOpcode::G_FADD:
  689. return AArch64::FADDDrr;
  690. case TargetOpcode::G_FSUB:
  691. return AArch64::FSUBDrr;
  692. case TargetOpcode::G_FMUL:
  693. return AArch64::FMULDrr;
  694. case TargetOpcode::G_FDIV:
  695. return AArch64::FDIVDrr;
  696. case TargetOpcode::G_OR:
  697. return AArch64::ORRv8i8;
  698. default:
  699. return GenericOpc;
  700. }
  701. }
  702. break;
  703. }
  704. return GenericOpc;
  705. }
  706. /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
  707. /// appropriate for the (value) register bank \p RegBankID and of memory access
  708. /// size \p OpSize. This returns the variant with the base+unsigned-immediate
  709. /// addressing mode (e.g., LDRXui).
  710. /// \returns \p GenericOpc if the combination is unsupported.
  711. static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
  712. unsigned OpSize) {
  713. const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  714. switch (RegBankID) {
  715. case AArch64::GPRRegBankID:
  716. switch (OpSize) {
  717. case 8:
  718. return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
  719. case 16:
  720. return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
  721. case 32:
  722. return isStore ? AArch64::STRWui : AArch64::LDRWui;
  723. case 64:
  724. return isStore ? AArch64::STRXui : AArch64::LDRXui;
  725. }
  726. break;
  727. case AArch64::FPRRegBankID:
  728. switch (OpSize) {
  729. case 8:
  730. return isStore ? AArch64::STRBui : AArch64::LDRBui;
  731. case 16:
  732. return isStore ? AArch64::STRHui : AArch64::LDRHui;
  733. case 32:
  734. return isStore ? AArch64::STRSui : AArch64::LDRSui;
  735. case 64:
  736. return isStore ? AArch64::STRDui : AArch64::LDRDui;
  737. case 128:
  738. return isStore ? AArch64::STRQui : AArch64::LDRQui;
  739. }
  740. break;
  741. }
  742. return GenericOpc;
  743. }
  744. #ifndef NDEBUG
  745. /// Helper function that verifies that we have a valid copy at the end of
  746. /// selectCopy. Verifies that the source and dest have the expected sizes and
  747. /// then returns true.
  748. static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
  749. const MachineRegisterInfo &MRI,
  750. const TargetRegisterInfo &TRI,
  751. const RegisterBankInfo &RBI) {
  752. const Register DstReg = I.getOperand(0).getReg();
  753. const Register SrcReg = I.getOperand(1).getReg();
  754. const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  755. const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  756. // Make sure the size of the source and dest line up.
  757. assert(
  758. (DstSize == SrcSize ||
  759. // Copies are a mean to setup initial types, the number of
  760. // bits may not exactly match.
  761. (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
  762. // Copies are a mean to copy bits around, as long as we are
  763. // on the same register class, that's fine. Otherwise, that
  764. // means we need some SUBREG_TO_REG or AND & co.
  765. (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
  766. "Copy with different width?!");
  767. // Check the size of the destination.
  768. assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
  769. "GPRs cannot get more than 64-bit width values");
  770. return true;
  771. }
  772. #endif
  773. /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
  774. /// to \p *To.
  775. ///
  776. /// E.g "To = COPY SrcReg:SubReg"
  777. static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
  778. const RegisterBankInfo &RBI, Register SrcReg,
  779. const TargetRegisterClass *To, unsigned SubReg) {
  780. assert(SrcReg.isValid() && "Expected a valid source register?");
  781. assert(To && "Destination register class cannot be null");
  782. assert(SubReg && "Expected a valid subregister");
  783. MachineIRBuilder MIB(I);
  784. auto SubRegCopy =
  785. MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  786. MachineOperand &RegOp = I.getOperand(1);
  787. RegOp.setReg(SubRegCopy.getReg(0));
  788. // It's possible that the destination register won't be constrained. Make
  789. // sure that happens.
  790. if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
  791. RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
  792. return true;
  793. }
  794. /// Helper function to get the source and destination register classes for a
  795. /// copy. Returns a std::pair containing the source register class for the
  796. /// copy, and the destination register class for the copy. If a register class
  797. /// cannot be determined, then it will be nullptr.
  798. static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
  799. getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
  800. MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
  801. const RegisterBankInfo &RBI) {
  802. Register DstReg = I.getOperand(0).getReg();
  803. Register SrcReg = I.getOperand(1).getReg();
  804. const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  805. const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  806. unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  807. unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  808. // Special casing for cross-bank copies of s1s. We can technically represent
  809. // a 1-bit value with any size of register. The minimum size for a GPR is 32
  810. // bits. So, we need to put the FPR on 32 bits as well.
  811. //
  812. // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  813. // then we can pull it into the helpers that get the appropriate class for a
  814. // register bank. Or make a new helper that carries along some constraint
  815. // information.
  816. if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
  817. SrcSize = DstSize = 32;
  818. return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
  819. getMinClassForRegBank(DstRegBank, DstSize, true)};
  820. }
  821. static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
  822. MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
  823. const RegisterBankInfo &RBI) {
  824. Register DstReg = I.getOperand(0).getReg();
  825. Register SrcReg = I.getOperand(1).getReg();
  826. const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  827. const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  828. // Find the correct register classes for the source and destination registers.
  829. const TargetRegisterClass *SrcRC;
  830. const TargetRegisterClass *DstRC;
  831. std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
  832. if (!DstRC) {
  833. LLVM_DEBUG(dbgs() << "Unexpected dest size "
  834. << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
  835. return false;
  836. }
  837. // A couple helpers below, for making sure that the copy we produce is valid.
  838. // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  839. // to verify that the src and dst are the same size, since that's handled by
  840. // the SUBREG_TO_REG.
  841. bool KnownValid = false;
  842. // Returns true, or asserts if something we don't expect happens. Instead of
  843. // returning true, we return isValidCopy() to ensure that we verify the
  844. // result.
  845. auto CheckCopy = [&]() {
  846. // If we have a bitcast or something, we can't have physical registers.
  847. assert((I.isCopy() ||
  848. (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
  849. !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
  850. "No phys reg on generic operator!");
  851. bool ValidCopy = true;
  852. #ifndef NDEBUG
  853. ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
  854. assert(ValidCopy && "Invalid copy.");
  855. #endif
  856. (void)KnownValid;
  857. return ValidCopy;
  858. };
  859. // Is this a copy? If so, then we may need to insert a subregister copy.
  860. if (I.isCopy()) {
  861. // Yes. Check if there's anything to fix up.
  862. if (!SrcRC) {
  863. LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
  864. return false;
  865. }
  866. unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
  867. unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
  868. unsigned SubReg;
  869. // If the source bank doesn't support a subregister copy small enough,
  870. // then we first need to copy to the destination bank.
  871. if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
  872. const TargetRegisterClass *DstTempRC =
  873. getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
  874. getSubRegForClass(DstRC, TRI, SubReg);
  875. MachineIRBuilder MIB(I);
  876. auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
  877. copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
  878. } else if (SrcSize > DstSize) {
  879. // If the source register is bigger than the destination we need to
  880. // perform a subregister copy.
  881. const TargetRegisterClass *SubRegRC =
  882. getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
  883. getSubRegForClass(SubRegRC, TRI, SubReg);
  884. copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
  885. } else if (DstSize > SrcSize) {
  886. // If the destination register is bigger than the source we need to do
  887. // a promotion using SUBREG_TO_REG.
  888. const TargetRegisterClass *PromotionRC =
  889. getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
  890. getSubRegForClass(SrcRC, TRI, SubReg);
  891. Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
  892. BuildMI(*I.getParent(), I, I.getDebugLoc(),
  893. TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
  894. .addImm(0)
  895. .addUse(SrcReg)
  896. .addImm(SubReg);
  897. MachineOperand &RegOp = I.getOperand(1);
  898. RegOp.setReg(PromoteReg);
  899. // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
  900. KnownValid = true;
  901. }
  902. // If the destination is a physical register, then there's nothing to
  903. // change, so we're done.
  904. if (Register::isPhysicalRegister(DstReg))
  905. return CheckCopy();
  906. }
  907. // No need to constrain SrcReg. It will get constrained when we hit another
  908. // of its use or its defs. Copies do not have constraints.
  909. if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  910. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  911. << " operand\n");
  912. return false;
  913. }
  914. // If this a GPR ZEXT that we want to just reduce down into a copy.
  915. // The sizes will be mismatched with the source < 32b but that's ok.
  916. if (I.getOpcode() == TargetOpcode::G_ZEXT) {
  917. I.setDesc(TII.get(AArch64::COPY));
  918. assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
  919. return selectCopy(I, TII, MRI, TRI, RBI);
  920. }
  921. I.setDesc(TII.get(AArch64::COPY));
  922. return CheckCopy();
  923. }
  924. static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  925. if (!DstTy.isScalar() || !SrcTy.isScalar())
  926. return GenericOpc;
  927. const unsigned DstSize = DstTy.getSizeInBits();
  928. const unsigned SrcSize = SrcTy.getSizeInBits();
  929. switch (DstSize) {
  930. case 32:
  931. switch (SrcSize) {
  932. case 32:
  933. switch (GenericOpc) {
  934. case TargetOpcode::G_SITOFP:
  935. return AArch64::SCVTFUWSri;
  936. case TargetOpcode::G_UITOFP:
  937. return AArch64::UCVTFUWSri;
  938. case TargetOpcode::G_FPTOSI:
  939. return AArch64::FCVTZSUWSr;
  940. case TargetOpcode::G_FPTOUI:
  941. return AArch64::FCVTZUUWSr;
  942. default:
  943. return GenericOpc;
  944. }
  945. case 64:
  946. switch (GenericOpc) {
  947. case TargetOpcode::G_SITOFP:
  948. return AArch64::SCVTFUXSri;
  949. case TargetOpcode::G_UITOFP:
  950. return AArch64::UCVTFUXSri;
  951. case TargetOpcode::G_FPTOSI:
  952. return AArch64::FCVTZSUWDr;
  953. case TargetOpcode::G_FPTOUI:
  954. return AArch64::FCVTZUUWDr;
  955. default:
  956. return GenericOpc;
  957. }
  958. default:
  959. return GenericOpc;
  960. }
  961. case 64:
  962. switch (SrcSize) {
  963. case 32:
  964. switch (GenericOpc) {
  965. case TargetOpcode::G_SITOFP:
  966. return AArch64::SCVTFUWDri;
  967. case TargetOpcode::G_UITOFP:
  968. return AArch64::UCVTFUWDri;
  969. case TargetOpcode::G_FPTOSI:
  970. return AArch64::FCVTZSUXSr;
  971. case TargetOpcode::G_FPTOUI:
  972. return AArch64::FCVTZUUXSr;
  973. default:
  974. return GenericOpc;
  975. }
  976. case 64:
  977. switch (GenericOpc) {
  978. case TargetOpcode::G_SITOFP:
  979. return AArch64::SCVTFUXDri;
  980. case TargetOpcode::G_UITOFP:
  981. return AArch64::UCVTFUXDri;
  982. case TargetOpcode::G_FPTOSI:
  983. return AArch64::FCVTZSUXDr;
  984. case TargetOpcode::G_FPTOUI:
  985. return AArch64::FCVTZUUXDr;
  986. default:
  987. return GenericOpc;
  988. }
  989. default:
  990. return GenericOpc;
  991. }
  992. default:
  993. return GenericOpc;
  994. };
  995. return GenericOpc;
  996. }
  997. MachineInstr *
  998. AArch64InstructionSelector::emitSelect(Register Dst, Register True,
  999. Register False, AArch64CC::CondCode CC,
  1000. MachineIRBuilder &MIB) const {
  1001. MachineRegisterInfo &MRI = *MIB.getMRI();
  1002. assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
  1003. RBI.getRegBank(True, MRI, TRI)->getID() &&
  1004. "Expected both select operands to have the same regbank?");
  1005. LLT Ty = MRI.getType(True);
  1006. if (Ty.isVector())
  1007. return nullptr;
  1008. const unsigned Size = Ty.getSizeInBits();
  1009. assert((Size == 32 || Size == 64) &&
  1010. "Expected 32 bit or 64 bit select only?");
  1011. const bool Is32Bit = Size == 32;
  1012. if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
  1013. unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
  1014. auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  1015. constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
  1016. return &*FCSel;
  1017. }
  1018. // By default, we'll try and emit a CSEL.
  1019. unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  1020. bool Optimized = false;
  1021. auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
  1022. &Optimized](Register &Reg, Register &OtherReg,
  1023. bool Invert) {
  1024. if (Optimized)
  1025. return false;
  1026. // Attempt to fold:
  1027. //
  1028. // %sub = G_SUB 0, %x
  1029. // %select = G_SELECT cc, %reg, %sub
  1030. //
  1031. // Into:
  1032. // %select = CSNEG %reg, %x, cc
  1033. Register MatchReg;
  1034. if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
  1035. Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
  1036. Reg = MatchReg;
  1037. if (Invert) {
  1038. CC = AArch64CC::getInvertedCondCode(CC);
  1039. std::swap(Reg, OtherReg);
  1040. }
  1041. return true;
  1042. }
  1043. // Attempt to fold:
  1044. //
  1045. // %xor = G_XOR %x, -1
  1046. // %select = G_SELECT cc, %reg, %xor
  1047. //
  1048. // Into:
  1049. // %select = CSINV %reg, %x, cc
  1050. if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
  1051. Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  1052. Reg = MatchReg;
  1053. if (Invert) {
  1054. CC = AArch64CC::getInvertedCondCode(CC);
  1055. std::swap(Reg, OtherReg);
  1056. }
  1057. return true;
  1058. }
  1059. // Attempt to fold:
  1060. //
  1061. // %add = G_ADD %x, 1
  1062. // %select = G_SELECT cc, %reg, %add
  1063. //
  1064. // Into:
  1065. // %select = CSINC %reg, %x, cc
  1066. if (mi_match(Reg, MRI,
  1067. m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
  1068. m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
  1069. Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  1070. Reg = MatchReg;
  1071. if (Invert) {
  1072. CC = AArch64CC::getInvertedCondCode(CC);
  1073. std::swap(Reg, OtherReg);
  1074. }
  1075. return true;
  1076. }
  1077. return false;
  1078. };
  1079. // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  1080. // true/false values are constants.
  1081. // FIXME: All of these patterns already exist in tablegen. We should be
  1082. // able to import these.
  1083. auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
  1084. &Optimized]() {
  1085. if (Optimized)
  1086. return false;
  1087. auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
  1088. auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
  1089. if (!TrueCst && !FalseCst)
  1090. return false;
  1091. Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
  1092. if (TrueCst && FalseCst) {
  1093. int64_t T = TrueCst->Value.getSExtValue();
  1094. int64_t F = FalseCst->Value.getSExtValue();
  1095. if (T == 0 && F == 1) {
  1096. // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
  1097. Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  1098. True = ZReg;
  1099. False = ZReg;
  1100. return true;
  1101. }
  1102. if (T == 0 && F == -1) {
  1103. // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
  1104. Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  1105. True = ZReg;
  1106. False = ZReg;
  1107. return true;
  1108. }
  1109. }
  1110. if (TrueCst) {
  1111. int64_t T = TrueCst->Value.getSExtValue();
  1112. if (T == 1) {
  1113. // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
  1114. Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  1115. True = False;
  1116. False = ZReg;
  1117. CC = AArch64CC::getInvertedCondCode(CC);
  1118. return true;
  1119. }
  1120. if (T == -1) {
  1121. // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
  1122. Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  1123. True = False;
  1124. False = ZReg;
  1125. CC = AArch64CC::getInvertedCondCode(CC);
  1126. return true;
  1127. }
  1128. }
  1129. if (FalseCst) {
  1130. int64_t F = FalseCst->Value.getSExtValue();
  1131. if (F == 1) {
  1132. // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
  1133. Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
  1134. False = ZReg;
  1135. return true;
  1136. }
  1137. if (F == -1) {
  1138. // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
  1139. Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
  1140. False = ZReg;
  1141. return true;
  1142. }
  1143. }
  1144. return false;
  1145. };
  1146. Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  1147. Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  1148. Optimized |= TryOptSelectCst();
  1149. auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  1150. constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  1151. return &*SelectInst;
  1152. }
  1153. static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  1154. switch (P) {
  1155. default:
  1156. llvm_unreachable("Unknown condition code!");
  1157. case CmpInst::ICMP_NE:
  1158. return AArch64CC::NE;
  1159. case CmpInst::ICMP_EQ:
  1160. return AArch64CC::EQ;
  1161. case CmpInst::ICMP_SGT:
  1162. return AArch64CC::GT;
  1163. case CmpInst::ICMP_SGE:
  1164. return AArch64CC::GE;
  1165. case CmpInst::ICMP_SLT:
  1166. return AArch64CC::LT;
  1167. case CmpInst::ICMP_SLE:
  1168. return AArch64CC::LE;
  1169. case CmpInst::ICMP_UGT:
  1170. return AArch64CC::HI;
  1171. case CmpInst::ICMP_UGE:
  1172. return AArch64CC::HS;
  1173. case CmpInst::ICMP_ULT:
  1174. return AArch64CC::LO;
  1175. case CmpInst::ICMP_ULE:
  1176. return AArch64CC::LS;
  1177. }
  1178. }
  1179. /// Return a register which can be used as a bit to test in a TB(N)Z.
  1180. static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
  1181. MachineRegisterInfo &MRI) {
  1182. assert(Reg.isValid() && "Expected valid register!");
  1183. bool HasZext = false;
  1184. while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
  1185. unsigned Opc = MI->getOpcode();
  1186. if (!MI->getOperand(0).isReg() ||
  1187. !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
  1188. break;
  1189. // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
  1190. //
  1191. // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
  1192. // on the truncated x is the same as the bit number on x.
  1193. if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
  1194. Opc == TargetOpcode::G_TRUNC) {
  1195. if (Opc == TargetOpcode::G_ZEXT)
  1196. HasZext = true;
  1197. Register NextReg = MI->getOperand(1).getReg();
  1198. // Did we find something worth folding?
  1199. if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
  1200. break;
  1201. // NextReg is worth folding. Keep looking.
  1202. Reg = NextReg;
  1203. continue;
  1204. }
  1205. // Attempt to find a suitable operation with a constant on one side.
  1206. Optional<uint64_t> C;
  1207. Register TestReg;
  1208. switch (Opc) {
  1209. default:
  1210. break;
  1211. case TargetOpcode::G_AND:
  1212. case TargetOpcode::G_XOR: {
  1213. TestReg = MI->getOperand(1).getReg();
  1214. Register ConstantReg = MI->getOperand(2).getReg();
  1215. auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  1216. if (!VRegAndVal) {
  1217. // AND commutes, check the other side for a constant.
  1218. // FIXME: Can we canonicalize the constant so that it's always on the
  1219. // same side at some point earlier?
  1220. std::swap(ConstantReg, TestReg);
  1221. VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  1222. }
  1223. if (VRegAndVal) {
  1224. if (HasZext)
  1225. C = VRegAndVal->Value.getZExtValue();
  1226. else
  1227. C = VRegAndVal->Value.getSExtValue();
  1228. }
  1229. break;
  1230. }
  1231. case TargetOpcode::G_ASHR:
  1232. case TargetOpcode::G_LSHR:
  1233. case TargetOpcode::G_SHL: {
  1234. TestReg = MI->getOperand(1).getReg();
  1235. auto VRegAndVal =
  1236. getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
  1237. if (VRegAndVal)
  1238. C = VRegAndVal->Value.getSExtValue();
  1239. break;
  1240. }
  1241. }
  1242. // Didn't find a constant or viable register. Bail out of the loop.
  1243. if (!C || !TestReg.isValid())
  1244. break;
  1245. // We found a suitable instruction with a constant. Check to see if we can
  1246. // walk through the instruction.
  1247. Register NextReg;
  1248. unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
  1249. switch (Opc) {
  1250. default:
  1251. break;
  1252. case TargetOpcode::G_AND:
  1253. // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
  1254. if ((*C >> Bit) & 1)
  1255. NextReg = TestReg;
  1256. break;
  1257. case TargetOpcode::G_SHL:
  1258. // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
  1259. // the type of the register.
  1260. if (*C <= Bit && (Bit - *C) < TestRegSize) {
  1261. NextReg = TestReg;
  1262. Bit = Bit - *C;
  1263. }
  1264. break;
  1265. case TargetOpcode::G_ASHR:
  1266. // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
  1267. // in x
  1268. NextReg = TestReg;
  1269. Bit = Bit + *C;
  1270. if (Bit >= TestRegSize)
  1271. Bit = TestRegSize - 1;
  1272. break;
  1273. case TargetOpcode::G_LSHR:
  1274. // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
  1275. if ((Bit + *C) < TestRegSize) {
  1276. NextReg = TestReg;
  1277. Bit = Bit + *C;
  1278. }
  1279. break;
  1280. case TargetOpcode::G_XOR:
  1281. // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
  1282. // appropriate.
  1283. //
  1284. // e.g. If x' = xor x, c, and the b-th bit is set in c then
  1285. //
  1286. // tbz x', b -> tbnz x, b
  1287. //
  1288. // Because x' only has the b-th bit set if x does not.
  1289. if ((*C >> Bit) & 1)
  1290. Invert = !Invert;
  1291. NextReg = TestReg;
  1292. break;
  1293. }
  1294. // Check if we found anything worth folding.
  1295. if (!NextReg.isValid())
  1296. return Reg;
  1297. Reg = NextReg;
  1298. }
  1299. return Reg;
  1300. }
  1301. MachineInstr *AArch64InstructionSelector::emitTestBit(
  1302. Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
  1303. MachineIRBuilder &MIB) const {
  1304. assert(TestReg.isValid());
  1305. assert(ProduceNonFlagSettingCondBr &&
  1306. "Cannot emit TB(N)Z with speculation tracking!");
  1307. MachineRegisterInfo &MRI = *MIB.getMRI();
  1308. // Attempt to optimize the test bit by walking over instructions.
  1309. TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
  1310. LLT Ty = MRI.getType(TestReg);
  1311. unsigned Size = Ty.getSizeInBits();
  1312. assert(!Ty.isVector() && "Expected a scalar!");
  1313. assert(Bit < 64 && "Bit is too large!");
  1314. // When the test register is a 64-bit register, we have to narrow to make
  1315. // TBNZW work.
  1316. bool UseWReg = Bit < 32;
  1317. unsigned NecessarySize = UseWReg ? 32 : 64;
  1318. if (Size != NecessarySize)
  1319. TestReg = moveScalarRegClass(
  1320. TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
  1321. MIB);
  1322. static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
  1323. {AArch64::TBZW, AArch64::TBNZW}};
  1324. unsigned Opc = OpcTable[UseWReg][IsNegative];
  1325. auto TestBitMI =
  1326. MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  1327. constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  1328. return &*TestBitMI;
  1329. }
  1330. bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
  1331. MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
  1332. MachineIRBuilder &MIB) const {
  1333. assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  1334. // Given something like this:
  1335. //
  1336. // %x = ...Something...
  1337. // %one = G_CONSTANT i64 1
  1338. // %zero = G_CONSTANT i64 0
  1339. // %and = G_AND %x, %one
  1340. // %cmp = G_ICMP intpred(ne), %and, %zero
  1341. // %cmp_trunc = G_TRUNC %cmp
  1342. // G_BRCOND %cmp_trunc, %bb.3
  1343. //
  1344. // We want to try and fold the AND into the G_BRCOND and produce either a
  1345. // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  1346. //
  1347. // In this case, we'd get
  1348. //
  1349. // TBNZ %x %bb.3
  1350. //
  1351. // Check if the AND has a constant on its RHS which we can use as a mask.
  1352. // If it's a power of 2, then it's the same as checking a specific bit.
  1353. // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
  1354. auto MaybeBit = getIConstantVRegValWithLookThrough(
  1355. AndInst.getOperand(2).getReg(), *MIB.getMRI());
  1356. if (!MaybeBit)
  1357. return false;
  1358. int32_t Bit = MaybeBit->Value.exactLogBase2();
  1359. if (Bit < 0)
  1360. return false;
  1361. Register TestReg = AndInst.getOperand(1).getReg();
  1362. // Emit a TB(N)Z.
  1363. emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  1364. return true;
  1365. }
  1366. MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
  1367. bool IsNegative,
  1368. MachineBasicBlock *DestMBB,
  1369. MachineIRBuilder &MIB) const {
  1370. assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  1371. MachineRegisterInfo &MRI = *MIB.getMRI();
  1372. assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
  1373. AArch64::GPRRegBankID &&
  1374. "Expected GPRs only?");
  1375. auto Ty = MRI.getType(CompareReg);
  1376. unsigned Width = Ty.getSizeInBits();
  1377. assert(!Ty.isVector() && "Expected scalar only?");
  1378. assert(Width <= 64 && "Expected width to be at most 64?");
  1379. static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
  1380. {AArch64::CBNZW, AArch64::CBNZX}};
  1381. unsigned Opc = OpcTable[IsNegative][Width == 64];
  1382. auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
  1383. constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
  1384. return &*BranchMI;
  1385. }
  1386. bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
  1387. MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  1388. assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
  1389. assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  1390. // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
  1391. // totally clean. Some of them require two branches to implement.
  1392. auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
  1393. emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
  1394. Pred);
  1395. AArch64CC::CondCode CC1, CC2;
  1396. changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
  1397. MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  1398. MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
  1399. if (CC2 != AArch64CC::AL)
  1400. MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
  1401. I.eraseFromParent();
  1402. return true;
  1403. }
  1404. bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
  1405. MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  1406. assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  1407. assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  1408. // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  1409. //
  1410. // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  1411. // instructions will not be produced, as they are conditional branch
  1412. // instructions that do not set flags.
  1413. if (!ProduceNonFlagSettingCondBr)
  1414. return false;
  1415. MachineRegisterInfo &MRI = *MIB.getMRI();
  1416. MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  1417. auto Pred =
  1418. static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  1419. Register LHS = ICmp.getOperand(2).getReg();
  1420. Register RHS = ICmp.getOperand(3).getReg();
  1421. // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  1422. auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  1423. MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
  1424. // When we can emit a TB(N)Z, prefer that.
  1425. //
  1426. // Handle non-commutative condition codes first.
  1427. // Note that we don't want to do this when we have a G_AND because it can
  1428. // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  1429. if (VRegAndVal && !AndInst) {
  1430. int64_t C = VRegAndVal->Value.getSExtValue();
  1431. // When we have a greater-than comparison, we can just test if the msb is
  1432. // zero.
  1433. if (C == -1 && Pred == CmpInst::ICMP_SGT) {
  1434. uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
  1435. emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
  1436. I.eraseFromParent();
  1437. return true;
  1438. }
  1439. // When we have a less than comparison, we can just test if the msb is not
  1440. // zero.
  1441. if (C == 0 && Pred == CmpInst::ICMP_SLT) {
  1442. uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
  1443. emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
  1444. I.eraseFromParent();
  1445. return true;
  1446. }
  1447. }
  1448. // Attempt to handle commutative condition codes. Right now, that's only
  1449. // eq/ne.
  1450. if (ICmpInst::isEquality(Pred)) {
  1451. if (!VRegAndVal) {
  1452. std::swap(RHS, LHS);
  1453. VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  1454. AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
  1455. }
  1456. if (VRegAndVal && VRegAndVal->Value == 0) {
  1457. // If there's a G_AND feeding into this branch, try to fold it away by
  1458. // emitting a TB(N)Z instead.
  1459. //
  1460. // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
  1461. // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
  1462. // would be redundant.
  1463. if (AndInst &&
  1464. tryOptAndIntoCompareBranch(
  1465. *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
  1466. I.eraseFromParent();
  1467. return true;
  1468. }
  1469. // Otherwise, try to emit a CB(N)Z instead.
  1470. auto LHSTy = MRI.getType(LHS);
  1471. if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
  1472. emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
  1473. I.eraseFromParent();
  1474. return true;
  1475. }
  1476. }
  1477. }
  1478. return false;
  1479. }
  1480. bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
  1481. MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  1482. assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  1483. assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  1484. if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
  1485. return true;
  1486. // Couldn't optimize. Emit a compare + a Bcc.
  1487. MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  1488. auto PredOp = ICmp.getOperand(1);
  1489. emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
  1490. const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
  1491. static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
  1492. MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
  1493. I.eraseFromParent();
  1494. return true;
  1495. }
  1496. bool AArch64InstructionSelector::selectCompareBranch(
  1497. MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  1498. Register CondReg = I.getOperand(0).getReg();
  1499. MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  1500. if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
  1501. CondReg = CCMI->getOperand(1).getReg();
  1502. CCMI = MRI.getVRegDef(CondReg);
  1503. }
  1504. // Try to select the G_BRCOND using whatever is feeding the condition if
  1505. // possible.
  1506. unsigned CCMIOpc = CCMI->getOpcode();
  1507. if (CCMIOpc == TargetOpcode::G_FCMP)
  1508. return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
  1509. if (CCMIOpc == TargetOpcode::G_ICMP)
  1510. return selectCompareBranchFedByICmp(I, *CCMI, MIB);
  1511. // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  1512. // instructions will not be produced, as they are conditional branch
  1513. // instructions that do not set flags.
  1514. if (ProduceNonFlagSettingCondBr) {
  1515. emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
  1516. I.getOperand(1).getMBB(), MIB);
  1517. I.eraseFromParent();
  1518. return true;
  1519. }
  1520. // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  1521. auto TstMI =
  1522. MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
  1523. constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  1524. auto Bcc = MIB.buildInstr(AArch64::Bcc)
  1525. .addImm(AArch64CC::EQ)
  1526. .addMBB(I.getOperand(1).getMBB());
  1527. I.eraseFromParent();
  1528. return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
  1529. }
  1530. /// Returns the element immediate value of a vector shift operand if found.
  1531. /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
  1532. static Optional<int64_t> getVectorShiftImm(Register Reg,
  1533. MachineRegisterInfo &MRI) {
  1534. assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  1535. MachineInstr *OpMI = MRI.getVRegDef(Reg);
  1536. assert(OpMI && "Expected to find a vreg def for vector shift operand");
  1537. return getAArch64VectorSplatScalar(*OpMI, MRI);
  1538. }
  1539. /// Matches and returns the shift immediate value for a SHL instruction given
  1540. /// a shift operand.
  1541. static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
  1542. Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  1543. if (!ShiftImm)
  1544. return None;
  1545. // Check the immediate is in range for a SHL.
  1546. int64_t Imm = *ShiftImm;
  1547. if (Imm < 0)
  1548. return None;
  1549. switch (SrcTy.getElementType().getSizeInBits()) {
  1550. default:
  1551. LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
  1552. return None;
  1553. case 8:
  1554. if (Imm > 7)
  1555. return None;
  1556. break;
  1557. case 16:
  1558. if (Imm > 15)
  1559. return None;
  1560. break;
  1561. case 32:
  1562. if (Imm > 31)
  1563. return None;
  1564. break;
  1565. case 64:
  1566. if (Imm > 63)
  1567. return None;
  1568. break;
  1569. }
  1570. return Imm;
  1571. }
  1572. bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
  1573. MachineRegisterInfo &MRI) {
  1574. assert(I.getOpcode() == TargetOpcode::G_SHL);
  1575. Register DstReg = I.getOperand(0).getReg();
  1576. const LLT Ty = MRI.getType(DstReg);
  1577. Register Src1Reg = I.getOperand(1).getReg();
  1578. Register Src2Reg = I.getOperand(2).getReg();
  1579. if (!Ty.isVector())
  1580. return false;
  1581. // Check if we have a vector of constants on RHS that we can select as the
  1582. // immediate form.
  1583. Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
  1584. unsigned Opc = 0;
  1585. if (Ty == LLT::fixed_vector(2, 64)) {
  1586. Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  1587. } else if (Ty == LLT::fixed_vector(4, 32)) {
  1588. Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  1589. } else if (Ty == LLT::fixed_vector(2, 32)) {
  1590. Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  1591. } else if (Ty == LLT::fixed_vector(4, 16)) {
  1592. Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
  1593. } else if (Ty == LLT::fixed_vector(8, 16)) {
  1594. Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
  1595. } else if (Ty == LLT::fixed_vector(16, 8)) {
  1596. Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
  1597. } else if (Ty == LLT::fixed_vector(8, 8)) {
  1598. Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
  1599. } else {
  1600. LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
  1601. return false;
  1602. }
  1603. auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  1604. if (ImmVal)
  1605. Shl.addImm(*ImmVal);
  1606. else
  1607. Shl.addUse(Src2Reg);
  1608. constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  1609. I.eraseFromParent();
  1610. return true;
  1611. }
  1612. bool AArch64InstructionSelector::selectVectorAshrLshr(
  1613. MachineInstr &I, MachineRegisterInfo &MRI) {
  1614. assert(I.getOpcode() == TargetOpcode::G_ASHR ||
  1615. I.getOpcode() == TargetOpcode::G_LSHR);
  1616. Register DstReg = I.getOperand(0).getReg();
  1617. const LLT Ty = MRI.getType(DstReg);
  1618. Register Src1Reg = I.getOperand(1).getReg();
  1619. Register Src2Reg = I.getOperand(2).getReg();
  1620. if (!Ty.isVector())
  1621. return false;
  1622. bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
  1623. // We expect the immediate case to be lowered in the PostLegalCombiner to
  1624. // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
  1625. // There is not a shift right register instruction, but the shift left
  1626. // register instruction takes a signed value, where negative numbers specify a
  1627. // right shift.
  1628. unsigned Opc = 0;
  1629. unsigned NegOpc = 0;
  1630. const TargetRegisterClass *RC =
  1631. getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
  1632. if (Ty == LLT::fixed_vector(2, 64)) {
  1633. Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
  1634. NegOpc = AArch64::NEGv2i64;
  1635. } else if (Ty == LLT::fixed_vector(4, 32)) {
  1636. Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
  1637. NegOpc = AArch64::NEGv4i32;
  1638. } else if (Ty == LLT::fixed_vector(2, 32)) {
  1639. Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
  1640. NegOpc = AArch64::NEGv2i32;
  1641. } else if (Ty == LLT::fixed_vector(4, 16)) {
  1642. Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
  1643. NegOpc = AArch64::NEGv4i16;
  1644. } else if (Ty == LLT::fixed_vector(8, 16)) {
  1645. Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
  1646. NegOpc = AArch64::NEGv8i16;
  1647. } else if (Ty == LLT::fixed_vector(16, 8)) {
  1648. Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
  1649. NegOpc = AArch64::NEGv16i8;
  1650. } else if (Ty == LLT::fixed_vector(8, 8)) {
  1651. Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
  1652. NegOpc = AArch64::NEGv8i8;
  1653. } else {
  1654. LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
  1655. return false;
  1656. }
  1657. auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  1658. constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  1659. auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  1660. constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  1661. I.eraseFromParent();
  1662. return true;
  1663. }
  1664. bool AArch64InstructionSelector::selectVaStartAAPCS(
  1665. MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  1666. return false;
  1667. }
  1668. bool AArch64InstructionSelector::selectVaStartDarwin(
  1669. MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  1670. AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  1671. Register ListReg = I.getOperand(0).getReg();
  1672. Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  1673. auto MIB =
  1674. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
  1675. .addDef(ArgsAddrReg)
  1676. .addFrameIndex(FuncInfo->getVarArgsStackIndex())
  1677. .addImm(0)
  1678. .addImm(0);
  1679. constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  1680. MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
  1681. .addUse(ArgsAddrReg)
  1682. .addUse(ListReg)
  1683. .addImm(0)
  1684. .addMemOperand(*I.memoperands_begin());
  1685. constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  1686. I.eraseFromParent();
  1687. return true;
  1688. }
  1689. void AArch64InstructionSelector::materializeLargeCMVal(
  1690. MachineInstr &I, const Value *V, unsigned OpFlags) {
  1691. MachineBasicBlock &MBB = *I.getParent();
  1692. MachineFunction &MF = *MBB.getParent();
  1693. MachineRegisterInfo &MRI = MF.getRegInfo();
  1694. auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  1695. MovZ->addOperand(MF, I.getOperand(1));
  1696. MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
  1697. AArch64II::MO_NC);
  1698. MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  1699. constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
  1700. auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
  1701. Register ForceDstReg) {
  1702. Register DstReg = ForceDstReg
  1703. ? ForceDstReg
  1704. : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  1705. auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
  1706. if (auto *GV = dyn_cast<GlobalValue>(V)) {
  1707. MovI->addOperand(MF, MachineOperand::CreateGA(
  1708. GV, MovZ->getOperand(1).getOffset(), Flags));
  1709. } else {
  1710. MovI->addOperand(
  1711. MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
  1712. MovZ->getOperand(1).getOffset(), Flags));
  1713. }
  1714. MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
  1715. constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
  1716. return DstReg;
  1717. };
  1718. Register DstReg = BuildMovK(MovZ.getReg(0),
  1719. AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  1720. DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  1721. BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  1722. }
  1723. bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  1724. MachineBasicBlock &MBB = *I.getParent();
  1725. MachineFunction &MF = *MBB.getParent();
  1726. MachineRegisterInfo &MRI = MF.getRegInfo();
  1727. switch (I.getOpcode()) {
  1728. case TargetOpcode::G_STORE: {
  1729. bool Changed = contractCrossBankCopyIntoStore(I, MRI);
  1730. MachineOperand &SrcOp = I.getOperand(0);
  1731. if (MRI.getType(SrcOp.getReg()).isPointer()) {
  1732. // Allow matching with imported patterns for stores of pointers. Unlike
  1733. // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
  1734. // and constrain.
  1735. auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
  1736. Register NewSrc = Copy.getReg(0);
  1737. SrcOp.setReg(NewSrc);
  1738. RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
  1739. Changed = true;
  1740. }
  1741. return Changed;
  1742. }
  1743. case TargetOpcode::G_PTR_ADD:
  1744. return convertPtrAddToAdd(I, MRI);
  1745. case TargetOpcode::G_LOAD: {
  1746. // For scalar loads of pointers, we try to convert the dest type from p0
  1747. // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
  1748. // conversion, this should be ok because all users should have been
  1749. // selected already, so the type doesn't matter for them.
  1750. Register DstReg = I.getOperand(0).getReg();
  1751. const LLT DstTy = MRI.getType(DstReg);
  1752. if (!DstTy.isPointer())
  1753. return false;
  1754. MRI.setType(DstReg, LLT::scalar(64));
  1755. return true;
  1756. }
  1757. case AArch64::G_DUP: {
  1758. // Convert the type from p0 to s64 to help selection.
  1759. LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  1760. if (!DstTy.getElementType().isPointer())
  1761. return false;
  1762. auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
  1763. MRI.setType(I.getOperand(0).getReg(),
  1764. DstTy.changeElementType(LLT::scalar(64)));
  1765. MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
  1766. I.getOperand(1).setReg(NewSrc.getReg(0));
  1767. return true;
  1768. }
  1769. case TargetOpcode::G_UITOFP:
  1770. case TargetOpcode::G_SITOFP: {
  1771. // If both source and destination regbanks are FPR, then convert the opcode
  1772. // to G_SITOF so that the importer can select it to an fpr variant.
  1773. // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
  1774. // copy.
  1775. Register SrcReg = I.getOperand(1).getReg();
  1776. LLT SrcTy = MRI.getType(SrcReg);
  1777. LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  1778. if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
  1779. return false;
  1780. if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
  1781. if (I.getOpcode() == TargetOpcode::G_SITOFP)
  1782. I.setDesc(TII.get(AArch64::G_SITOF));
  1783. else
  1784. I.setDesc(TII.get(AArch64::G_UITOF));
  1785. return true;
  1786. }
  1787. return false;
  1788. }
  1789. default:
  1790. return false;
  1791. }
  1792. }
  1793. /// This lowering tries to look for G_PTR_ADD instructions and then converts
  1794. /// them to a standard G_ADD with a COPY on the source.
  1795. ///
  1796. /// The motivation behind this is to expose the add semantics to the imported
  1797. /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
  1798. /// because the selector works bottom up, uses before defs. By the time we
  1799. /// end up trying to select a G_PTR_ADD, we should have already attempted to
  1800. /// fold this into addressing modes and were therefore unsuccessful.
  1801. bool AArch64InstructionSelector::convertPtrAddToAdd(
  1802. MachineInstr &I, MachineRegisterInfo &MRI) {
  1803. assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  1804. Register DstReg = I.getOperand(0).getReg();
  1805. Register AddOp1Reg = I.getOperand(1).getReg();
  1806. const LLT PtrTy = MRI.getType(DstReg);
  1807. if (PtrTy.getAddressSpace() != 0)
  1808. return false;
  1809. const LLT CastPtrTy =
  1810. PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
  1811. auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  1812. // Set regbanks on the registers.
  1813. if (PtrTy.isVector())
  1814. MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  1815. else
  1816. MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
  1817. // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  1818. // %dst(intty) = G_ADD %intbase, off
  1819. I.setDesc(TII.get(TargetOpcode::G_ADD));
  1820. MRI.setType(DstReg, CastPtrTy);
  1821. I.getOperand(1).setReg(PtrToInt.getReg(0));
  1822. if (!select(*PtrToInt)) {
  1823. LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
  1824. return false;
  1825. }
  1826. // Also take the opportunity here to try to do some optimization.
  1827. // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  1828. Register NegatedReg;
  1829. if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
  1830. return true;
  1831. I.getOperand(2).setReg(NegatedReg);
  1832. I.setDesc(TII.get(TargetOpcode::G_SUB));
  1833. return true;
  1834. }
  1835. bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
  1836. MachineRegisterInfo &MRI) {
  1837. // We try to match the immediate variant of LSL, which is actually an alias
  1838. // for a special case of UBFM. Otherwise, we fall back to the imported
  1839. // selector which will match the register variant.
  1840. assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  1841. const auto &MO = I.getOperand(2);
  1842. auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
  1843. if (!VRegAndVal)
  1844. return false;
  1845. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  1846. if (DstTy.isVector())
  1847. return false;
  1848. bool Is64Bit = DstTy.getSizeInBits() == 64;
  1849. auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  1850. auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  1851. if (!Imm1Fn || !Imm2Fn)
  1852. return false;
  1853. auto NewI =
  1854. MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
  1855. {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
  1856. for (auto &RenderFn : *Imm1Fn)
  1857. RenderFn(NewI);
  1858. for (auto &RenderFn : *Imm2Fn)
  1859. RenderFn(NewI);
  1860. I.eraseFromParent();
  1861. return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
  1862. }
  1863. bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
  1864. MachineInstr &I, MachineRegisterInfo &MRI) {
  1865. assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  1866. // If we're storing a scalar, it doesn't matter what register bank that
  1867. // scalar is on. All that matters is the size.
  1868. //
  1869. // So, if we see something like this (with a 32-bit scalar as an example):
  1870. //
  1871. // %x:gpr(s32) = ... something ...
  1872. // %y:fpr(s32) = COPY %x:gpr(s32)
  1873. // G_STORE %y:fpr(s32)
  1874. //
  1875. // We can fix this up into something like this:
  1876. //
  1877. // G_STORE %x:gpr(s32)
  1878. //
  1879. // And then continue the selection process normally.
  1880. Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
  1881. if (!DefDstReg.isValid())
  1882. return false;
  1883. LLT DefDstTy = MRI.getType(DefDstReg);
  1884. Register StoreSrcReg = I.getOperand(0).getReg();
  1885. LLT StoreSrcTy = MRI.getType(StoreSrcReg);
  1886. // If we get something strange like a physical register, then we shouldn't
  1887. // go any further.
  1888. if (!DefDstTy.isValid())
  1889. return false;
  1890. // Are the source and dst types the same size?
  1891. if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
  1892. return false;
  1893. if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
  1894. RBI.getRegBank(DefDstReg, MRI, TRI))
  1895. return false;
  1896. // We have a cross-bank copy, which is entering a store. Let's fold it.
  1897. I.getOperand(0).setReg(DefDstReg);
  1898. return true;
  1899. }
  1900. bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
  1901. assert(I.getParent() && "Instruction should be in a basic block!");
  1902. assert(I.getParent()->getParent() && "Instruction should be in a function!");
  1903. MachineBasicBlock &MBB = *I.getParent();
  1904. MachineFunction &MF = *MBB.getParent();
  1905. MachineRegisterInfo &MRI = MF.getRegInfo();
  1906. switch (I.getOpcode()) {
  1907. case AArch64::G_DUP: {
  1908. // Before selecting a DUP instruction, check if it is better selected as a
  1909. // MOV or load from a constant pool.
  1910. Register Src = I.getOperand(1).getReg();
  1911. auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
  1912. if (!ValAndVReg)
  1913. return false;
  1914. LLVMContext &Ctx = MF.getFunction().getContext();
  1915. Register Dst = I.getOperand(0).getReg();
  1916. auto *CV = ConstantDataVector::getSplat(
  1917. MRI.getType(Dst).getNumElements(),
  1918. ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
  1919. ValAndVReg->Value));
  1920. if (!emitConstantVector(Dst, CV, MIB, MRI))
  1921. return false;
  1922. I.eraseFromParent();
  1923. return true;
  1924. }
  1925. case TargetOpcode::G_SEXT:
  1926. // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
  1927. // over a normal extend.
  1928. if (selectUSMovFromExtend(I, MRI))
  1929. return true;
  1930. return false;
  1931. case TargetOpcode::G_BR:
  1932. return false;
  1933. case TargetOpcode::G_SHL:
  1934. return earlySelectSHL(I, MRI);
  1935. case TargetOpcode::G_CONSTANT: {
  1936. bool IsZero = false;
  1937. if (I.getOperand(1).isCImm())
  1938. IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
  1939. else if (I.getOperand(1).isImm())
  1940. IsZero = I.getOperand(1).getImm() == 0;
  1941. if (!IsZero)
  1942. return false;
  1943. Register DefReg = I.getOperand(0).getReg();
  1944. LLT Ty = MRI.getType(DefReg);
  1945. if (Ty.getSizeInBits() == 64) {
  1946. I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
  1947. RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  1948. } else if (Ty.getSizeInBits() == 32) {
  1949. I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
  1950. RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
  1951. } else
  1952. return false;
  1953. I.setDesc(TII.get(TargetOpcode::COPY));
  1954. return true;
  1955. }
  1956. case TargetOpcode::G_ADD: {
  1957. // Check if this is being fed by a G_ICMP on either side.
  1958. //
  1959. // (cmp pred, x, y) + z
  1960. //
  1961. // In the above case, when the cmp is true, we increment z by 1. So, we can
  1962. // fold the add into the cset for the cmp by using cinc.
  1963. //
  1964. // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
  1965. Register AddDst = I.getOperand(0).getReg();
  1966. Register AddLHS = I.getOperand(1).getReg();
  1967. Register AddRHS = I.getOperand(2).getReg();
  1968. // Only handle scalars.
  1969. LLT Ty = MRI.getType(AddLHS);
  1970. if (Ty.isVector())
  1971. return false;
  1972. // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
  1973. // bits.
  1974. unsigned Size = Ty.getSizeInBits();
  1975. if (Size != 32 && Size != 64)
  1976. return false;
  1977. auto MatchCmp = [&](Register Reg) -> MachineInstr * {
  1978. if (!MRI.hasOneNonDBGUse(Reg))
  1979. return nullptr;
  1980. // If the LHS of the add is 32 bits, then we want to fold a 32-bit
  1981. // compare.
  1982. if (Size == 32)
  1983. return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
  1984. // We model scalar compares using 32-bit destinations right now.
  1985. // If it's a 64-bit compare, it'll have 64-bit sources.
  1986. Register ZExt;
  1987. if (!mi_match(Reg, MRI,
  1988. m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
  1989. return nullptr;
  1990. auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
  1991. if (!Cmp ||
  1992. MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
  1993. return nullptr;
  1994. return Cmp;
  1995. };
  1996. // Try to match
  1997. // z + (cmp pred, x, y)
  1998. MachineInstr *Cmp = MatchCmp(AddRHS);
  1999. if (!Cmp) {
  2000. // (cmp pred, x, y) + z
  2001. std::swap(AddLHS, AddRHS);
  2002. Cmp = MatchCmp(AddRHS);
  2003. if (!Cmp)
  2004. return false;
  2005. }
  2006. auto &PredOp = Cmp->getOperand(1);
  2007. auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
  2008. const AArch64CC::CondCode InvCC =
  2009. changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
  2010. MIB.setInstrAndDebugLoc(I);
  2011. emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
  2012. /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
  2013. emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
  2014. I.eraseFromParent();
  2015. return true;
  2016. }
  2017. case TargetOpcode::G_OR: {
  2018. // Look for operations that take the lower `Width=Size-ShiftImm` bits of
  2019. // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
  2020. // shifting and masking that we can replace with a BFI (encoded as a BFM).
  2021. Register Dst = I.getOperand(0).getReg();
  2022. LLT Ty = MRI.getType(Dst);
  2023. if (!Ty.isScalar())
  2024. return false;
  2025. unsigned Size = Ty.getSizeInBits();
  2026. if (Size != 32 && Size != 64)
  2027. return false;
  2028. Register ShiftSrc;
  2029. int64_t ShiftImm;
  2030. Register MaskSrc;
  2031. int64_t MaskImm;
  2032. if (!mi_match(
  2033. Dst, MRI,
  2034. m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
  2035. m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
  2036. return false;
  2037. if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
  2038. return false;
  2039. int64_t Immr = Size - ShiftImm;
  2040. int64_t Imms = Size - ShiftImm - 1;
  2041. unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
  2042. emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
  2043. I.eraseFromParent();
  2044. return true;
  2045. }
  2046. default:
  2047. return false;
  2048. }
  2049. }
  2050. bool AArch64InstructionSelector::select(MachineInstr &I) {
  2051. assert(I.getParent() && "Instruction should be in a basic block!");
  2052. assert(I.getParent()->getParent() && "Instruction should be in a function!");
  2053. MachineBasicBlock &MBB = *I.getParent();
  2054. MachineFunction &MF = *MBB.getParent();
  2055. MachineRegisterInfo &MRI = MF.getRegInfo();
  2056. const AArch64Subtarget *Subtarget =
  2057. &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
  2058. if (Subtarget->requiresStrictAlign()) {
  2059. // We don't support this feature yet.
  2060. LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
  2061. return false;
  2062. }
  2063. MIB.setInstrAndDebugLoc(I);
  2064. unsigned Opcode = I.getOpcode();
  2065. // G_PHI requires same handling as PHI
  2066. if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
  2067. // Certain non-generic instructions also need some special handling.
  2068. if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
  2069. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2070. if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
  2071. const Register DefReg = I.getOperand(0).getReg();
  2072. const LLT DefTy = MRI.getType(DefReg);
  2073. const RegClassOrRegBank &RegClassOrBank =
  2074. MRI.getRegClassOrRegBank(DefReg);
  2075. const TargetRegisterClass *DefRC
  2076. = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  2077. if (!DefRC) {
  2078. if (!DefTy.isValid()) {
  2079. LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
  2080. return false;
  2081. }
  2082. const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
  2083. DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
  2084. if (!DefRC) {
  2085. LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
  2086. return false;
  2087. }
  2088. }
  2089. I.setDesc(TII.get(TargetOpcode::PHI));
  2090. return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
  2091. }
  2092. if (I.isCopy())
  2093. return selectCopy(I, TII, MRI, TRI, RBI);
  2094. return true;
  2095. }
  2096. if (I.getNumOperands() != I.getNumExplicitOperands()) {
  2097. LLVM_DEBUG(
  2098. dbgs() << "Generic instruction has unexpected implicit operands\n");
  2099. return false;
  2100. }
  2101. // Try to do some lowering before we start instruction selecting. These
  2102. // lowerings are purely transformations on the input G_MIR and so selection
  2103. // must continue after any modification of the instruction.
  2104. if (preISelLower(I)) {
  2105. Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
  2106. }
  2107. // There may be patterns where the importer can't deal with them optimally,
  2108. // but does select it to a suboptimal sequence so our custom C++ selection
  2109. // code later never has a chance to work on it. Therefore, we have an early
  2110. // selection attempt here to give priority to certain selection routines
  2111. // over the imported ones.
  2112. if (earlySelect(I))
  2113. return true;
  2114. if (selectImpl(I, *CoverageInfo))
  2115. return true;
  2116. LLT Ty =
  2117. I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
  2118. switch (Opcode) {
  2119. case TargetOpcode::G_SBFX:
  2120. case TargetOpcode::G_UBFX: {
  2121. static const unsigned OpcTable[2][2] = {
  2122. {AArch64::UBFMWri, AArch64::UBFMXri},
  2123. {AArch64::SBFMWri, AArch64::SBFMXri}};
  2124. bool IsSigned = Opcode == TargetOpcode::G_SBFX;
  2125. unsigned Size = Ty.getSizeInBits();
  2126. unsigned Opc = OpcTable[IsSigned][Size == 64];
  2127. auto Cst1 =
  2128. getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
  2129. assert(Cst1 && "Should have gotten a constant for src 1?");
  2130. auto Cst2 =
  2131. getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
  2132. assert(Cst2 && "Should have gotten a constant for src 2?");
  2133. auto LSB = Cst1->Value.getZExtValue();
  2134. auto Width = Cst2->Value.getZExtValue();
  2135. auto BitfieldInst =
  2136. MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
  2137. .addImm(LSB)
  2138. .addImm(LSB + Width - 1);
  2139. I.eraseFromParent();
  2140. return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
  2141. }
  2142. case TargetOpcode::G_BRCOND:
  2143. return selectCompareBranch(I, MF, MRI);
  2144. case TargetOpcode::G_BRINDIRECT: {
  2145. I.setDesc(TII.get(AArch64::BR));
  2146. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2147. }
  2148. case TargetOpcode::G_BRJT:
  2149. return selectBrJT(I, MRI);
  2150. case AArch64::G_ADD_LOW: {
  2151. // This op may have been separated from it's ADRP companion by the localizer
  2152. // or some other code motion pass. Given that many CPUs will try to
  2153. // macro fuse these operations anyway, select this into a MOVaddr pseudo
  2154. // which will later be expanded into an ADRP+ADD pair after scheduling.
  2155. MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
  2156. if (BaseMI->getOpcode() != AArch64::ADRP) {
  2157. I.setDesc(TII.get(AArch64::ADDXri));
  2158. I.addOperand(MachineOperand::CreateImm(0));
  2159. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2160. }
  2161. assert(TM.getCodeModel() == CodeModel::Small &&
  2162. "Expected small code model");
  2163. auto Op1 = BaseMI->getOperand(1);
  2164. auto Op2 = I.getOperand(2);
  2165. auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
  2166. .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
  2167. Op1.getTargetFlags())
  2168. .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
  2169. Op2.getTargetFlags());
  2170. I.eraseFromParent();
  2171. return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
  2172. }
  2173. case TargetOpcode::G_BSWAP: {
  2174. // Handle vector types for G_BSWAP directly.
  2175. Register DstReg = I.getOperand(0).getReg();
  2176. LLT DstTy = MRI.getType(DstReg);
  2177. // We should only get vector types here; everything else is handled by the
  2178. // importer right now.
  2179. if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
  2180. LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
  2181. return false;
  2182. }
  2183. // Only handle 4 and 2 element vectors for now.
  2184. // TODO: 16-bit elements.
  2185. unsigned NumElts = DstTy.getNumElements();
  2186. if (NumElts != 4 && NumElts != 2) {
  2187. LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
  2188. return false;
  2189. }
  2190. // Choose the correct opcode for the supported types. Right now, that's
  2191. // v2s32, v4s32, and v2s64.
  2192. unsigned Opc = 0;
  2193. unsigned EltSize = DstTy.getElementType().getSizeInBits();
  2194. if (EltSize == 32)
  2195. Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
  2196. : AArch64::REV32v16i8;
  2197. else if (EltSize == 64)
  2198. Opc = AArch64::REV64v16i8;
  2199. // We should always get something by the time we get here...
  2200. assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
  2201. I.setDesc(TII.get(Opc));
  2202. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2203. }
  2204. case TargetOpcode::G_FCONSTANT:
  2205. case TargetOpcode::G_CONSTANT: {
  2206. const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
  2207. const LLT s8 = LLT::scalar(8);
  2208. const LLT s16 = LLT::scalar(16);
  2209. const LLT s32 = LLT::scalar(32);
  2210. const LLT s64 = LLT::scalar(64);
  2211. const LLT s128 = LLT::scalar(128);
  2212. const LLT p0 = LLT::pointer(0, 64);
  2213. const Register DefReg = I.getOperand(0).getReg();
  2214. const LLT DefTy = MRI.getType(DefReg);
  2215. const unsigned DefSize = DefTy.getSizeInBits();
  2216. const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
  2217. // FIXME: Redundant check, but even less readable when factored out.
  2218. if (isFP) {
  2219. if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
  2220. LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
  2221. << " constant, expected: " << s16 << " or " << s32
  2222. << " or " << s64 << " or " << s128 << '\n');
  2223. return false;
  2224. }
  2225. if (RB.getID() != AArch64::FPRRegBankID) {
  2226. LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
  2227. << " constant on bank: " << RB
  2228. << ", expected: FPR\n");
  2229. return false;
  2230. }
  2231. // The case when we have 0.0 is covered by tablegen. Reject it here so we
  2232. // can be sure tablegen works correctly and isn't rescued by this code.
  2233. // 0.0 is not covered by tablegen for FP128. So we will handle this
  2234. // scenario in the code here.
  2235. if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
  2236. return false;
  2237. } else {
  2238. // s32 and s64 are covered by tablegen.
  2239. if (Ty != p0 && Ty != s8 && Ty != s16) {
  2240. LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
  2241. << " constant, expected: " << s32 << ", " << s64
  2242. << ", or " << p0 << '\n');
  2243. return false;
  2244. }
  2245. if (RB.getID() != AArch64::GPRRegBankID) {
  2246. LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
  2247. << " constant on bank: " << RB
  2248. << ", expected: GPR\n");
  2249. return false;
  2250. }
  2251. }
  2252. if (isFP) {
  2253. const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
  2254. // For 16, 64, and 128b values, emit a constant pool load.
  2255. switch (DefSize) {
  2256. default:
  2257. llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
  2258. case 32:
  2259. // For s32, use a cp load if we have optsize/minsize.
  2260. if (!shouldOptForSize(&MF))
  2261. break;
  2262. LLVM_FALLTHROUGH;
  2263. case 16:
  2264. case 64:
  2265. case 128: {
  2266. auto *FPImm = I.getOperand(1).getFPImm();
  2267. auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
  2268. if (!LoadMI) {
  2269. LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
  2270. return false;
  2271. }
  2272. MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
  2273. I.eraseFromParent();
  2274. return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
  2275. }
  2276. }
  2277. // Either emit a FMOV, or emit a copy to emit a normal mov.
  2278. assert(DefSize == 32 &&
  2279. "Expected constant pool loads for all sizes other than 32!");
  2280. const Register DefGPRReg =
  2281. MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  2282. MachineOperand &RegOp = I.getOperand(0);
  2283. RegOp.setReg(DefGPRReg);
  2284. MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
  2285. MIB.buildCopy({DefReg}, {DefGPRReg});
  2286. if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
  2287. LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
  2288. return false;
  2289. }
  2290. MachineOperand &ImmOp = I.getOperand(1);
  2291. // FIXME: Is going through int64_t always correct?
  2292. ImmOp.ChangeToImmediate(
  2293. ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  2294. } else if (I.getOperand(1).isCImm()) {
  2295. uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
  2296. I.getOperand(1).ChangeToImmediate(Val);
  2297. } else if (I.getOperand(1).isImm()) {
  2298. uint64_t Val = I.getOperand(1).getImm();
  2299. I.getOperand(1).ChangeToImmediate(Val);
  2300. }
  2301. const unsigned MovOpc =
  2302. DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
  2303. I.setDesc(TII.get(MovOpc));
  2304. constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2305. return true;
  2306. }
  2307. case TargetOpcode::G_EXTRACT: {
  2308. Register DstReg = I.getOperand(0).getReg();
  2309. Register SrcReg = I.getOperand(1).getReg();
  2310. LLT SrcTy = MRI.getType(SrcReg);
  2311. LLT DstTy = MRI.getType(DstReg);
  2312. (void)DstTy;
  2313. unsigned SrcSize = SrcTy.getSizeInBits();
  2314. if (SrcTy.getSizeInBits() > 64) {
  2315. // This should be an extract of an s128, which is like a vector extract.
  2316. if (SrcTy.getSizeInBits() != 128)
  2317. return false;
  2318. // Only support extracting 64 bits from an s128 at the moment.
  2319. if (DstTy.getSizeInBits() != 64)
  2320. return false;
  2321. unsigned Offset = I.getOperand(2).getImm();
  2322. if (Offset % 64 != 0)
  2323. return false;
  2324. // Check we have the right regbank always.
  2325. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  2326. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  2327. assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
  2328. if (SrcRB.getID() == AArch64::GPRRegBankID) {
  2329. MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
  2330. .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
  2331. I.eraseFromParent();
  2332. return true;
  2333. }
  2334. // Emit the same code as a vector extract.
  2335. // Offset must be a multiple of 64.
  2336. unsigned LaneIdx = Offset / 64;
  2337. MachineInstr *Extract = emitExtractVectorElt(
  2338. DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
  2339. if (!Extract)
  2340. return false;
  2341. I.eraseFromParent();
  2342. return true;
  2343. }
  2344. I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
  2345. MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
  2346. Ty.getSizeInBits() - 1);
  2347. if (SrcSize < 64) {
  2348. assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
  2349. "unexpected G_EXTRACT types");
  2350. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2351. }
  2352. DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  2353. MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
  2354. MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
  2355. .addReg(DstReg, 0, AArch64::sub_32);
  2356. RBI.constrainGenericRegister(I.getOperand(0).getReg(),
  2357. AArch64::GPR32RegClass, MRI);
  2358. I.getOperand(0).setReg(DstReg);
  2359. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2360. }
  2361. case TargetOpcode::G_INSERT: {
  2362. LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
  2363. LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  2364. unsigned DstSize = DstTy.getSizeInBits();
  2365. // Larger inserts are vectors, same-size ones should be something else by
  2366. // now (split up or turned into COPYs).
  2367. if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
  2368. return false;
  2369. I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
  2370. unsigned LSB = I.getOperand(3).getImm();
  2371. unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  2372. I.getOperand(3).setImm((DstSize - LSB) % DstSize);
  2373. MachineInstrBuilder(MF, I).addImm(Width - 1);
  2374. if (DstSize < 64) {
  2375. assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
  2376. "unexpected G_INSERT types");
  2377. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2378. }
  2379. Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  2380. BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
  2381. TII.get(AArch64::SUBREG_TO_REG))
  2382. .addDef(SrcReg)
  2383. .addImm(0)
  2384. .addUse(I.getOperand(2).getReg())
  2385. .addImm(AArch64::sub_32);
  2386. RBI.constrainGenericRegister(I.getOperand(2).getReg(),
  2387. AArch64::GPR32RegClass, MRI);
  2388. I.getOperand(2).setReg(SrcReg);
  2389. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2390. }
  2391. case TargetOpcode::G_FRAME_INDEX: {
  2392. // allocas and G_FRAME_INDEX are only supported in addrspace(0).
  2393. if (Ty != LLT::pointer(0, 64)) {
  2394. LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
  2395. << ", expected: " << LLT::pointer(0, 64) << '\n');
  2396. return false;
  2397. }
  2398. I.setDesc(TII.get(AArch64::ADDXri));
  2399. // MOs for a #0 shifted immediate.
  2400. I.addOperand(MachineOperand::CreateImm(0));
  2401. I.addOperand(MachineOperand::CreateImm(0));
  2402. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2403. }
  2404. case TargetOpcode::G_GLOBAL_VALUE: {
  2405. auto GV = I.getOperand(1).getGlobal();
  2406. if (GV->isThreadLocal())
  2407. return selectTLSGlobalValue(I, MRI);
  2408. unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
  2409. if (OpFlags & AArch64II::MO_GOT) {
  2410. I.setDesc(TII.get(AArch64::LOADgot));
  2411. I.getOperand(1).setTargetFlags(OpFlags);
  2412. } else if (TM.getCodeModel() == CodeModel::Large) {
  2413. // Materialize the global using movz/movk instructions.
  2414. materializeLargeCMVal(I, GV, OpFlags);
  2415. I.eraseFromParent();
  2416. return true;
  2417. } else if (TM.getCodeModel() == CodeModel::Tiny) {
  2418. I.setDesc(TII.get(AArch64::ADR));
  2419. I.getOperand(1).setTargetFlags(OpFlags);
  2420. } else {
  2421. I.setDesc(TII.get(AArch64::MOVaddr));
  2422. I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
  2423. MachineInstrBuilder MIB(MF, I);
  2424. MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
  2425. OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  2426. }
  2427. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2428. }
  2429. case TargetOpcode::G_ZEXTLOAD:
  2430. case TargetOpcode::G_LOAD:
  2431. case TargetOpcode::G_STORE: {
  2432. GLoadStore &LdSt = cast<GLoadStore>(I);
  2433. bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
  2434. LLT PtrTy = MRI.getType(LdSt.getPointerReg());
  2435. if (PtrTy != LLT::pointer(0, 64)) {
  2436. LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
  2437. << ", expected: " << LLT::pointer(0, 64) << '\n');
  2438. return false;
  2439. }
  2440. uint64_t MemSizeInBytes = LdSt.getMemSize();
  2441. unsigned MemSizeInBits = LdSt.getMemSizeInBits();
  2442. AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
  2443. // Need special instructions for atomics that affect ordering.
  2444. if (Order != AtomicOrdering::NotAtomic &&
  2445. Order != AtomicOrdering::Unordered &&
  2446. Order != AtomicOrdering::Monotonic) {
  2447. assert(!isa<GZExtLoad>(LdSt));
  2448. if (MemSizeInBytes > 64)
  2449. return false;
  2450. if (isa<GLoad>(LdSt)) {
  2451. static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
  2452. AArch64::LDARW, AArch64::LDARX};
  2453. I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
  2454. } else {
  2455. static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
  2456. AArch64::STLRW, AArch64::STLRX};
  2457. Register ValReg = LdSt.getReg(0);
  2458. if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
  2459. // Emit a subreg copy of 32 bits.
  2460. Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  2461. MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
  2462. .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
  2463. I.getOperand(0).setReg(NewVal);
  2464. }
  2465. I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
  2466. }
  2467. constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2468. return true;
  2469. }
  2470. #ifndef NDEBUG
  2471. const Register PtrReg = LdSt.getPointerReg();
  2472. const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
  2473. // Check that the pointer register is valid.
  2474. assert(PtrRB.getID() == AArch64::GPRRegBankID &&
  2475. "Load/Store pointer operand isn't a GPR");
  2476. assert(MRI.getType(PtrReg).isPointer() &&
  2477. "Load/Store pointer operand isn't a pointer");
  2478. #endif
  2479. const Register ValReg = LdSt.getReg(0);
  2480. const LLT ValTy = MRI.getType(ValReg);
  2481. const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
  2482. // The code below doesn't support truncating stores, so we need to split it
  2483. // again.
  2484. if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
  2485. unsigned SubReg;
  2486. LLT MemTy = LdSt.getMMO().getMemoryType();
  2487. auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
  2488. if (!getSubRegForClass(RC, TRI, SubReg))
  2489. return false;
  2490. // Generate a subreg copy.
  2491. auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
  2492. .addReg(ValReg, 0, SubReg)
  2493. .getReg(0);
  2494. RBI.constrainGenericRegister(Copy, *RC, MRI);
  2495. LdSt.getOperand(0).setReg(Copy);
  2496. } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
  2497. // If this is an any-extending load from the FPR bank, split it into a regular
  2498. // load + extend.
  2499. if (RB.getID() == AArch64::FPRRegBankID) {
  2500. unsigned SubReg;
  2501. LLT MemTy = LdSt.getMMO().getMemoryType();
  2502. auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
  2503. if (!getSubRegForClass(RC, TRI, SubReg))
  2504. return false;
  2505. Register OldDst = LdSt.getReg(0);
  2506. Register NewDst =
  2507. MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
  2508. LdSt.getOperand(0).setReg(NewDst);
  2509. MRI.setRegBank(NewDst, RB);
  2510. // Generate a SUBREG_TO_REG to extend it.
  2511. MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
  2512. MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
  2513. .addImm(0)
  2514. .addUse(NewDst)
  2515. .addImm(SubReg);
  2516. auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
  2517. RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
  2518. MIB.setInstr(LdSt);
  2519. }
  2520. }
  2521. // Helper lambda for partially selecting I. Either returns the original
  2522. // instruction with an updated opcode, or a new instruction.
  2523. auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
  2524. bool IsStore = isa<GStore>(I);
  2525. const unsigned NewOpc =
  2526. selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
  2527. if (NewOpc == I.getOpcode())
  2528. return nullptr;
  2529. // Check if we can fold anything into the addressing mode.
  2530. auto AddrModeFns =
  2531. selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
  2532. if (!AddrModeFns) {
  2533. // Can't fold anything. Use the original instruction.
  2534. I.setDesc(TII.get(NewOpc));
  2535. I.addOperand(MachineOperand::CreateImm(0));
  2536. return &I;
  2537. }
  2538. // Folded something. Create a new instruction and return it.
  2539. auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
  2540. Register CurValReg = I.getOperand(0).getReg();
  2541. IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
  2542. NewInst.cloneMemRefs(I);
  2543. for (auto &Fn : *AddrModeFns)
  2544. Fn(NewInst);
  2545. I.eraseFromParent();
  2546. return &*NewInst;
  2547. };
  2548. MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
  2549. if (!LoadStore)
  2550. return false;
  2551. // If we're storing a 0, use WZR/XZR.
  2552. if (Opcode == TargetOpcode::G_STORE) {
  2553. auto CVal = getIConstantVRegValWithLookThrough(
  2554. LoadStore->getOperand(0).getReg(), MRI);
  2555. if (CVal && CVal->Value == 0) {
  2556. switch (LoadStore->getOpcode()) {
  2557. case AArch64::STRWui:
  2558. case AArch64::STRHHui:
  2559. case AArch64::STRBBui:
  2560. LoadStore->getOperand(0).setReg(AArch64::WZR);
  2561. break;
  2562. case AArch64::STRXui:
  2563. LoadStore->getOperand(0).setReg(AArch64::XZR);
  2564. break;
  2565. }
  2566. }
  2567. }
  2568. if (IsZExtLoad) {
  2569. // The zextload from a smaller type to i32 should be handled by the
  2570. // importer.
  2571. if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
  2572. return false;
  2573. // If we have a ZEXTLOAD then change the load's type to be a narrower reg
  2574. // and zero_extend with SUBREG_TO_REG.
  2575. Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  2576. Register DstReg = LoadStore->getOperand(0).getReg();
  2577. LoadStore->getOperand(0).setReg(LdReg);
  2578. MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
  2579. MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
  2580. .addImm(0)
  2581. .addUse(LdReg)
  2582. .addImm(AArch64::sub_32);
  2583. constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
  2584. return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
  2585. MRI);
  2586. }
  2587. return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
  2588. }
  2589. case TargetOpcode::G_SMULH:
  2590. case TargetOpcode::G_UMULH: {
  2591. // Reject the various things we don't support yet.
  2592. if (unsupportedBinOp(I, RBI, MRI, TRI))
  2593. return false;
  2594. const Register DefReg = I.getOperand(0).getReg();
  2595. const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
  2596. if (RB.getID() != AArch64::GPRRegBankID) {
  2597. LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
  2598. return false;
  2599. }
  2600. if (Ty != LLT::scalar(64)) {
  2601. LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
  2602. << ", expected: " << LLT::scalar(64) << '\n');
  2603. return false;
  2604. }
  2605. unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
  2606. : AArch64::UMULHrr;
  2607. I.setDesc(TII.get(NewOpc));
  2608. // Now that we selected an opcode, we need to constrain the register
  2609. // operands to use appropriate classes.
  2610. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2611. }
  2612. case TargetOpcode::G_LSHR:
  2613. case TargetOpcode::G_ASHR:
  2614. if (MRI.getType(I.getOperand(0).getReg()).isVector())
  2615. return selectVectorAshrLshr(I, MRI);
  2616. LLVM_FALLTHROUGH;
  2617. case TargetOpcode::G_SHL:
  2618. if (Opcode == TargetOpcode::G_SHL &&
  2619. MRI.getType(I.getOperand(0).getReg()).isVector())
  2620. return selectVectorSHL(I, MRI);
  2621. // These shifts were legalized to have 64 bit shift amounts because we
  2622. // want to take advantage of the selection patterns that assume the
  2623. // immediates are s64s, however, selectBinaryOp will assume both operands
  2624. // will have the same bit size.
  2625. {
  2626. Register SrcReg = I.getOperand(1).getReg();
  2627. Register ShiftReg = I.getOperand(2).getReg();
  2628. const LLT ShiftTy = MRI.getType(ShiftReg);
  2629. const LLT SrcTy = MRI.getType(SrcReg);
  2630. if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
  2631. ShiftTy.getSizeInBits() == 64) {
  2632. assert(!ShiftTy.isVector() && "unexpected vector shift ty");
  2633. assert(MRI.getVRegDef(ShiftReg) &&
  2634. "could not find a vreg definition for shift amount");
  2635. // Insert a subregister copy to implement a 64->32 trunc
  2636. auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
  2637. .addReg(ShiftReg, 0, AArch64::sub_32);
  2638. MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
  2639. I.getOperand(2).setReg(Trunc.getReg(0));
  2640. }
  2641. }
  2642. LLVM_FALLTHROUGH;
  2643. case TargetOpcode::G_FADD:
  2644. case TargetOpcode::G_FSUB:
  2645. case TargetOpcode::G_FMUL:
  2646. case TargetOpcode::G_FDIV:
  2647. case TargetOpcode::G_OR: {
  2648. // Reject the various things we don't support yet.
  2649. if (unsupportedBinOp(I, RBI, MRI, TRI))
  2650. return false;
  2651. const unsigned OpSize = Ty.getSizeInBits();
  2652. const Register DefReg = I.getOperand(0).getReg();
  2653. const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
  2654. const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
  2655. if (NewOpc == I.getOpcode())
  2656. return false;
  2657. I.setDesc(TII.get(NewOpc));
  2658. // FIXME: Should the type be always reset in setDesc?
  2659. // Now that we selected an opcode, we need to constrain the register
  2660. // operands to use appropriate classes.
  2661. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2662. }
  2663. case TargetOpcode::G_PTR_ADD: {
  2664. emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
  2665. I.eraseFromParent();
  2666. return true;
  2667. }
  2668. case TargetOpcode::G_SADDO:
  2669. case TargetOpcode::G_UADDO:
  2670. case TargetOpcode::G_SSUBO:
  2671. case TargetOpcode::G_USUBO: {
  2672. // Emit the operation and get the correct condition code.
  2673. auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
  2674. I.getOperand(2), I.getOperand(3), MIB);
  2675. // Now, put the overflow result in the register given by the first operand
  2676. // to the overflow op. CSINC increments the result when the predicate is
  2677. // false, so to get the increment when it's true, we need to use the
  2678. // inverse. In this case, we want to increment when carry is set.
  2679. Register ZReg = AArch64::WZR;
  2680. emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
  2681. getInvertedCondCode(OpAndCC.second), MIB);
  2682. I.eraseFromParent();
  2683. return true;
  2684. }
  2685. case TargetOpcode::G_PTRMASK: {
  2686. Register MaskReg = I.getOperand(2).getReg();
  2687. Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
  2688. // TODO: Implement arbitrary cases
  2689. if (!MaskVal || !isShiftedMask_64(*MaskVal))
  2690. return false;
  2691. uint64_t Mask = *MaskVal;
  2692. I.setDesc(TII.get(AArch64::ANDXri));
  2693. I.getOperand(2).ChangeToImmediate(
  2694. AArch64_AM::encodeLogicalImmediate(Mask, 64));
  2695. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2696. }
  2697. case TargetOpcode::G_PTRTOINT:
  2698. case TargetOpcode::G_TRUNC: {
  2699. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  2700. const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  2701. const Register DstReg = I.getOperand(0).getReg();
  2702. const Register SrcReg = I.getOperand(1).getReg();
  2703. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  2704. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  2705. if (DstRB.getID() != SrcRB.getID()) {
  2706. LLVM_DEBUG(
  2707. dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
  2708. return false;
  2709. }
  2710. if (DstRB.getID() == AArch64::GPRRegBankID) {
  2711. const TargetRegisterClass *DstRC =
  2712. getRegClassForTypeOnBank(DstTy, DstRB, RBI);
  2713. if (!DstRC)
  2714. return false;
  2715. const TargetRegisterClass *SrcRC =
  2716. getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
  2717. if (!SrcRC)
  2718. return false;
  2719. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  2720. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  2721. LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
  2722. return false;
  2723. }
  2724. if (DstRC == SrcRC) {
  2725. // Nothing to be done
  2726. } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
  2727. SrcTy == LLT::scalar(64)) {
  2728. llvm_unreachable("TableGen can import this case");
  2729. return false;
  2730. } else if (DstRC == &AArch64::GPR32RegClass &&
  2731. SrcRC == &AArch64::GPR64RegClass) {
  2732. I.getOperand(1).setSubReg(AArch64::sub_32);
  2733. } else {
  2734. LLVM_DEBUG(
  2735. dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
  2736. return false;
  2737. }
  2738. I.setDesc(TII.get(TargetOpcode::COPY));
  2739. return true;
  2740. } else if (DstRB.getID() == AArch64::FPRRegBankID) {
  2741. if (DstTy == LLT::fixed_vector(4, 16) &&
  2742. SrcTy == LLT::fixed_vector(4, 32)) {
  2743. I.setDesc(TII.get(AArch64::XTNv4i16));
  2744. constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2745. return true;
  2746. }
  2747. if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
  2748. MachineInstr *Extract = emitExtractVectorElt(
  2749. DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
  2750. if (!Extract)
  2751. return false;
  2752. I.eraseFromParent();
  2753. return true;
  2754. }
  2755. // We might have a vector G_PTRTOINT, in which case just emit a COPY.
  2756. if (Opcode == TargetOpcode::G_PTRTOINT) {
  2757. assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
  2758. I.setDesc(TII.get(TargetOpcode::COPY));
  2759. return selectCopy(I, TII, MRI, TRI, RBI);
  2760. }
  2761. }
  2762. return false;
  2763. }
  2764. case TargetOpcode::G_ANYEXT: {
  2765. if (selectUSMovFromExtend(I, MRI))
  2766. return true;
  2767. const Register DstReg = I.getOperand(0).getReg();
  2768. const Register SrcReg = I.getOperand(1).getReg();
  2769. const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
  2770. if (RBDst.getID() != AArch64::GPRRegBankID) {
  2771. LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
  2772. << ", expected: GPR\n");
  2773. return false;
  2774. }
  2775. const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
  2776. if (RBSrc.getID() != AArch64::GPRRegBankID) {
  2777. LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
  2778. << ", expected: GPR\n");
  2779. return false;
  2780. }
  2781. const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  2782. if (DstSize == 0) {
  2783. LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
  2784. return false;
  2785. }
  2786. if (DstSize != 64 && DstSize > 32) {
  2787. LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
  2788. << ", expected: 32 or 64\n");
  2789. return false;
  2790. }
  2791. // At this point G_ANYEXT is just like a plain COPY, but we need
  2792. // to explicitly form the 64-bit value if any.
  2793. if (DstSize > 32) {
  2794. Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
  2795. BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
  2796. .addDef(ExtSrc)
  2797. .addImm(0)
  2798. .addUse(SrcReg)
  2799. .addImm(AArch64::sub_32);
  2800. I.getOperand(1).setReg(ExtSrc);
  2801. }
  2802. return selectCopy(I, TII, MRI, TRI, RBI);
  2803. }
  2804. case TargetOpcode::G_ZEXT:
  2805. case TargetOpcode::G_SEXT_INREG:
  2806. case TargetOpcode::G_SEXT: {
  2807. if (selectUSMovFromExtend(I, MRI))
  2808. return true;
  2809. unsigned Opcode = I.getOpcode();
  2810. const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
  2811. const Register DefReg = I.getOperand(0).getReg();
  2812. Register SrcReg = I.getOperand(1).getReg();
  2813. const LLT DstTy = MRI.getType(DefReg);
  2814. const LLT SrcTy = MRI.getType(SrcReg);
  2815. unsigned DstSize = DstTy.getSizeInBits();
  2816. unsigned SrcSize = SrcTy.getSizeInBits();
  2817. // SEXT_INREG has the same src reg size as dst, the size of the value to be
  2818. // extended is encoded in the imm.
  2819. if (Opcode == TargetOpcode::G_SEXT_INREG)
  2820. SrcSize = I.getOperand(2).getImm();
  2821. if (DstTy.isVector())
  2822. return false; // Should be handled by imported patterns.
  2823. assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
  2824. AArch64::GPRRegBankID &&
  2825. "Unexpected ext regbank");
  2826. MachineInstr *ExtI;
  2827. // First check if we're extending the result of a load which has a dest type
  2828. // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
  2829. // GPR register on AArch64 and all loads which are smaller automatically
  2830. // zero-extend the upper bits. E.g.
  2831. // %v(s8) = G_LOAD %p, :: (load 1)
  2832. // %v2(s32) = G_ZEXT %v(s8)
  2833. if (!IsSigned) {
  2834. auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
  2835. bool IsGPR =
  2836. RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
  2837. if (LoadMI && IsGPR) {
  2838. const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
  2839. unsigned BytesLoaded = MemOp->getSize();
  2840. if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
  2841. return selectCopy(I, TII, MRI, TRI, RBI);
  2842. }
  2843. // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
  2844. // + SUBREG_TO_REG.
  2845. //
  2846. // If we are zero extending from 32 bits to 64 bits, it's possible that
  2847. // the instruction implicitly does the zero extend for us. In that case,
  2848. // we only need the SUBREG_TO_REG.
  2849. if (IsGPR && SrcSize == 32 && DstSize == 64) {
  2850. // Unlike with the G_LOAD case, we don't want to look through copies
  2851. // here. (See isDef32.)
  2852. MachineInstr *Def = MRI.getVRegDef(SrcReg);
  2853. Register SubregToRegSrc = SrcReg;
  2854. // Does the instruction implicitly zero extend?
  2855. if (!Def || !isDef32(*Def)) {
  2856. // No. Zero out using an OR.
  2857. Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  2858. const Register ZReg = AArch64::WZR;
  2859. MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
  2860. SubregToRegSrc = OrDst;
  2861. }
  2862. MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
  2863. .addImm(0)
  2864. .addUse(SubregToRegSrc)
  2865. .addImm(AArch64::sub_32);
  2866. if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
  2867. MRI)) {
  2868. LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
  2869. return false;
  2870. }
  2871. if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
  2872. MRI)) {
  2873. LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
  2874. return false;
  2875. }
  2876. I.eraseFromParent();
  2877. return true;
  2878. }
  2879. }
  2880. if (DstSize == 64) {
  2881. if (Opcode != TargetOpcode::G_SEXT_INREG) {
  2882. // FIXME: Can we avoid manually doing this?
  2883. if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
  2884. MRI)) {
  2885. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
  2886. << " operand\n");
  2887. return false;
  2888. }
  2889. SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
  2890. {&AArch64::GPR64RegClass}, {})
  2891. .addImm(0)
  2892. .addUse(SrcReg)
  2893. .addImm(AArch64::sub_32)
  2894. .getReg(0);
  2895. }
  2896. ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
  2897. {DefReg}, {SrcReg})
  2898. .addImm(0)
  2899. .addImm(SrcSize - 1);
  2900. } else if (DstSize <= 32) {
  2901. ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
  2902. {DefReg}, {SrcReg})
  2903. .addImm(0)
  2904. .addImm(SrcSize - 1);
  2905. } else {
  2906. return false;
  2907. }
  2908. constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  2909. I.eraseFromParent();
  2910. return true;
  2911. }
  2912. case TargetOpcode::G_SITOFP:
  2913. case TargetOpcode::G_UITOFP:
  2914. case TargetOpcode::G_FPTOSI:
  2915. case TargetOpcode::G_FPTOUI: {
  2916. const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
  2917. SrcTy = MRI.getType(I.getOperand(1).getReg());
  2918. const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
  2919. if (NewOpc == Opcode)
  2920. return false;
  2921. I.setDesc(TII.get(NewOpc));
  2922. constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  2923. return true;
  2924. }
  2925. case TargetOpcode::G_FREEZE:
  2926. return selectCopy(I, TII, MRI, TRI, RBI);
  2927. case TargetOpcode::G_INTTOPTR:
  2928. // The importer is currently unable to import pointer types since they
  2929. // didn't exist in SelectionDAG.
  2930. return selectCopy(I, TII, MRI, TRI, RBI);
  2931. case TargetOpcode::G_BITCAST:
  2932. // Imported SelectionDAG rules can handle every bitcast except those that
  2933. // bitcast from a type to the same type. Ideally, these shouldn't occur
  2934. // but we might not run an optimizer that deletes them. The other exception
  2935. // is bitcasts involving pointer types, as SelectionDAG has no knowledge
  2936. // of them.
  2937. return selectCopy(I, TII, MRI, TRI, RBI);
  2938. case TargetOpcode::G_SELECT: {
  2939. if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
  2940. LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
  2941. << ", expected: " << LLT::scalar(1) << '\n');
  2942. return false;
  2943. }
  2944. const Register CondReg = I.getOperand(1).getReg();
  2945. const Register TReg = I.getOperand(2).getReg();
  2946. const Register FReg = I.getOperand(3).getReg();
  2947. if (tryOptSelect(I))
  2948. return true;
  2949. // Make sure to use an unused vreg instead of wzr, so that the peephole
  2950. // optimizations will be able to optimize these.
  2951. Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  2952. auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
  2953. .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  2954. constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  2955. if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
  2956. return false;
  2957. I.eraseFromParent();
  2958. return true;
  2959. }
  2960. case TargetOpcode::G_ICMP: {
  2961. if (Ty.isVector())
  2962. return selectVectorICmp(I, MRI);
  2963. if (Ty != LLT::scalar(32)) {
  2964. LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
  2965. << ", expected: " << LLT::scalar(32) << '\n');
  2966. return false;
  2967. }
  2968. auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
  2969. const AArch64CC::CondCode InvCC =
  2970. changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
  2971. emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
  2972. emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
  2973. /*Src2=*/AArch64::WZR, InvCC, MIB);
  2974. I.eraseFromParent();
  2975. return true;
  2976. }
  2977. case TargetOpcode::G_FCMP: {
  2978. CmpInst::Predicate Pred =
  2979. static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
  2980. if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
  2981. Pred) ||
  2982. !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
  2983. return false;
  2984. I.eraseFromParent();
  2985. return true;
  2986. }
  2987. case TargetOpcode::G_VASTART:
  2988. return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
  2989. : selectVaStartAAPCS(I, MF, MRI);
  2990. case TargetOpcode::G_INTRINSIC:
  2991. return selectIntrinsic(I, MRI);
  2992. case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  2993. return selectIntrinsicWithSideEffects(I, MRI);
  2994. case TargetOpcode::G_IMPLICIT_DEF: {
  2995. I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
  2996. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  2997. const Register DstReg = I.getOperand(0).getReg();
  2998. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  2999. const TargetRegisterClass *DstRC =
  3000. getRegClassForTypeOnBank(DstTy, DstRB, RBI);
  3001. RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
  3002. return true;
  3003. }
  3004. case TargetOpcode::G_BLOCK_ADDR: {
  3005. if (TM.getCodeModel() == CodeModel::Large) {
  3006. materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
  3007. I.eraseFromParent();
  3008. return true;
  3009. } else {
  3010. I.setDesc(TII.get(AArch64::MOVaddrBA));
  3011. auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
  3012. I.getOperand(0).getReg())
  3013. .addBlockAddress(I.getOperand(1).getBlockAddress(),
  3014. /* Offset */ 0, AArch64II::MO_PAGE)
  3015. .addBlockAddress(
  3016. I.getOperand(1).getBlockAddress(), /* Offset */ 0,
  3017. AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  3018. I.eraseFromParent();
  3019. return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
  3020. }
  3021. }
  3022. case AArch64::G_DUP: {
  3023. // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
  3024. // imported patterns. Do it manually here. Avoiding generating s16 gpr is
  3025. // difficult because at RBS we may end up pessimizing the fpr case if we
  3026. // decided to add an anyextend to fix this. Manual selection is the most
  3027. // robust solution for now.
  3028. if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
  3029. AArch64::GPRRegBankID)
  3030. return false; // We expect the fpr regbank case to be imported.
  3031. LLT VecTy = MRI.getType(I.getOperand(0).getReg());
  3032. if (VecTy == LLT::fixed_vector(8, 8))
  3033. I.setDesc(TII.get(AArch64::DUPv8i8gpr));
  3034. else if (VecTy == LLT::fixed_vector(16, 8))
  3035. I.setDesc(TII.get(AArch64::DUPv16i8gpr));
  3036. else if (VecTy == LLT::fixed_vector(4, 16))
  3037. I.setDesc(TII.get(AArch64::DUPv4i16gpr));
  3038. else if (VecTy == LLT::fixed_vector(8, 16))
  3039. I.setDesc(TII.get(AArch64::DUPv8i16gpr));
  3040. else
  3041. return false;
  3042. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  3043. }
  3044. case TargetOpcode::G_INTRINSIC_TRUNC:
  3045. return selectIntrinsicTrunc(I, MRI);
  3046. case TargetOpcode::G_INTRINSIC_ROUND:
  3047. return selectIntrinsicRound(I, MRI);
  3048. case TargetOpcode::G_BUILD_VECTOR:
  3049. return selectBuildVector(I, MRI);
  3050. case TargetOpcode::G_MERGE_VALUES:
  3051. return selectMergeValues(I, MRI);
  3052. case TargetOpcode::G_UNMERGE_VALUES:
  3053. return selectUnmergeValues(I, MRI);
  3054. case TargetOpcode::G_SHUFFLE_VECTOR:
  3055. return selectShuffleVector(I, MRI);
  3056. case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  3057. return selectExtractElt(I, MRI);
  3058. case TargetOpcode::G_INSERT_VECTOR_ELT:
  3059. return selectInsertElt(I, MRI);
  3060. case TargetOpcode::G_CONCAT_VECTORS:
  3061. return selectConcatVectors(I, MRI);
  3062. case TargetOpcode::G_JUMP_TABLE:
  3063. return selectJumpTable(I, MRI);
  3064. case TargetOpcode::G_VECREDUCE_FADD:
  3065. case TargetOpcode::G_VECREDUCE_ADD:
  3066. return selectReduction(I, MRI);
  3067. case TargetOpcode::G_MEMCPY:
  3068. case TargetOpcode::G_MEMCPY_INLINE:
  3069. case TargetOpcode::G_MEMMOVE:
  3070. case TargetOpcode::G_MEMSET:
  3071. assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
  3072. return selectMOPS(I, MRI);
  3073. }
  3074. return false;
  3075. }
  3076. bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
  3077. MachineRegisterInfo &MRI) {
  3078. Register VecReg = I.getOperand(1).getReg();
  3079. LLT VecTy = MRI.getType(VecReg);
  3080. if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
  3081. // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
  3082. // a subregister copy afterwards.
  3083. if (VecTy == LLT::fixed_vector(2, 32)) {
  3084. Register DstReg = I.getOperand(0).getReg();
  3085. auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
  3086. {VecReg, VecReg});
  3087. auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
  3088. .addReg(AddP.getReg(0), 0, AArch64::ssub)
  3089. .getReg(0);
  3090. RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
  3091. I.eraseFromParent();
  3092. return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
  3093. }
  3094. unsigned Opc = 0;
  3095. if (VecTy == LLT::fixed_vector(16, 8))
  3096. Opc = AArch64::ADDVv16i8v;
  3097. else if (VecTy == LLT::fixed_vector(8, 16))
  3098. Opc = AArch64::ADDVv8i16v;
  3099. else if (VecTy == LLT::fixed_vector(4, 32))
  3100. Opc = AArch64::ADDVv4i32v;
  3101. else if (VecTy == LLT::fixed_vector(2, 64))
  3102. Opc = AArch64::ADDPv2i64p;
  3103. else {
  3104. LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
  3105. return false;
  3106. }
  3107. I.setDesc(TII.get(Opc));
  3108. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  3109. }
  3110. if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
  3111. unsigned Opc = 0;
  3112. if (VecTy == LLT::fixed_vector(2, 32))
  3113. Opc = AArch64::FADDPv2i32p;
  3114. else if (VecTy == LLT::fixed_vector(2, 64))
  3115. Opc = AArch64::FADDPv2i64p;
  3116. else {
  3117. LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
  3118. return false;
  3119. }
  3120. I.setDesc(TII.get(Opc));
  3121. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  3122. }
  3123. return false;
  3124. }
  3125. bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
  3126. MachineRegisterInfo &MRI) {
  3127. unsigned Mopcode;
  3128. switch (GI.getOpcode()) {
  3129. case TargetOpcode::G_MEMCPY:
  3130. case TargetOpcode::G_MEMCPY_INLINE:
  3131. Mopcode = AArch64::MOPSMemoryCopyPseudo;
  3132. break;
  3133. case TargetOpcode::G_MEMMOVE:
  3134. Mopcode = AArch64::MOPSMemoryMovePseudo;
  3135. break;
  3136. case TargetOpcode::G_MEMSET:
  3137. // For tagged memset see llvm.aarch64.mops.memset.tag
  3138. Mopcode = AArch64::MOPSMemorySetPseudo;
  3139. break;
  3140. }
  3141. auto &DstPtr = GI.getOperand(0);
  3142. auto &SrcOrVal = GI.getOperand(1);
  3143. auto &Size = GI.getOperand(2);
  3144. // Create copies of the registers that can be clobbered.
  3145. const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
  3146. const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
  3147. const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
  3148. const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
  3149. const auto &SrcValRegClass =
  3150. IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
  3151. // Constrain to specific registers
  3152. RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
  3153. RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
  3154. RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
  3155. MIB.buildCopy(DstPtrCopy, DstPtr);
  3156. MIB.buildCopy(SrcValCopy, SrcOrVal);
  3157. MIB.buildCopy(SizeCopy, Size);
  3158. // New instruction uses the copied registers because it must update them.
  3159. // The defs are not used since they don't exist in G_MEM*. They are still
  3160. // tied.
  3161. // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
  3162. Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
  3163. Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  3164. if (IsSet) {
  3165. MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
  3166. {DstPtrCopy, SizeCopy, SrcValCopy});
  3167. } else {
  3168. Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
  3169. MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
  3170. {DstPtrCopy, SrcValCopy, SizeCopy});
  3171. }
  3172. GI.eraseFromParent();
  3173. return true;
  3174. }
  3175. bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
  3176. MachineRegisterInfo &MRI) {
  3177. assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  3178. Register JTAddr = I.getOperand(0).getReg();
  3179. unsigned JTI = I.getOperand(1).getIndex();
  3180. Register Index = I.getOperand(2).getReg();
  3181. Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  3182. Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  3183. MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
  3184. auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
  3185. {TargetReg, ScratchReg}, {JTAddr, Index})
  3186. .addJumpTableIndex(JTI);
  3187. // Build the indirect branch.
  3188. MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  3189. I.eraseFromParent();
  3190. return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
  3191. }
  3192. bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
  3193. MachineRegisterInfo &MRI) {
  3194. assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  3195. assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
  3196. Register DstReg = I.getOperand(0).getReg();
  3197. unsigned JTI = I.getOperand(1).getIndex();
  3198. // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
  3199. auto MovMI =
  3200. MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
  3201. .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
  3202. .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  3203. I.eraseFromParent();
  3204. return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
  3205. }
  3206. bool AArch64InstructionSelector::selectTLSGlobalValue(
  3207. MachineInstr &I, MachineRegisterInfo &MRI) {
  3208. if (!STI.isTargetMachO())
  3209. return false;
  3210. MachineFunction &MF = *I.getParent()->getParent();
  3211. MF.getFrameInfo().setAdjustsStack(true);
  3212. const auto &GlobalOp = I.getOperand(1);
  3213. assert(GlobalOp.getOffset() == 0 &&
  3214. "Shouldn't have an offset on TLS globals!");
  3215. const GlobalValue &GV = *GlobalOp.getGlobal();
  3216. auto LoadGOT =
  3217. MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
  3218. .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
  3219. auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
  3220. {LoadGOT.getReg(0)})
  3221. .addImm(0);
  3222. MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
  3223. // TLS calls preserve all registers except those that absolutely must be
  3224. // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  3225. // silly).
  3226. MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
  3227. .addUse(AArch64::X0, RegState::Implicit)
  3228. .addDef(AArch64::X0, RegState::Implicit)
  3229. .addRegMask(TRI.getTLSCallPreservedMask());
  3230. MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  3231. RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
  3232. MRI);
  3233. I.eraseFromParent();
  3234. return true;
  3235. }
  3236. bool AArch64InstructionSelector::selectIntrinsicTrunc(
  3237. MachineInstr &I, MachineRegisterInfo &MRI) const {
  3238. const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
  3239. // Select the correct opcode.
  3240. unsigned Opc = 0;
  3241. if (!SrcTy.isVector()) {
  3242. switch (SrcTy.getSizeInBits()) {
  3243. default:
  3244. case 16:
  3245. Opc = AArch64::FRINTZHr;
  3246. break;
  3247. case 32:
  3248. Opc = AArch64::FRINTZSr;
  3249. break;
  3250. case 64:
  3251. Opc = AArch64::FRINTZDr;
  3252. break;
  3253. }
  3254. } else {
  3255. unsigned NumElts = SrcTy.getNumElements();
  3256. switch (SrcTy.getElementType().getSizeInBits()) {
  3257. default:
  3258. break;
  3259. case 16:
  3260. if (NumElts == 4)
  3261. Opc = AArch64::FRINTZv4f16;
  3262. else if (NumElts == 8)
  3263. Opc = AArch64::FRINTZv8f16;
  3264. break;
  3265. case 32:
  3266. if (NumElts == 2)
  3267. Opc = AArch64::FRINTZv2f32;
  3268. else if (NumElts == 4)
  3269. Opc = AArch64::FRINTZv4f32;
  3270. break;
  3271. case 64:
  3272. if (NumElts == 2)
  3273. Opc = AArch64::FRINTZv2f64;
  3274. break;
  3275. }
  3276. }
  3277. if (!Opc) {
  3278. // Didn't get an opcode above, bail.
  3279. LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
  3280. return false;
  3281. }
  3282. // Legalization would have set us up perfectly for this; we just need to
  3283. // set the opcode and move on.
  3284. I.setDesc(TII.get(Opc));
  3285. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  3286. }
  3287. bool AArch64InstructionSelector::selectIntrinsicRound(
  3288. MachineInstr &I, MachineRegisterInfo &MRI) const {
  3289. const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
  3290. // Select the correct opcode.
  3291. unsigned Opc = 0;
  3292. if (!SrcTy.isVector()) {
  3293. switch (SrcTy.getSizeInBits()) {
  3294. default:
  3295. case 16:
  3296. Opc = AArch64::FRINTAHr;
  3297. break;
  3298. case 32:
  3299. Opc = AArch64::FRINTASr;
  3300. break;
  3301. case 64:
  3302. Opc = AArch64::FRINTADr;
  3303. break;
  3304. }
  3305. } else {
  3306. unsigned NumElts = SrcTy.getNumElements();
  3307. switch (SrcTy.getElementType().getSizeInBits()) {
  3308. default:
  3309. break;
  3310. case 16:
  3311. if (NumElts == 4)
  3312. Opc = AArch64::FRINTAv4f16;
  3313. else if (NumElts == 8)
  3314. Opc = AArch64::FRINTAv8f16;
  3315. break;
  3316. case 32:
  3317. if (NumElts == 2)
  3318. Opc = AArch64::FRINTAv2f32;
  3319. else if (NumElts == 4)
  3320. Opc = AArch64::FRINTAv4f32;
  3321. break;
  3322. case 64:
  3323. if (NumElts == 2)
  3324. Opc = AArch64::FRINTAv2f64;
  3325. break;
  3326. }
  3327. }
  3328. if (!Opc) {
  3329. // Didn't get an opcode above, bail.
  3330. LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
  3331. return false;
  3332. }
  3333. // Legalization would have set us up perfectly for this; we just need to
  3334. // set the opcode and move on.
  3335. I.setDesc(TII.get(Opc));
  3336. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  3337. }
  3338. bool AArch64InstructionSelector::selectVectorICmp(
  3339. MachineInstr &I, MachineRegisterInfo &MRI) {
  3340. Register DstReg = I.getOperand(0).getReg();
  3341. LLT DstTy = MRI.getType(DstReg);
  3342. Register SrcReg = I.getOperand(2).getReg();
  3343. Register Src2Reg = I.getOperand(3).getReg();
  3344. LLT SrcTy = MRI.getType(SrcReg);
  3345. unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  3346. unsigned NumElts = DstTy.getNumElements();
  3347. // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  3348. // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  3349. // Third index is cc opcode:
  3350. // 0 == eq
  3351. // 1 == ugt
  3352. // 2 == uge
  3353. // 3 == ult
  3354. // 4 == ule
  3355. // 5 == sgt
  3356. // 6 == sge
  3357. // 7 == slt
  3358. // 8 == sle
  3359. // ne is done by negating 'eq' result.
  3360. // This table below assumes that for some comparisons the operands will be
  3361. // commuted.
  3362. // ult op == commute + ugt op
  3363. // ule op == commute + uge op
  3364. // slt op == commute + sgt op
  3365. // sle op == commute + sge op
  3366. unsigned PredIdx = 0;
  3367. bool SwapOperands = false;
  3368. CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  3369. switch (Pred) {
  3370. case CmpInst::ICMP_NE:
  3371. case CmpInst::ICMP_EQ:
  3372. PredIdx = 0;
  3373. break;
  3374. case CmpInst::ICMP_UGT:
  3375. PredIdx = 1;
  3376. break;
  3377. case CmpInst::ICMP_UGE:
  3378. PredIdx = 2;
  3379. break;
  3380. case CmpInst::ICMP_ULT:
  3381. PredIdx = 3;
  3382. SwapOperands = true;
  3383. break;
  3384. case CmpInst::ICMP_ULE:
  3385. PredIdx = 4;
  3386. SwapOperands = true;
  3387. break;
  3388. case CmpInst::ICMP_SGT:
  3389. PredIdx = 5;
  3390. break;
  3391. case CmpInst::ICMP_SGE:
  3392. PredIdx = 6;
  3393. break;
  3394. case CmpInst::ICMP_SLT:
  3395. PredIdx = 7;
  3396. SwapOperands = true;
  3397. break;
  3398. case CmpInst::ICMP_SLE:
  3399. PredIdx = 8;
  3400. SwapOperands = true;
  3401. break;
  3402. default:
  3403. llvm_unreachable("Unhandled icmp predicate");
  3404. return false;
  3405. }
  3406. // This table obviously should be tablegen'd when we have our GISel native
  3407. // tablegen selector.
  3408. static const unsigned OpcTable[4][4][9] = {
  3409. {
  3410. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3411. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3412. 0 /* invalid */},
  3413. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3414. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3415. 0 /* invalid */},
  3416. {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
  3417. AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
  3418. AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
  3419. {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
  3420. AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
  3421. AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
  3422. },
  3423. {
  3424. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3425. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3426. 0 /* invalid */},
  3427. {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
  3428. AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
  3429. AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
  3430. {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
  3431. AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
  3432. AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
  3433. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3434. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3435. 0 /* invalid */}
  3436. },
  3437. {
  3438. {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
  3439. AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
  3440. AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
  3441. {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
  3442. AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
  3443. AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
  3444. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3445. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3446. 0 /* invalid */},
  3447. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3448. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3449. 0 /* invalid */}
  3450. },
  3451. {
  3452. {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
  3453. AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
  3454. AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
  3455. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3456. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3457. 0 /* invalid */},
  3458. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3459. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3460. 0 /* invalid */},
  3461. {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3462. 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
  3463. 0 /* invalid */}
  3464. },
  3465. };
  3466. unsigned EltIdx = Log2_32(SrcEltSize / 8);
  3467. unsigned NumEltsIdx = Log2_32(NumElts / 2);
  3468. unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  3469. if (!Opc) {
  3470. LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
  3471. return false;
  3472. }
  3473. const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  3474. const TargetRegisterClass *SrcRC =
  3475. getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  3476. if (!SrcRC) {
  3477. LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
  3478. return false;
  3479. }
  3480. unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
  3481. if (SrcTy.getSizeInBits() == 128)
  3482. NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
  3483. if (SwapOperands)
  3484. std::swap(SrcReg, Src2Reg);
  3485. auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  3486. constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  3487. // Invert if we had a 'ne' cc.
  3488. if (NotOpc) {
  3489. Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
  3490. constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  3491. } else {
  3492. MIB.buildCopy(DstReg, Cmp.getReg(0));
  3493. }
  3494. RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  3495. I.eraseFromParent();
  3496. return true;
  3497. }
  3498. MachineInstr *AArch64InstructionSelector::emitScalarToVector(
  3499. unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
  3500. MachineIRBuilder &MIRBuilder) const {
  3501. auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
  3502. auto BuildFn = [&](unsigned SubregIndex) {
  3503. auto Ins =
  3504. MIRBuilder
  3505. .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
  3506. .addImm(SubregIndex);
  3507. constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
  3508. constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
  3509. return &*Ins;
  3510. };
  3511. switch (EltSize) {
  3512. case 16:
  3513. return BuildFn(AArch64::hsub);
  3514. case 32:
  3515. return BuildFn(AArch64::ssub);
  3516. case 64:
  3517. return BuildFn(AArch64::dsub);
  3518. default:
  3519. return nullptr;
  3520. }
  3521. }
  3522. bool AArch64InstructionSelector::selectMergeValues(
  3523. MachineInstr &I, MachineRegisterInfo &MRI) {
  3524. assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  3525. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  3526. const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  3527. assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  3528. const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  3529. if (I.getNumOperands() != 3)
  3530. return false;
  3531. // Merging 2 s64s into an s128.
  3532. if (DstTy == LLT::scalar(128)) {
  3533. if (SrcTy.getSizeInBits() != 64)
  3534. return false;
  3535. Register DstReg = I.getOperand(0).getReg();
  3536. Register Src1Reg = I.getOperand(1).getReg();
  3537. Register Src2Reg = I.getOperand(2).getReg();
  3538. auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
  3539. MachineInstr *InsMI =
  3540. emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
  3541. if (!InsMI)
  3542. return false;
  3543. MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
  3544. Src2Reg, /* LaneIdx */ 1, RB, MIB);
  3545. if (!Ins2MI)
  3546. return false;
  3547. constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  3548. constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
  3549. I.eraseFromParent();
  3550. return true;
  3551. }
  3552. if (RB.getID() != AArch64::GPRRegBankID)
  3553. return false;
  3554. if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
  3555. return false;
  3556. auto *DstRC = &AArch64::GPR64RegClass;
  3557. Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  3558. MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  3559. TII.get(TargetOpcode::SUBREG_TO_REG))
  3560. .addDef(SubToRegDef)
  3561. .addImm(0)
  3562. .addUse(I.getOperand(1).getReg())
  3563. .addImm(AArch64::sub_32);
  3564. Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  3565. // Need to anyext the second scalar before we can use bfm
  3566. MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  3567. TII.get(TargetOpcode::SUBREG_TO_REG))
  3568. .addDef(SubToRegDef2)
  3569. .addImm(0)
  3570. .addUse(I.getOperand(2).getReg())
  3571. .addImm(AArch64::sub_32);
  3572. MachineInstr &BFM =
  3573. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
  3574. .addDef(I.getOperand(0).getReg())
  3575. .addUse(SubToRegDef)
  3576. .addUse(SubToRegDef2)
  3577. .addImm(32)
  3578. .addImm(31);
  3579. constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  3580. constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  3581. constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  3582. I.eraseFromParent();
  3583. return true;
  3584. }
  3585. static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
  3586. const unsigned EltSize) {
  3587. // Choose a lane copy opcode and subregister based off of the size of the
  3588. // vector's elements.
  3589. switch (EltSize) {
  3590. case 8:
  3591. CopyOpc = AArch64::DUPi8;
  3592. ExtractSubReg = AArch64::bsub;
  3593. break;
  3594. case 16:
  3595. CopyOpc = AArch64::DUPi16;
  3596. ExtractSubReg = AArch64::hsub;
  3597. break;
  3598. case 32:
  3599. CopyOpc = AArch64::DUPi32;
  3600. ExtractSubReg = AArch64::ssub;
  3601. break;
  3602. case 64:
  3603. CopyOpc = AArch64::DUPi64;
  3604. ExtractSubReg = AArch64::dsub;
  3605. break;
  3606. default:
  3607. // Unknown size, bail out.
  3608. LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
  3609. return false;
  3610. }
  3611. return true;
  3612. }
  3613. MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
  3614. Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
  3615. Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  3616. MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  3617. unsigned CopyOpc = 0;
  3618. unsigned ExtractSubReg = 0;
  3619. if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
  3620. LLVM_DEBUG(
  3621. dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
  3622. return nullptr;
  3623. }
  3624. const TargetRegisterClass *DstRC =
  3625. getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  3626. if (!DstRC) {
  3627. LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
  3628. return nullptr;
  3629. }
  3630. const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  3631. const LLT &VecTy = MRI.getType(VecReg);
  3632. const TargetRegisterClass *VecRC =
  3633. getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
  3634. if (!VecRC) {
  3635. LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
  3636. return nullptr;
  3637. }
  3638. // The register that we're going to copy into.
  3639. Register InsertReg = VecReg;
  3640. if (!DstReg)
  3641. DstReg = MRI.createVirtualRegister(DstRC);
  3642. // If the lane index is 0, we just use a subregister COPY.
  3643. if (LaneIdx == 0) {
  3644. auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
  3645. .addReg(VecReg, 0, ExtractSubReg);
  3646. RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  3647. return &*Copy;
  3648. }
  3649. // Lane copies require 128-bit wide registers. If we're dealing with an
  3650. // unpacked vector, then we need to move up to that width. Insert an implicit
  3651. // def and a subregister insert to get us there.
  3652. if (VecTy.getSizeInBits() != 128) {
  3653. MachineInstr *ScalarToVector = emitScalarToVector(
  3654. VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
  3655. if (!ScalarToVector)
  3656. return nullptr;
  3657. InsertReg = ScalarToVector->getOperand(0).getReg();
  3658. }
  3659. MachineInstr *LaneCopyMI =
  3660. MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  3661. constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
  3662. // Make sure that we actually constrain the initial copy.
  3663. RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  3664. return LaneCopyMI;
  3665. }
  3666. bool AArch64InstructionSelector::selectExtractElt(
  3667. MachineInstr &I, MachineRegisterInfo &MRI) {
  3668. assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
  3669. "unexpected opcode!");
  3670. Register DstReg = I.getOperand(0).getReg();
  3671. const LLT NarrowTy = MRI.getType(DstReg);
  3672. const Register SrcReg = I.getOperand(1).getReg();
  3673. const LLT WideTy = MRI.getType(SrcReg);
  3674. (void)WideTy;
  3675. assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
  3676. "source register size too small!");
  3677. assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
  3678. // Need the lane index to determine the correct copy opcode.
  3679. MachineOperand &LaneIdxOp = I.getOperand(2);
  3680. assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
  3681. if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  3682. LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
  3683. return false;
  3684. }
  3685. // Find the index to extract from.
  3686. auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
  3687. if (!VRegAndVal)
  3688. return false;
  3689. unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  3690. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  3691. MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
  3692. LaneIdx, MIB);
  3693. if (!Extract)
  3694. return false;
  3695. I.eraseFromParent();
  3696. return true;
  3697. }
  3698. bool AArch64InstructionSelector::selectSplitVectorUnmerge(
  3699. MachineInstr &I, MachineRegisterInfo &MRI) {
  3700. unsigned NumElts = I.getNumOperands() - 1;
  3701. Register SrcReg = I.getOperand(NumElts).getReg();
  3702. const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  3703. const LLT SrcTy = MRI.getType(SrcReg);
  3704. assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
  3705. if (SrcTy.getSizeInBits() > 128) {
  3706. LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
  3707. return false;
  3708. }
  3709. // We implement a split vector operation by treating the sub-vectors as
  3710. // scalars and extracting them.
  3711. const RegisterBank &DstRB =
  3712. *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
  3713. for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
  3714. Register Dst = I.getOperand(OpIdx).getReg();
  3715. MachineInstr *Extract =
  3716. emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
  3717. if (!Extract)
  3718. return false;
  3719. }
  3720. I.eraseFromParent();
  3721. return true;
  3722. }
  3723. bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
  3724. MachineRegisterInfo &MRI) {
  3725. assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
  3726. "unexpected opcode");
  3727. // TODO: Handle unmerging into GPRs and from scalars to scalars.
  3728. if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
  3729. AArch64::FPRRegBankID ||
  3730. RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
  3731. AArch64::FPRRegBankID) {
  3732. LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
  3733. "currently unsupported.\n");
  3734. return false;
  3735. }
  3736. // The last operand is the vector source register, and every other operand is
  3737. // a register to unpack into.
  3738. unsigned NumElts = I.getNumOperands() - 1;
  3739. Register SrcReg = I.getOperand(NumElts).getReg();
  3740. const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  3741. const LLT WideTy = MRI.getType(SrcReg);
  3742. (void)WideTy;
  3743. assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
  3744. "can only unmerge from vector or s128 types!");
  3745. assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
  3746. "source register size too small!");
  3747. if (!NarrowTy.isScalar())
  3748. return selectSplitVectorUnmerge(I, MRI);
  3749. // Choose a lane copy opcode and subregister based off of the size of the
  3750. // vector's elements.
  3751. unsigned CopyOpc = 0;
  3752. unsigned ExtractSubReg = 0;
  3753. if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
  3754. return false;
  3755. // Set up for the lane copies.
  3756. MachineBasicBlock &MBB = *I.getParent();
  3757. // Stores the registers we'll be copying from.
  3758. SmallVector<Register, 4> InsertRegs;
  3759. // We'll use the first register twice, so we only need NumElts-1 registers.
  3760. unsigned NumInsertRegs = NumElts - 1;
  3761. // If our elements fit into exactly 128 bits, then we can copy from the source
  3762. // directly. Otherwise, we need to do a bit of setup with some subregister
  3763. // inserts.
  3764. if (NarrowTy.getSizeInBits() * NumElts == 128) {
  3765. InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  3766. } else {
  3767. // No. We have to perform subregister inserts. For each insert, create an
  3768. // implicit def and a subregister insert, and save the register we create.
  3769. const TargetRegisterClass *RC =
  3770. getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
  3771. WideTy.getScalarSizeInBits() * NumElts);
  3772. unsigned SubReg = 0;
  3773. bool Found = getSubRegForClass(RC, TRI, SubReg);
  3774. (void)Found;
  3775. assert(Found && "expected to find last operand's subeg idx");
  3776. for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
  3777. Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
  3778. MachineInstr &ImpDefMI =
  3779. *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
  3780. ImpDefReg);
  3781. // Now, create the subregister insert from SrcReg.
  3782. Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
  3783. MachineInstr &InsMI =
  3784. *BuildMI(MBB, I, I.getDebugLoc(),
  3785. TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
  3786. .addUse(ImpDefReg)
  3787. .addUse(SrcReg)
  3788. .addImm(SubReg);
  3789. constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
  3790. constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
  3791. // Save the register so that we can copy from it after.
  3792. InsertRegs.push_back(InsertReg);
  3793. }
  3794. }
  3795. // Now that we've created any necessary subregister inserts, we can
  3796. // create the copies.
  3797. //
  3798. // Perform the first copy separately as a subregister copy.
  3799. Register CopyTo = I.getOperand(0).getReg();
  3800. auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
  3801. .addReg(InsertRegs[0], 0, ExtractSubReg);
  3802. constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
  3803. // Now, perform the remaining copies as vector lane copies.
  3804. unsigned LaneIdx = 1;
  3805. for (Register InsReg : InsertRegs) {
  3806. Register CopyTo = I.getOperand(LaneIdx).getReg();
  3807. MachineInstr &CopyInst =
  3808. *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
  3809. .addUse(InsReg)
  3810. .addImm(LaneIdx);
  3811. constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
  3812. ++LaneIdx;
  3813. }
  3814. // Separately constrain the first copy's destination. Because of the
  3815. // limitation in constrainOperandRegClass, we can't guarantee that this will
  3816. // actually be constrained. So, do it ourselves using the second operand.
  3817. const TargetRegisterClass *RC =
  3818. MRI.getRegClassOrNull(I.getOperand(1).getReg());
  3819. if (!RC) {
  3820. LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
  3821. return false;
  3822. }
  3823. RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  3824. I.eraseFromParent();
  3825. return true;
  3826. }
  3827. bool AArch64InstructionSelector::selectConcatVectors(
  3828. MachineInstr &I, MachineRegisterInfo &MRI) {
  3829. assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
  3830. "Unexpected opcode");
  3831. Register Dst = I.getOperand(0).getReg();
  3832. Register Op1 = I.getOperand(1).getReg();
  3833. Register Op2 = I.getOperand(2).getReg();
  3834. MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
  3835. if (!ConcatMI)
  3836. return false;
  3837. I.eraseFromParent();
  3838. return true;
  3839. }
  3840. unsigned
  3841. AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
  3842. MachineFunction &MF) const {
  3843. Type *CPTy = CPVal->getType();
  3844. Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
  3845. MachineConstantPool *MCP = MF.getConstantPool();
  3846. return MCP->getConstantPoolIndex(CPVal, Alignment);
  3847. }
  3848. MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
  3849. const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  3850. auto &MF = MIRBuilder.getMF();
  3851. unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  3852. auto Adrp =
  3853. MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
  3854. .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
  3855. MachineInstr *LoadMI = nullptr;
  3856. MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  3857. unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
  3858. switch (Size) {
  3859. case 16:
  3860. LoadMI =
  3861. &*MIRBuilder
  3862. .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
  3863. .addConstantPoolIndex(CPIdx, 0,
  3864. AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  3865. break;
  3866. case 8:
  3867. LoadMI =
  3868. &*MIRBuilder
  3869. .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
  3870. .addConstantPoolIndex(CPIdx, 0,
  3871. AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  3872. break;
  3873. case 4:
  3874. LoadMI =
  3875. &*MIRBuilder
  3876. .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
  3877. .addConstantPoolIndex(CPIdx, 0,
  3878. AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  3879. break;
  3880. case 2:
  3881. LoadMI =
  3882. &*MIRBuilder
  3883. .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
  3884. .addConstantPoolIndex(CPIdx, 0,
  3885. AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  3886. break;
  3887. default:
  3888. LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
  3889. << *CPVal->getType());
  3890. return nullptr;
  3891. }
  3892. LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
  3893. MachineMemOperand::MOLoad,
  3894. Size, Align(Size)));
  3895. constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  3896. constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  3897. return LoadMI;
  3898. }
  3899. /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
  3900. /// size and RB.
  3901. static std::pair<unsigned, unsigned>
  3902. getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  3903. unsigned Opc, SubregIdx;
  3904. if (RB.getID() == AArch64::GPRRegBankID) {
  3905. if (EltSize == 16) {
  3906. Opc = AArch64::INSvi16gpr;
  3907. SubregIdx = AArch64::ssub;
  3908. } else if (EltSize == 32) {
  3909. Opc = AArch64::INSvi32gpr;
  3910. SubregIdx = AArch64::ssub;
  3911. } else if (EltSize == 64) {
  3912. Opc = AArch64::INSvi64gpr;
  3913. SubregIdx = AArch64::dsub;
  3914. } else {
  3915. llvm_unreachable("invalid elt size!");
  3916. }
  3917. } else {
  3918. if (EltSize == 8) {
  3919. Opc = AArch64::INSvi8lane;
  3920. SubregIdx = AArch64::bsub;
  3921. } else if (EltSize == 16) {
  3922. Opc = AArch64::INSvi16lane;
  3923. SubregIdx = AArch64::hsub;
  3924. } else if (EltSize == 32) {
  3925. Opc = AArch64::INSvi32lane;
  3926. SubregIdx = AArch64::ssub;
  3927. } else if (EltSize == 64) {
  3928. Opc = AArch64::INSvi64lane;
  3929. SubregIdx = AArch64::dsub;
  3930. } else {
  3931. llvm_unreachable("invalid elt size!");
  3932. }
  3933. }
  3934. return std::make_pair(Opc, SubregIdx);
  3935. }
  3936. MachineInstr *AArch64InstructionSelector::emitInstr(
  3937. unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
  3938. std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
  3939. const ComplexRendererFns &RenderFns) const {
  3940. assert(Opcode && "Expected an opcode?");
  3941. assert(!isPreISelGenericOpcode(Opcode) &&
  3942. "Function should only be used to produce selected instructions!");
  3943. auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
  3944. if (RenderFns)
  3945. for (auto &Fn : *RenderFns)
  3946. Fn(MI);
  3947. constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
  3948. return &*MI;
  3949. }
  3950. MachineInstr *AArch64InstructionSelector::emitAddSub(
  3951. const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
  3952. Register Dst, MachineOperand &LHS, MachineOperand &RHS,
  3953. MachineIRBuilder &MIRBuilder) const {
  3954. MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  3955. assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  3956. auto Ty = MRI.getType(LHS.getReg());
  3957. assert(!Ty.isVector() && "Expected a scalar or pointer?");
  3958. unsigned Size = Ty.getSizeInBits();
  3959. assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  3960. bool Is32Bit = Size == 32;
  3961. // INSTRri form with positive arithmetic immediate.
  3962. if (auto Fns = selectArithImmed(RHS))
  3963. return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
  3964. MIRBuilder, Fns);
  3965. // INSTRri form with negative arithmetic immediate.
  3966. if (auto Fns = selectNegArithImmed(RHS))
  3967. return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
  3968. MIRBuilder, Fns);
  3969. // INSTRrx form.
  3970. if (auto Fns = selectArithExtendedRegister(RHS))
  3971. return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
  3972. MIRBuilder, Fns);
  3973. // INSTRrs form.
  3974. if (auto Fns = selectShiftedRegister(RHS))
  3975. return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
  3976. MIRBuilder, Fns);
  3977. return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
  3978. MIRBuilder);
  3979. }
  3980. MachineInstr *
  3981. AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
  3982. MachineOperand &RHS,
  3983. MachineIRBuilder &MIRBuilder) const {
  3984. const std::array<std::array<unsigned, 2>, 5> OpcTable{
  3985. {{AArch64::ADDXri, AArch64::ADDWri},
  3986. {AArch64::ADDXrs, AArch64::ADDWrs},
  3987. {AArch64::ADDXrr, AArch64::ADDWrr},
  3988. {AArch64::SUBXri, AArch64::SUBWri},
  3989. {AArch64::ADDXrx, AArch64::ADDWrx}}};
  3990. return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
  3991. }
  3992. MachineInstr *
  3993. AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
  3994. MachineOperand &RHS,
  3995. MachineIRBuilder &MIRBuilder) const {
  3996. const std::array<std::array<unsigned, 2>, 5> OpcTable{
  3997. {{AArch64::ADDSXri, AArch64::ADDSWri},
  3998. {AArch64::ADDSXrs, AArch64::ADDSWrs},
  3999. {AArch64::ADDSXrr, AArch64::ADDSWrr},
  4000. {AArch64::SUBSXri, AArch64::SUBSWri},
  4001. {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
  4002. return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
  4003. }
  4004. MachineInstr *
  4005. AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
  4006. MachineOperand &RHS,
  4007. MachineIRBuilder &MIRBuilder) const {
  4008. const std::array<std::array<unsigned, 2>, 5> OpcTable{
  4009. {{AArch64::SUBSXri, AArch64::SUBSWri},
  4010. {AArch64::SUBSXrs, AArch64::SUBSWrs},
  4011. {AArch64::SUBSXrr, AArch64::SUBSWrr},
  4012. {AArch64::ADDSXri, AArch64::ADDSWri},
  4013. {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
  4014. return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
  4015. }
  4016. MachineInstr *
  4017. AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
  4018. MachineIRBuilder &MIRBuilder) const {
  4019. MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  4020. bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  4021. auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
  4022. return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
  4023. }
  4024. MachineInstr *
  4025. AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
  4026. MachineIRBuilder &MIRBuilder) const {
  4027. assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  4028. MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  4029. LLT Ty = MRI.getType(LHS.getReg());
  4030. unsigned RegSize = Ty.getSizeInBits();
  4031. bool Is32Bit = (RegSize == 32);
  4032. const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
  4033. {AArch64::ANDSXrs, AArch64::ANDSWrs},
  4034. {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  4035. // ANDS needs a logical immediate for its immediate form. Check if we can
  4036. // fold one in.
  4037. if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
  4038. int64_t Imm = ValAndVReg->Value.getSExtValue();
  4039. if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
  4040. auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
  4041. TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  4042. constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  4043. return &*TstMI;
  4044. }
  4045. }
  4046. if (auto Fns = selectLogicalShiftedRegister(RHS))
  4047. return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
  4048. return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
  4049. }
  4050. MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
  4051. MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
  4052. MachineIRBuilder &MIRBuilder) const {
  4053. assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  4054. assert(Predicate.isPredicate() && "Expected predicate?");
  4055. MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  4056. LLT CmpTy = MRI.getType(LHS.getReg());
  4057. assert(!CmpTy.isVector() && "Expected scalar or pointer");
  4058. unsigned Size = CmpTy.getSizeInBits();
  4059. (void)Size;
  4060. assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
  4061. // Fold the compare into a cmn or tst if possible.
  4062. if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
  4063. return FoldCmp;
  4064. auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
  4065. return emitSUBS(Dst, LHS, RHS, MIRBuilder);
  4066. }
  4067. MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
  4068. Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  4069. MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  4070. #ifndef NDEBUG
  4071. LLT Ty = MRI.getType(Dst);
  4072. assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
  4073. "Expected a 32-bit scalar register?");
  4074. #endif
  4075. const Register ZReg = AArch64::WZR;
  4076. AArch64CC::CondCode CC1, CC2;
  4077. changeFCMPPredToAArch64CC(Pred, CC1, CC2);
  4078. auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
  4079. if (CC2 == AArch64CC::AL)
  4080. return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
  4081. MIRBuilder);
  4082. const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  4083. Register Def1Reg = MRI.createVirtualRegister(RC);
  4084. Register Def2Reg = MRI.createVirtualRegister(RC);
  4085. auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
  4086. emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
  4087. emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
  4088. auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
  4089. constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
  4090. return &*OrMI;
  4091. }
  4092. MachineInstr *
  4093. AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
  4094. MachineIRBuilder &MIRBuilder,
  4095. Optional<CmpInst::Predicate> Pred) const {
  4096. MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  4097. LLT Ty = MRI.getType(LHS);
  4098. if (Ty.isVector())
  4099. return nullptr;
  4100. unsigned OpSize = Ty.getSizeInBits();
  4101. if (OpSize != 32 && OpSize != 64)
  4102. return nullptr;
  4103. // If this is a compare against +0.0, then we don't have
  4104. // to explicitly materialize a constant.
  4105. const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
  4106. bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  4107. auto IsEqualityPred = [](CmpInst::Predicate P) {
  4108. return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
  4109. P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  4110. };
  4111. if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
  4112. // Try commutating the operands.
  4113. const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
  4114. if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
  4115. ShouldUseImm = true;
  4116. std::swap(LHS, RHS);
  4117. }
  4118. }
  4119. unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
  4120. {AArch64::FCMPSri, AArch64::FCMPDri}};
  4121. unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
  4122. // Partially build the compare. Decide if we need to add a use for the
  4123. // third operand based off whether or not we're comparing against 0.0.
  4124. auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
  4125. if (!ShouldUseImm)
  4126. CmpMI.addUse(RHS);
  4127. constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  4128. return &*CmpMI;
  4129. }
  4130. MachineInstr *AArch64InstructionSelector::emitVectorConcat(
  4131. Optional<Register> Dst, Register Op1, Register Op2,
  4132. MachineIRBuilder &MIRBuilder) const {
  4133. // We implement a vector concat by:
  4134. // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  4135. // 2. Insert the upper vector into the destination's upper element
  4136. // TODO: some of this code is common with G_BUILD_VECTOR handling.
  4137. MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  4138. const LLT Op1Ty = MRI.getType(Op1);
  4139. const LLT Op2Ty = MRI.getType(Op2);
  4140. if (Op1Ty != Op2Ty) {
  4141. LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
  4142. return nullptr;
  4143. }
  4144. assert(Op1Ty.isVector() && "Expected a vector for vector concat");
  4145. if (Op1Ty.getSizeInBits() >= 128) {
  4146. LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
  4147. return nullptr;
  4148. }
  4149. // At the moment we just support 64 bit vector concats.
  4150. if (Op1Ty.getSizeInBits() != 64) {
  4151. LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
  4152. return nullptr;
  4153. }
  4154. const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  4155. const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  4156. const TargetRegisterClass *DstRC =
  4157. getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
  4158. MachineInstr *WidenedOp1 =
  4159. emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  4160. MachineInstr *WidenedOp2 =
  4161. emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  4162. if (!WidenedOp1 || !WidenedOp2) {
  4163. LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
  4164. return nullptr;
  4165. }
  4166. // Now do the insert of the upper element.
  4167. unsigned InsertOpc, InsSubRegIdx;
  4168. std::tie(InsertOpc, InsSubRegIdx) =
  4169. getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
  4170. if (!Dst)
  4171. Dst = MRI.createVirtualRegister(DstRC);
  4172. auto InsElt =
  4173. MIRBuilder
  4174. .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
  4175. .addImm(1) /* Lane index */
  4176. .addUse(WidenedOp2->getOperand(0).getReg())
  4177. .addImm(0);
  4178. constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  4179. return &*InsElt;
  4180. }
  4181. MachineInstr *
  4182. AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
  4183. Register Src2, AArch64CC::CondCode Pred,
  4184. MachineIRBuilder &MIRBuilder) const {
  4185. auto &MRI = *MIRBuilder.getMRI();
  4186. const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
  4187. // If we used a register class, then this won't necessarily have an LLT.
  4188. // Compute the size based off whether or not we have a class or bank.
  4189. unsigned Size;
  4190. if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
  4191. Size = TRI.getRegSizeInBits(*RC);
  4192. else
  4193. Size = MRI.getType(Dst).getSizeInBits();
  4194. // Some opcodes use s1.
  4195. assert(Size <= 64 && "Expected 64 bits or less only!");
  4196. static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
  4197. unsigned Opc = OpcTable[Size == 64];
  4198. auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
  4199. constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
  4200. return &*CSINC;
  4201. }
  4202. std::pair<MachineInstr *, AArch64CC::CondCode>
  4203. AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
  4204. MachineOperand &LHS,
  4205. MachineOperand &RHS,
  4206. MachineIRBuilder &MIRBuilder) const {
  4207. switch (Opcode) {
  4208. default:
  4209. llvm_unreachable("Unexpected opcode!");
  4210. case TargetOpcode::G_SADDO:
  4211. return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  4212. case TargetOpcode::G_UADDO:
  4213. return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
  4214. case TargetOpcode::G_SSUBO:
  4215. return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
  4216. case TargetOpcode::G_USUBO:
  4217. return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
  4218. }
  4219. }
  4220. bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
  4221. MachineRegisterInfo &MRI = *MIB.getMRI();
  4222. // We want to recognize this pattern:
  4223. //
  4224. // $z = G_FCMP pred, $x, $y
  4225. // ...
  4226. // $w = G_SELECT $z, $a, $b
  4227. //
  4228. // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  4229. // some copies/truncs in between.)
  4230. //
  4231. // If we see this, then we can emit something like this:
  4232. //
  4233. // fcmp $x, $y
  4234. // fcsel $w, $a, $b, pred
  4235. //
  4236. // Rather than emitting both of the rather long sequences in the standard
  4237. // G_FCMP/G_SELECT select methods.
  4238. // First, check if the condition is defined by a compare.
  4239. MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  4240. while (CondDef) {
  4241. // We can only fold if all of the defs have one use.
  4242. Register CondDefReg = CondDef->getOperand(0).getReg();
  4243. if (!MRI.hasOneNonDBGUse(CondDefReg)) {
  4244. // Unless it's another select.
  4245. for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
  4246. if (CondDef == &UI)
  4247. continue;
  4248. if (UI.getOpcode() != TargetOpcode::G_SELECT)
  4249. return false;
  4250. }
  4251. }
  4252. // We can skip over G_TRUNC since the condition is 1-bit.
  4253. // Truncating/extending can have no impact on the value.
  4254. unsigned Opc = CondDef->getOpcode();
  4255. if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
  4256. break;
  4257. // Can't see past copies from physregs.
  4258. if (Opc == TargetOpcode::COPY &&
  4259. Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
  4260. return false;
  4261. CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  4262. }
  4263. // Is the condition defined by a compare?
  4264. if (!CondDef)
  4265. return false;
  4266. unsigned CondOpc = CondDef->getOpcode();
  4267. if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
  4268. return false;
  4269. AArch64CC::CondCode CondCode;
  4270. if (CondOpc == TargetOpcode::G_ICMP) {
  4271. auto Pred =
  4272. static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
  4273. CondCode = changeICMPPredToAArch64CC(Pred);
  4274. emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
  4275. CondDef->getOperand(1), MIB);
  4276. } else {
  4277. // Get the condition code for the select.
  4278. auto Pred =
  4279. static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
  4280. AArch64CC::CondCode CondCode2;
  4281. changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
  4282. // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
  4283. // instructions to emit the comparison.
  4284. // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
  4285. // unnecessary.
  4286. if (CondCode2 != AArch64CC::AL)
  4287. return false;
  4288. if (!emitFPCompare(CondDef->getOperand(2).getReg(),
  4289. CondDef->getOperand(3).getReg(), MIB)) {
  4290. LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
  4291. return false;
  4292. }
  4293. }
  4294. // Emit the select.
  4295. emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
  4296. I.getOperand(3).getReg(), CondCode, MIB);
  4297. I.eraseFromParent();
  4298. return true;
  4299. }
  4300. MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
  4301. MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
  4302. MachineIRBuilder &MIRBuilder) const {
  4303. assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
  4304. "Unexpected MachineOperand");
  4305. MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  4306. // We want to find this sort of thing:
  4307. // x = G_SUB 0, y
  4308. // G_ICMP z, x
  4309. //
  4310. // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  4311. // e.g:
  4312. //
  4313. // cmn z, y
  4314. // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  4315. MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  4316. MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  4317. auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
  4318. // Given this:
  4319. //
  4320. // x = G_SUB 0, y
  4321. // G_ICMP x, z
  4322. //
  4323. // Produce this:
  4324. //
  4325. // cmn y, z
  4326. if (isCMN(LHSDef, P, MRI))
  4327. return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
  4328. // Same idea here, but with the RHS of the compare instead:
  4329. //
  4330. // Given this:
  4331. //
  4332. // x = G_SUB 0, y
  4333. // G_ICMP z, x
  4334. //
  4335. // Produce this:
  4336. //
  4337. // cmn z, y
  4338. if (isCMN(RHSDef, P, MRI))
  4339. return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
  4340. // Given this:
  4341. //
  4342. // z = G_AND x, y
  4343. // G_ICMP z, 0
  4344. //
  4345. // Produce this if the compare is signed:
  4346. //
  4347. // tst x, y
  4348. if (!CmpInst::isUnsigned(P) && LHSDef &&
  4349. LHSDef->getOpcode() == TargetOpcode::G_AND) {
  4350. // Make sure that the RHS is 0.
  4351. auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  4352. if (!ValAndVReg || ValAndVReg->Value != 0)
  4353. return nullptr;
  4354. return emitTST(LHSDef->getOperand(1),
  4355. LHSDef->getOperand(2), MIRBuilder);
  4356. }
  4357. return nullptr;
  4358. }
  4359. bool AArch64InstructionSelector::selectShuffleVector(
  4360. MachineInstr &I, MachineRegisterInfo &MRI) {
  4361. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  4362. Register Src1Reg = I.getOperand(1).getReg();
  4363. const LLT Src1Ty = MRI.getType(Src1Reg);
  4364. Register Src2Reg = I.getOperand(2).getReg();
  4365. const LLT Src2Ty = MRI.getType(Src2Reg);
  4366. ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
  4367. MachineBasicBlock &MBB = *I.getParent();
  4368. MachineFunction &MF = *MBB.getParent();
  4369. LLVMContext &Ctx = MF.getFunction().getContext();
  4370. // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  4371. // it's originated from a <1 x T> type. Those should have been lowered into
  4372. // G_BUILD_VECTOR earlier.
  4373. if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
  4374. LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
  4375. return false;
  4376. }
  4377. unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
  4378. SmallVector<Constant *, 64> CstIdxs;
  4379. for (int Val : Mask) {
  4380. // For now, any undef indexes we'll just assume to be 0. This should be
  4381. // optimized in future, e.g. to select DUP etc.
  4382. Val = Val < 0 ? 0 : Val;
  4383. for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
  4384. unsigned Offset = Byte + Val * BytesPerElt;
  4385. CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
  4386. }
  4387. }
  4388. // Use a constant pool to load the index vector for TBL.
  4389. Constant *CPVal = ConstantVector::get(CstIdxs);
  4390. MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
  4391. if (!IndexLoad) {
  4392. LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
  4393. return false;
  4394. }
  4395. if (DstTy.getSizeInBits() != 128) {
  4396. assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
  4397. // This case can be done with TBL1.
  4398. MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
  4399. if (!Concat) {
  4400. LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
  4401. return false;
  4402. }
  4403. // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
  4404. IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
  4405. IndexLoad->getOperand(0).getReg(), MIB);
  4406. auto TBL1 = MIB.buildInstr(
  4407. AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
  4408. {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
  4409. constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
  4410. auto Copy =
  4411. MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
  4412. .addReg(TBL1.getReg(0), 0, AArch64::dsub);
  4413. RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
  4414. I.eraseFromParent();
  4415. return true;
  4416. }
  4417. // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  4418. // Q registers for regalloc.
  4419. SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  4420. auto RegSeq = createQTuple(Regs, MIB);
  4421. auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
  4422. {RegSeq, IndexLoad->getOperand(0)});
  4423. constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  4424. I.eraseFromParent();
  4425. return true;
  4426. }
  4427. MachineInstr *AArch64InstructionSelector::emitLaneInsert(
  4428. Optional<Register> DstReg, Register SrcReg, Register EltReg,
  4429. unsigned LaneIdx, const RegisterBank &RB,
  4430. MachineIRBuilder &MIRBuilder) const {
  4431. MachineInstr *InsElt = nullptr;
  4432. const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  4433. MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  4434. // Create a register to define with the insert if one wasn't passed in.
  4435. if (!DstReg)
  4436. DstReg = MRI.createVirtualRegister(DstRC);
  4437. unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  4438. unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
  4439. if (RB.getID() == AArch64::FPRRegBankID) {
  4440. auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
  4441. InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
  4442. .addImm(LaneIdx)
  4443. .addUse(InsSub->getOperand(0).getReg())
  4444. .addImm(0);
  4445. } else {
  4446. InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
  4447. .addImm(LaneIdx)
  4448. .addUse(EltReg);
  4449. }
  4450. constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  4451. return InsElt;
  4452. }
  4453. bool AArch64InstructionSelector::selectUSMovFromExtend(
  4454. MachineInstr &MI, MachineRegisterInfo &MRI) {
  4455. if (MI.getOpcode() != TargetOpcode::G_SEXT &&
  4456. MI.getOpcode() != TargetOpcode::G_ZEXT &&
  4457. MI.getOpcode() != TargetOpcode::G_ANYEXT)
  4458. return false;
  4459. bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  4460. const Register DefReg = MI.getOperand(0).getReg();
  4461. const LLT DstTy = MRI.getType(DefReg);
  4462. unsigned DstSize = DstTy.getSizeInBits();
  4463. if (DstSize != 32 && DstSize != 64)
  4464. return false;
  4465. MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
  4466. MI.getOperand(1).getReg(), MRI);
  4467. int64_t Lane;
  4468. if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
  4469. return false;
  4470. Register Src0 = Extract->getOperand(1).getReg();
  4471. const LLT &VecTy = MRI.getType(Src0);
  4472. if (VecTy.getSizeInBits() != 128) {
  4473. const MachineInstr *ScalarToVector = emitScalarToVector(
  4474. VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
  4475. assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
  4476. Src0 = ScalarToVector->getOperand(0).getReg();
  4477. }
  4478. unsigned Opcode;
  4479. if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
  4480. Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  4481. else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
  4482. Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  4483. else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
  4484. Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  4485. else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
  4486. Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  4487. else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
  4488. Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
  4489. else
  4490. llvm_unreachable("Unexpected type combo for S/UMov!");
  4491. // We may need to generate one of these, depending on the type and sign of the
  4492. // input:
  4493. // DstReg = SMOV Src0, Lane;
  4494. // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
  4495. MachineInstr *ExtI = nullptr;
  4496. if (DstSize == 64 && !IsSigned) {
  4497. Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  4498. MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
  4499. ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
  4500. .addImm(0)
  4501. .addUse(NewReg)
  4502. .addImm(AArch64::sub_32);
  4503. RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
  4504. } else
  4505. ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
  4506. constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  4507. MI.eraseFromParent();
  4508. return true;
  4509. }
  4510. bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
  4511. MachineRegisterInfo &MRI) {
  4512. assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
  4513. // Get information on the destination.
  4514. Register DstReg = I.getOperand(0).getReg();
  4515. const LLT DstTy = MRI.getType(DstReg);
  4516. unsigned VecSize = DstTy.getSizeInBits();
  4517. // Get information on the element we want to insert into the destination.
  4518. Register EltReg = I.getOperand(2).getReg();
  4519. const LLT EltTy = MRI.getType(EltReg);
  4520. unsigned EltSize = EltTy.getSizeInBits();
  4521. if (EltSize < 16 || EltSize > 64)
  4522. return false; // Don't support all element types yet.
  4523. // Find the definition of the index. Bail out if it's not defined by a
  4524. // G_CONSTANT.
  4525. Register IdxReg = I.getOperand(3).getReg();
  4526. auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
  4527. if (!VRegAndVal)
  4528. return false;
  4529. unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
  4530. // Perform the lane insert.
  4531. Register SrcReg = I.getOperand(1).getReg();
  4532. const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  4533. if (VecSize < 128) {
  4534. // If the vector we're inserting into is smaller than 128 bits, widen it
  4535. // to 128 to do the insert.
  4536. MachineInstr *ScalarToVec =
  4537. emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
  4538. if (!ScalarToVec)
  4539. return false;
  4540. SrcReg = ScalarToVec->getOperand(0).getReg();
  4541. }
  4542. // Create an insert into a new FPR128 register.
  4543. // Note that if our vector is already 128 bits, we end up emitting an extra
  4544. // register.
  4545. MachineInstr *InsMI =
  4546. emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
  4547. if (VecSize < 128) {
  4548. // If we had to widen to perform the insert, then we have to demote back to
  4549. // the original size to get the result we want.
  4550. Register DemoteVec = InsMI->getOperand(0).getReg();
  4551. const TargetRegisterClass *RC =
  4552. getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
  4553. if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
  4554. LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
  4555. return false;
  4556. }
  4557. unsigned SubReg = 0;
  4558. if (!getSubRegForClass(RC, TRI, SubReg))
  4559. return false;
  4560. if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
  4561. LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
  4562. << "\n");
  4563. return false;
  4564. }
  4565. MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
  4566. .addReg(DemoteVec, 0, SubReg);
  4567. RBI.constrainGenericRegister(DstReg, *RC, MRI);
  4568. } else {
  4569. // No widening needed.
  4570. InsMI->getOperand(0).setReg(DstReg);
  4571. constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  4572. }
  4573. I.eraseFromParent();
  4574. return true;
  4575. }
  4576. MachineInstr *
  4577. AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
  4578. MachineIRBuilder &MIRBuilder,
  4579. MachineRegisterInfo &MRI) {
  4580. LLT DstTy = MRI.getType(Dst);
  4581. unsigned DstSize = DstTy.getSizeInBits();
  4582. if (CV->isNullValue()) {
  4583. if (DstSize == 128) {
  4584. auto Mov =
  4585. MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
  4586. constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
  4587. return &*Mov;
  4588. }
  4589. if (DstSize == 64) {
  4590. auto Mov =
  4591. MIRBuilder
  4592. .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
  4593. .addImm(0);
  4594. auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
  4595. .addReg(Mov.getReg(0), 0, AArch64::dsub);
  4596. RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
  4597. return &*Copy;
  4598. }
  4599. }
  4600. auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
  4601. if (!CPLoad) {
  4602. LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
  4603. return nullptr;
  4604. }
  4605. auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
  4606. RBI.constrainGenericRegister(
  4607. Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
  4608. return &*Copy;
  4609. }
  4610. bool AArch64InstructionSelector::tryOptConstantBuildVec(
  4611. MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
  4612. assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  4613. unsigned DstSize = DstTy.getSizeInBits();
  4614. assert(DstSize <= 128 && "Unexpected build_vec type!");
  4615. if (DstSize < 32)
  4616. return false;
  4617. // Check if we're building a constant vector, in which case we want to
  4618. // generate a constant pool load instead of a vector insert sequence.
  4619. SmallVector<Constant *, 16> Csts;
  4620. for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
  4621. // Try to find G_CONSTANT or G_FCONSTANT
  4622. auto *OpMI =
  4623. getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
  4624. if (OpMI)
  4625. Csts.emplace_back(
  4626. const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
  4627. else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
  4628. I.getOperand(Idx).getReg(), MRI)))
  4629. Csts.emplace_back(
  4630. const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
  4631. else
  4632. return false;
  4633. }
  4634. Constant *CV = ConstantVector::get(Csts);
  4635. if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
  4636. return false;
  4637. I.eraseFromParent();
  4638. return true;
  4639. }
  4640. bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
  4641. MachineInstr &I, MachineRegisterInfo &MRI) {
  4642. // Given:
  4643. // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
  4644. //
  4645. // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
  4646. Register Dst = I.getOperand(0).getReg();
  4647. Register EltReg = I.getOperand(1).getReg();
  4648. LLT EltTy = MRI.getType(EltReg);
  4649. // If the index isn't on the same bank as its elements, then this can't be a
  4650. // SUBREG_TO_REG.
  4651. const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  4652. const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
  4653. if (EltRB != DstRB)
  4654. return false;
  4655. if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
  4656. [&MRI](const MachineOperand &Op) {
  4657. return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
  4658. MRI);
  4659. }))
  4660. return false;
  4661. unsigned SubReg;
  4662. const TargetRegisterClass *EltRC =
  4663. getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
  4664. if (!EltRC)
  4665. return false;
  4666. const TargetRegisterClass *DstRC =
  4667. getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
  4668. if (!DstRC)
  4669. return false;
  4670. if (!getSubRegForClass(EltRC, TRI, SubReg))
  4671. return false;
  4672. auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
  4673. .addImm(0)
  4674. .addUse(EltReg)
  4675. .addImm(SubReg);
  4676. I.eraseFromParent();
  4677. constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
  4678. return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
  4679. }
  4680. bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
  4681. MachineRegisterInfo &MRI) {
  4682. assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  4683. // Until we port more of the optimized selections, for now just use a vector
  4684. // insert sequence.
  4685. const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  4686. const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  4687. unsigned EltSize = EltTy.getSizeInBits();
  4688. if (tryOptConstantBuildVec(I, DstTy, MRI))
  4689. return true;
  4690. if (tryOptBuildVecToSubregToReg(I, MRI))
  4691. return true;
  4692. if (EltSize < 16 || EltSize > 64)
  4693. return false; // Don't support all element types yet.
  4694. const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  4695. const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  4696. MachineInstr *ScalarToVec =
  4697. emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
  4698. I.getOperand(1).getReg(), MIB);
  4699. if (!ScalarToVec)
  4700. return false;
  4701. Register DstVec = ScalarToVec->getOperand(0).getReg();
  4702. unsigned DstSize = DstTy.getSizeInBits();
  4703. // Keep track of the last MI we inserted. Later on, we might be able to save
  4704. // a copy using it.
  4705. MachineInstr *PrevMI = nullptr;
  4706. for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
  4707. // Note that if we don't do a subregister copy, we can end up making an
  4708. // extra register.
  4709. PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
  4710. MIB);
  4711. DstVec = PrevMI->getOperand(0).getReg();
  4712. }
  4713. // If DstTy's size in bits is less than 128, then emit a subregister copy
  4714. // from DstVec to the last register we've defined.
  4715. if (DstSize < 128) {
  4716. // Force this to be FPR using the destination vector.
  4717. const TargetRegisterClass *RC =
  4718. getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
  4719. if (!RC)
  4720. return false;
  4721. if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
  4722. LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
  4723. return false;
  4724. }
  4725. unsigned SubReg = 0;
  4726. if (!getSubRegForClass(RC, TRI, SubReg))
  4727. return false;
  4728. if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
  4729. LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
  4730. << "\n");
  4731. return false;
  4732. }
  4733. Register Reg = MRI.createVirtualRegister(RC);
  4734. Register DstReg = I.getOperand(0).getReg();
  4735. MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
  4736. MachineOperand &RegOp = I.getOperand(1);
  4737. RegOp.setReg(Reg);
  4738. RBI.constrainGenericRegister(DstReg, *RC, MRI);
  4739. } else {
  4740. // We don't need a subregister copy. Save a copy by re-using the
  4741. // destination register on the final insert.
  4742. assert(PrevMI && "PrevMI was null?");
  4743. PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
  4744. constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  4745. }
  4746. I.eraseFromParent();
  4747. return true;
  4748. }
  4749. bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
  4750. unsigned NumVecs,
  4751. MachineInstr &I) {
  4752. assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  4753. assert(Opc && "Expected an opcode?");
  4754. assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  4755. auto &MRI = *MIB.getMRI();
  4756. LLT Ty = MRI.getType(I.getOperand(0).getReg());
  4757. unsigned Size = Ty.getSizeInBits();
  4758. assert((Size == 64 || Size == 128) &&
  4759. "Destination must be 64 bits or 128 bits?");
  4760. unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
  4761. auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
  4762. assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
  4763. auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
  4764. Load.cloneMemRefs(I);
  4765. constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
  4766. Register SelectedLoadDst = Load->getOperand(0).getReg();
  4767. for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
  4768. auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
  4769. .addReg(SelectedLoadDst, 0, SubReg + Idx);
  4770. // Emit the subreg copies and immediately select them.
  4771. // FIXME: We should refactor our copy code into an emitCopy helper and
  4772. // clean up uses of this pattern elsewhere in the selector.
  4773. selectCopy(*Vec, TII, MRI, TRI, RBI);
  4774. }
  4775. return true;
  4776. }
  4777. bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
  4778. MachineInstr &I, MachineRegisterInfo &MRI) {
  4779. // Find the intrinsic ID.
  4780. unsigned IntrinID = I.getIntrinsicID();
  4781. const LLT S8 = LLT::scalar(8);
  4782. const LLT S16 = LLT::scalar(16);
  4783. const LLT S32 = LLT::scalar(32);
  4784. const LLT S64 = LLT::scalar(64);
  4785. const LLT P0 = LLT::pointer(0, 64);
  4786. // Select the instruction.
  4787. switch (IntrinID) {
  4788. default:
  4789. return false;
  4790. case Intrinsic::aarch64_ldxp:
  4791. case Intrinsic::aarch64_ldaxp: {
  4792. auto NewI = MIB.buildInstr(
  4793. IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
  4794. {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
  4795. {I.getOperand(3)});
  4796. NewI.cloneMemRefs(I);
  4797. constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
  4798. break;
  4799. }
  4800. case Intrinsic::trap:
  4801. MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
  4802. break;
  4803. case Intrinsic::debugtrap:
  4804. MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
  4805. break;
  4806. case Intrinsic::ubsantrap:
  4807. MIB.buildInstr(AArch64::BRK, {}, {})
  4808. .addImm(I.getOperand(1).getImm() | ('U' << 8));
  4809. break;
  4810. case Intrinsic::aarch64_neon_ld2: {
  4811. LLT Ty = MRI.getType(I.getOperand(0).getReg());
  4812. unsigned Opc = 0;
  4813. if (Ty == LLT::fixed_vector(8, S8))
  4814. Opc = AArch64::LD2Twov8b;
  4815. else if (Ty == LLT::fixed_vector(16, S8))
  4816. Opc = AArch64::LD2Twov16b;
  4817. else if (Ty == LLT::fixed_vector(4, S16))
  4818. Opc = AArch64::LD2Twov4h;
  4819. else if (Ty == LLT::fixed_vector(8, S16))
  4820. Opc = AArch64::LD2Twov8h;
  4821. else if (Ty == LLT::fixed_vector(2, S32))
  4822. Opc = AArch64::LD2Twov2s;
  4823. else if (Ty == LLT::fixed_vector(4, S32))
  4824. Opc = AArch64::LD2Twov4s;
  4825. else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
  4826. Opc = AArch64::LD2Twov2d;
  4827. else if (Ty == S64 || Ty == P0)
  4828. Opc = AArch64::LD1Twov1d;
  4829. else
  4830. llvm_unreachable("Unexpected type for ld2!");
  4831. selectVectorLoadIntrinsic(Opc, 2, I);
  4832. break;
  4833. }
  4834. case Intrinsic::aarch64_neon_ld4: {
  4835. LLT Ty = MRI.getType(I.getOperand(0).getReg());
  4836. unsigned Opc = 0;
  4837. if (Ty == LLT::fixed_vector(8, S8))
  4838. Opc = AArch64::LD4Fourv8b;
  4839. else if (Ty == LLT::fixed_vector(16, S8))
  4840. Opc = AArch64::LD4Fourv16b;
  4841. else if (Ty == LLT::fixed_vector(4, S16))
  4842. Opc = AArch64::LD4Fourv4h;
  4843. else if (Ty == LLT::fixed_vector(8, S16))
  4844. Opc = AArch64::LD4Fourv8h;
  4845. else if (Ty == LLT::fixed_vector(2, S32))
  4846. Opc = AArch64::LD4Fourv2s;
  4847. else if (Ty == LLT::fixed_vector(4, S32))
  4848. Opc = AArch64::LD4Fourv4s;
  4849. else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
  4850. Opc = AArch64::LD4Fourv2d;
  4851. else if (Ty == S64 || Ty == P0)
  4852. Opc = AArch64::LD1Fourv1d;
  4853. else
  4854. llvm_unreachable("Unexpected type for ld4!");
  4855. selectVectorLoadIntrinsic(Opc, 4, I);
  4856. break;
  4857. }
  4858. case Intrinsic::aarch64_neon_st2: {
  4859. Register Src1 = I.getOperand(1).getReg();
  4860. Register Src2 = I.getOperand(2).getReg();
  4861. Register Ptr = I.getOperand(3).getReg();
  4862. LLT Ty = MRI.getType(Src1);
  4863. unsigned Opc;
  4864. if (Ty == LLT::fixed_vector(8, S8))
  4865. Opc = AArch64::ST2Twov8b;
  4866. else if (Ty == LLT::fixed_vector(16, S8))
  4867. Opc = AArch64::ST2Twov16b;
  4868. else if (Ty == LLT::fixed_vector(4, S16))
  4869. Opc = AArch64::ST2Twov4h;
  4870. else if (Ty == LLT::fixed_vector(8, S16))
  4871. Opc = AArch64::ST2Twov8h;
  4872. else if (Ty == LLT::fixed_vector(2, S32))
  4873. Opc = AArch64::ST2Twov2s;
  4874. else if (Ty == LLT::fixed_vector(4, S32))
  4875. Opc = AArch64::ST2Twov4s;
  4876. else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
  4877. Opc = AArch64::ST2Twov2d;
  4878. else if (Ty == S64 || Ty == P0)
  4879. Opc = AArch64::ST1Twov1d;
  4880. else
  4881. llvm_unreachable("Unexpected type for st2!");
  4882. SmallVector<Register, 2> Regs = {Src1, Src2};
  4883. Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
  4884. : createDTuple(Regs, MIB);
  4885. auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
  4886. Store.cloneMemRefs(I);
  4887. constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
  4888. break;
  4889. }
  4890. case Intrinsic::aarch64_mops_memset_tag: {
  4891. // Transform
  4892. // %dst:gpr(p0) = \
  4893. // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
  4894. // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
  4895. // where %dst is updated, into
  4896. // %Rd:GPR64common, %Rn:GPR64) = \
  4897. // MOPSMemorySetTaggingPseudo \
  4898. // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
  4899. // where Rd and Rn are tied.
  4900. // It is expected that %val has been extended to s64 in legalization.
  4901. // Note that the order of the size/value operands are swapped.
  4902. Register DstDef = I.getOperand(0).getReg();
  4903. // I.getOperand(1) is the intrinsic function
  4904. Register DstUse = I.getOperand(2).getReg();
  4905. Register ValUse = I.getOperand(3).getReg();
  4906. Register SizeUse = I.getOperand(4).getReg();
  4907. // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
  4908. // Therefore an additional virtual register is requried for the updated size
  4909. // operand. This value is not accessible via the semantics of the intrinsic.
  4910. Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
  4911. auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
  4912. {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
  4913. Memset.cloneMemRefs(I);
  4914. constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
  4915. break;
  4916. }
  4917. }
  4918. I.eraseFromParent();
  4919. return true;
  4920. }
  4921. bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
  4922. MachineRegisterInfo &MRI) {
  4923. unsigned IntrinID = I.getIntrinsicID();
  4924. switch (IntrinID) {
  4925. default:
  4926. break;
  4927. case Intrinsic::aarch64_crypto_sha1h: {
  4928. Register DstReg = I.getOperand(0).getReg();
  4929. Register SrcReg = I.getOperand(2).getReg();
  4930. // FIXME: Should this be an assert?
  4931. if (MRI.getType(DstReg).getSizeInBits() != 32 ||
  4932. MRI.getType(SrcReg).getSizeInBits() != 32)
  4933. return false;
  4934. // The operation has to happen on FPRs. Set up some new FPR registers for
  4935. // the source and destination if they are on GPRs.
  4936. if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
  4937. SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
  4938. MIB.buildCopy({SrcReg}, {I.getOperand(2)});
  4939. // Make sure the copy ends up getting constrained properly.
  4940. RBI.constrainGenericRegister(I.getOperand(2).getReg(),
  4941. AArch64::GPR32RegClass, MRI);
  4942. }
  4943. if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
  4944. DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
  4945. // Actually insert the instruction.
  4946. auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
  4947. constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
  4948. // Did we create a new register for the destination?
  4949. if (DstReg != I.getOperand(0).getReg()) {
  4950. // Yep. Copy the result of the instruction back into the original
  4951. // destination.
  4952. MIB.buildCopy({I.getOperand(0)}, {DstReg});
  4953. RBI.constrainGenericRegister(I.getOperand(0).getReg(),
  4954. AArch64::GPR32RegClass, MRI);
  4955. }
  4956. I.eraseFromParent();
  4957. return true;
  4958. }
  4959. case Intrinsic::ptrauth_sign: {
  4960. Register DstReg = I.getOperand(0).getReg();
  4961. Register ValReg = I.getOperand(2).getReg();
  4962. uint64_t Key = I.getOperand(3).getImm();
  4963. Register DiscReg = I.getOperand(4).getReg();
  4964. auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
  4965. bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
  4966. if (Key > 3)
  4967. return false;
  4968. unsigned Opcodes[][4] = {
  4969. {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
  4970. {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
  4971. unsigned Opcode = Opcodes[IsDiscZero][Key];
  4972. auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
  4973. if (!IsDiscZero) {
  4974. PAC.addUse(DiscReg);
  4975. RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
  4976. }
  4977. RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
  4978. I.eraseFromParent();
  4979. return true;
  4980. }
  4981. case Intrinsic::frameaddress:
  4982. case Intrinsic::returnaddress: {
  4983. MachineFunction &MF = *I.getParent()->getParent();
  4984. MachineFrameInfo &MFI = MF.getFrameInfo();
  4985. unsigned Depth = I.getOperand(2).getImm();
  4986. Register DstReg = I.getOperand(0).getReg();
  4987. RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
  4988. if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
  4989. if (!MFReturnAddr) {
  4990. // Insert the copy from LR/X30 into the entry block, before it can be
  4991. // clobbered by anything.
  4992. MFI.setReturnAddressIsTaken(true);
  4993. MFReturnAddr = getFunctionLiveInPhysReg(
  4994. MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc());
  4995. }
  4996. if (STI.hasPAuth()) {
  4997. MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
  4998. } else {
  4999. MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
  5000. MIB.buildInstr(AArch64::XPACLRI);
  5001. MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
  5002. }
  5003. I.eraseFromParent();
  5004. return true;
  5005. }
  5006. MFI.setFrameAddressIsTaken(true);
  5007. Register FrameAddr(AArch64::FP);
  5008. while (Depth--) {
  5009. Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  5010. auto Ldr =
  5011. MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
  5012. constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
  5013. FrameAddr = NextFrame;
  5014. }
  5015. if (IntrinID == Intrinsic::frameaddress)
  5016. MIB.buildCopy({DstReg}, {FrameAddr});
  5017. else {
  5018. MFI.setReturnAddressIsTaken(true);
  5019. if (STI.hasPAuth()) {
  5020. Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  5021. MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
  5022. MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
  5023. } else {
  5024. MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
  5025. .addImm(1);
  5026. MIB.buildInstr(AArch64::XPACLRI);
  5027. MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
  5028. }
  5029. }
  5030. I.eraseFromParent();
  5031. return true;
  5032. }
  5033. case Intrinsic::swift_async_context_addr:
  5034. auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
  5035. {Register(AArch64::FP)})
  5036. .addImm(8)
  5037. .addImm(0);
  5038. constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
  5039. MF->getFrameInfo().setFrameAddressIsTaken(true);
  5040. MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
  5041. I.eraseFromParent();
  5042. return true;
  5043. }
  5044. return false;
  5045. }
  5046. InstructionSelector::ComplexRendererFns
  5047. AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  5048. auto MaybeImmed = getImmedFromMO(Root);
  5049. if (MaybeImmed == None || *MaybeImmed > 31)
  5050. return None;
  5051. uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  5052. return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
  5053. }
  5054. InstructionSelector::ComplexRendererFns
  5055. AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  5056. auto MaybeImmed = getImmedFromMO(Root);
  5057. if (MaybeImmed == None || *MaybeImmed > 31)
  5058. return None;
  5059. uint64_t Enc = 31 - *MaybeImmed;
  5060. return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
  5061. }
  5062. InstructionSelector::ComplexRendererFns
  5063. AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  5064. auto MaybeImmed = getImmedFromMO(Root);
  5065. if (MaybeImmed == None || *MaybeImmed > 63)
  5066. return None;
  5067. uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  5068. return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
  5069. }
  5070. InstructionSelector::ComplexRendererFns
  5071. AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  5072. auto MaybeImmed = getImmedFromMO(Root);
  5073. if (MaybeImmed == None || *MaybeImmed > 63)
  5074. return None;
  5075. uint64_t Enc = 63 - *MaybeImmed;
  5076. return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
  5077. }
  5078. /// Helper to select an immediate value that can be represented as a 12-bit
  5079. /// value shifted left by either 0 or 12. If it is possible to do so, return
  5080. /// the immediate and shift value. If not, return None.
  5081. ///
  5082. /// Used by selectArithImmed and selectNegArithImmed.
  5083. InstructionSelector::ComplexRendererFns
  5084. AArch64InstructionSelector::select12BitValueWithLeftShift(
  5085. uint64_t Immed) const {
  5086. unsigned ShiftAmt;
  5087. if (Immed >> 12 == 0) {
  5088. ShiftAmt = 0;
  5089. } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
  5090. ShiftAmt = 12;
  5091. Immed = Immed >> 12;
  5092. } else
  5093. return None;
  5094. unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  5095. return {{
  5096. [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
  5097. [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  5098. }};
  5099. }
  5100. /// SelectArithImmed - Select an immediate value that can be represented as
  5101. /// a 12-bit value shifted left by either 0 or 12. If so, return true with
  5102. /// Val set to the 12-bit value and Shift set to the shifter operand.
  5103. InstructionSelector::ComplexRendererFns
  5104. AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  5105. // This function is called from the addsub_shifted_imm ComplexPattern,
  5106. // which lists [imm] as the list of opcode it's interested in, however
  5107. // we still need to check whether the operand is actually an immediate
  5108. // here because the ComplexPattern opcode list is only used in
  5109. // root-level opcode matching.
  5110. auto MaybeImmed = getImmedFromMO(Root);
  5111. if (MaybeImmed == None)
  5112. return None;
  5113. return select12BitValueWithLeftShift(*MaybeImmed);
  5114. }
  5115. /// SelectNegArithImmed - As above, but negates the value before trying to
  5116. /// select it.
  5117. InstructionSelector::ComplexRendererFns
  5118. AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  5119. // We need a register here, because we need to know if we have a 64 or 32
  5120. // bit immediate.
  5121. if (!Root.isReg())
  5122. return None;
  5123. auto MaybeImmed = getImmedFromMO(Root);
  5124. if (MaybeImmed == None)
  5125. return None;
  5126. uint64_t Immed = *MaybeImmed;
  5127. // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  5128. // have the opposite effect on the C flag, so this pattern mustn't match under
  5129. // those circumstances.
  5130. if (Immed == 0)
  5131. return None;
  5132. // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  5133. // the root.
  5134. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5135. if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
  5136. Immed = ~((uint32_t)Immed) + 1;
  5137. else
  5138. Immed = ~Immed + 1ULL;
  5139. if (Immed & 0xFFFFFFFFFF000000ULL)
  5140. return None;
  5141. Immed &= 0xFFFFFFULL;
  5142. return select12BitValueWithLeftShift(Immed);
  5143. }
  5144. /// Return true if it is worth folding MI into an extended register. That is,
  5145. /// if it's safe to pull it into the addressing mode of a load or store as a
  5146. /// shift.
  5147. bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
  5148. MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  5149. // Always fold if there is one use, or if we're optimizing for size.
  5150. Register DefReg = MI.getOperand(0).getReg();
  5151. if (MRI.hasOneNonDBGUse(DefReg) ||
  5152. MI.getParent()->getParent()->getFunction().hasOptSize())
  5153. return true;
  5154. // It's better to avoid folding and recomputing shifts when we don't have a
  5155. // fastpath.
  5156. if (!STI.hasLSLFast())
  5157. return false;
  5158. // We have a fastpath, so folding a shift in and potentially computing it
  5159. // many times may be beneficial. Check if this is only used in memory ops.
  5160. // If it is, then we should fold.
  5161. return all_of(MRI.use_nodbg_instructions(DefReg),
  5162. [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
  5163. }
  5164. static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  5165. switch (Type) {
  5166. case AArch64_AM::SXTB:
  5167. case AArch64_AM::SXTH:
  5168. case AArch64_AM::SXTW:
  5169. return true;
  5170. default:
  5171. return false;
  5172. }
  5173. }
  5174. InstructionSelector::ComplexRendererFns
  5175. AArch64InstructionSelector::selectExtendedSHL(
  5176. MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
  5177. unsigned SizeInBytes, bool WantsExt) const {
  5178. assert(Base.isReg() && "Expected base to be a register operand");
  5179. assert(Offset.isReg() && "Expected offset to be a register operand");
  5180. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5181. MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  5182. if (!OffsetInst)
  5183. return None;
  5184. unsigned OffsetOpc = OffsetInst->getOpcode();
  5185. bool LookedThroughZExt = false;
  5186. if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
  5187. // Try to look through a ZEXT.
  5188. if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
  5189. return None;
  5190. OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
  5191. OffsetOpc = OffsetInst->getOpcode();
  5192. LookedThroughZExt = true;
  5193. if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
  5194. return None;
  5195. }
  5196. // Make sure that the memory op is a valid size.
  5197. int64_t LegalShiftVal = Log2_32(SizeInBytes);
  5198. if (LegalShiftVal == 0)
  5199. return None;
  5200. if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
  5201. return None;
  5202. // Now, try to find the specific G_CONSTANT. Start by assuming that the
  5203. // register we will offset is the LHS, and the register containing the
  5204. // constant is the RHS.
  5205. Register OffsetReg = OffsetInst->getOperand(1).getReg();
  5206. Register ConstantReg = OffsetInst->getOperand(2).getReg();
  5207. auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  5208. if (!ValAndVReg) {
  5209. // We didn't get a constant on the RHS. If the opcode is a shift, then
  5210. // we're done.
  5211. if (OffsetOpc == TargetOpcode::G_SHL)
  5212. return None;
  5213. // If we have a G_MUL, we can use either register. Try looking at the RHS.
  5214. std::swap(OffsetReg, ConstantReg);
  5215. ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  5216. if (!ValAndVReg)
  5217. return None;
  5218. }
  5219. // The value must fit into 3 bits, and must be positive. Make sure that is
  5220. // true.
  5221. int64_t ImmVal = ValAndVReg->Value.getSExtValue();
  5222. // Since we're going to pull this into a shift, the constant value must be
  5223. // a power of 2. If we got a multiply, then we need to check this.
  5224. if (OffsetOpc == TargetOpcode::G_MUL) {
  5225. if (!isPowerOf2_32(ImmVal))
  5226. return None;
  5227. // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
  5228. ImmVal = Log2_32(ImmVal);
  5229. }
  5230. if ((ImmVal & 0x7) != ImmVal)
  5231. return None;
  5232. // We are only allowed to shift by LegalShiftVal. This shift value is built
  5233. // into the instruction, so we can't just use whatever we want.
  5234. if (ImmVal != LegalShiftVal)
  5235. return None;
  5236. unsigned SignExtend = 0;
  5237. if (WantsExt) {
  5238. // Check if the offset is defined by an extend, unless we looked through a
  5239. // G_ZEXT earlier.
  5240. if (!LookedThroughZExt) {
  5241. MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
  5242. auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
  5243. if (Ext == AArch64_AM::InvalidShiftExtend)
  5244. return None;
  5245. SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
  5246. // We only support SXTW for signed extension here.
  5247. if (SignExtend && Ext != AArch64_AM::SXTW)
  5248. return None;
  5249. OffsetReg = ExtInst->getOperand(1).getReg();
  5250. }
  5251. // Need a 32-bit wide register here.
  5252. MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
  5253. OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  5254. }
  5255. // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  5256. // offset. Signify that we are shifting by setting the shift flag to 1.
  5257. return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
  5258. [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
  5259. [=](MachineInstrBuilder &MIB) {
  5260. // Need to add both immediates here to make sure that they are both
  5261. // added to the instruction.
  5262. MIB.addImm(SignExtend);
  5263. MIB.addImm(1);
  5264. }}};
  5265. }
  5266. /// This is used for computing addresses like this:
  5267. ///
  5268. /// ldr x1, [x2, x3, lsl #3]
  5269. ///
  5270. /// Where x2 is the base register, and x3 is an offset register. The shift-left
  5271. /// is a constant value specific to this load instruction. That is, we'll never
  5272. /// see anything other than a 3 here (which corresponds to the size of the
  5273. /// element being loaded.)
  5274. InstructionSelector::ComplexRendererFns
  5275. AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
  5276. MachineOperand &Root, unsigned SizeInBytes) const {
  5277. if (!Root.isReg())
  5278. return None;
  5279. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5280. // We want to find something like this:
  5281. //
  5282. // val = G_CONSTANT LegalShiftVal
  5283. // shift = G_SHL off_reg val
  5284. // ptr = G_PTR_ADD base_reg shift
  5285. // x = G_LOAD ptr
  5286. //
  5287. // And fold it into this addressing mode:
  5288. //
  5289. // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
  5290. // Check if we can find the G_PTR_ADD.
  5291. MachineInstr *PtrAdd =
  5292. getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  5293. if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
  5294. return None;
  5295. // Now, try to match an opcode which will match our specific offset.
  5296. // We want a G_SHL or a G_MUL.
  5297. MachineInstr *OffsetInst =
  5298. getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  5299. return selectExtendedSHL(Root, PtrAdd->getOperand(1),
  5300. OffsetInst->getOperand(0), SizeInBytes,
  5301. /*WantsExt=*/false);
  5302. }
  5303. /// This is used for computing addresses like this:
  5304. ///
  5305. /// ldr x1, [x2, x3]
  5306. ///
  5307. /// Where x2 is the base register, and x3 is an offset register.
  5308. ///
  5309. /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation,
  5310. /// this will do so. Otherwise, it will return None.
  5311. InstructionSelector::ComplexRendererFns
  5312. AArch64InstructionSelector::selectAddrModeRegisterOffset(
  5313. MachineOperand &Root) const {
  5314. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5315. // We need a GEP.
  5316. MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  5317. if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
  5318. return None;
  5319. // If this is used more than once, let's not bother folding.
  5320. // TODO: Check if they are memory ops. If they are, then we can still fold
  5321. // without having to recompute anything.
  5322. if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
  5323. return None;
  5324. // Base is the GEP's LHS, offset is its RHS.
  5325. return {{[=](MachineInstrBuilder &MIB) {
  5326. MIB.addUse(Gep->getOperand(1).getReg());
  5327. },
  5328. [=](MachineInstrBuilder &MIB) {
  5329. MIB.addUse(Gep->getOperand(2).getReg());
  5330. },
  5331. [=](MachineInstrBuilder &MIB) {
  5332. // Need to add both immediates here to make sure that they are both
  5333. // added to the instruction.
  5334. MIB.addImm(0);
  5335. MIB.addImm(0);
  5336. }}};
  5337. }
  5338. /// This is intended to be equivalent to selectAddrModeXRO in
  5339. /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
  5340. InstructionSelector::ComplexRendererFns
  5341. AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
  5342. unsigned SizeInBytes) const {
  5343. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5344. if (!Root.isReg())
  5345. return None;
  5346. MachineInstr *PtrAdd =
  5347. getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  5348. if (!PtrAdd)
  5349. return None;
  5350. // Check for an immediates which cannot be encoded in the [base + imm]
  5351. // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
  5352. // end up with code like:
  5353. //
  5354. // mov x0, wide
  5355. // add x1 base, x0
  5356. // ldr x2, [x1, x0]
  5357. //
  5358. // In this situation, we can use the [base, xreg] addressing mode to save an
  5359. // add/sub:
  5360. //
  5361. // mov x0, wide
  5362. // ldr x2, [base, x0]
  5363. auto ValAndVReg =
  5364. getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
  5365. if (ValAndVReg) {
  5366. unsigned Scale = Log2_32(SizeInBytes);
  5367. int64_t ImmOff = ValAndVReg->Value.getSExtValue();
  5368. // Skip immediates that can be selected in the load/store addresing
  5369. // mode.
  5370. if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
  5371. ImmOff < (0x1000 << Scale))
  5372. return None;
  5373. // Helper lambda to decide whether or not it is preferable to emit an add.
  5374. auto isPreferredADD = [](int64_t ImmOff) {
  5375. // Constants in [0x0, 0xfff] can be encoded in an add.
  5376. if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
  5377. return true;
  5378. // Can it be encoded in an add lsl #12?
  5379. if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
  5380. return false;
  5381. // It can be encoded in an add lsl #12, but we may not want to. If it is
  5382. // possible to select this as a single movz, then prefer that. A single
  5383. // movz is faster than an add with a shift.
  5384. return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
  5385. (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  5386. };
  5387. // If the immediate can be encoded in a single add/sub, then bail out.
  5388. if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
  5389. return None;
  5390. }
  5391. // Try to fold shifts into the addressing mode.
  5392. auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  5393. if (AddrModeFns)
  5394. return AddrModeFns;
  5395. // If that doesn't work, see if it's possible to fold in registers from
  5396. // a GEP.
  5397. return selectAddrModeRegisterOffset(Root);
  5398. }
  5399. /// This is used for computing addresses like this:
  5400. ///
  5401. /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
  5402. ///
  5403. /// Where we have a 64-bit base register, a 32-bit offset register, and an
  5404. /// extend (which may or may not be signed).
  5405. InstructionSelector::ComplexRendererFns
  5406. AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
  5407. unsigned SizeInBytes) const {
  5408. MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  5409. MachineInstr *PtrAdd =
  5410. getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  5411. if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
  5412. return None;
  5413. MachineOperand &LHS = PtrAdd->getOperand(1);
  5414. MachineOperand &RHS = PtrAdd->getOperand(2);
  5415. MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
  5416. // The first case is the same as selectAddrModeXRO, except we need an extend.
  5417. // In this case, we try to find a shift and extend, and fold them into the
  5418. // addressing mode.
  5419. //
  5420. // E.g.
  5421. //
  5422. // off_reg = G_Z/S/ANYEXT ext_reg
  5423. // val = G_CONSTANT LegalShiftVal
  5424. // shift = G_SHL off_reg val
  5425. // ptr = G_PTR_ADD base_reg shift
  5426. // x = G_LOAD ptr
  5427. //
  5428. // In this case we can get a load like this:
  5429. //
  5430. // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  5431. auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
  5432. SizeInBytes, /*WantsExt=*/true);
  5433. if (ExtendedShl)
  5434. return ExtendedShl;
  5435. // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  5436. //
  5437. // e.g.
  5438. // ldr something, [base_reg, ext_reg, sxtw]
  5439. if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
  5440. return None;
  5441. // Check if this is an extend. We'll get an extend type if it is.
  5442. AArch64_AM::ShiftExtendType Ext =
  5443. getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  5444. if (Ext == AArch64_AM::InvalidShiftExtend)
  5445. return None;
  5446. // Need a 32-bit wide register.
  5447. MachineIRBuilder MIB(*PtrAdd);
  5448. Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
  5449. AArch64::GPR32RegClass, MIB);
  5450. unsigned SignExtend = Ext == AArch64_AM::SXTW;
  5451. // Base is LHS, offset is ExtReg.
  5452. return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
  5453. [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
  5454. [=](MachineInstrBuilder &MIB) {
  5455. MIB.addImm(SignExtend);
  5456. MIB.addImm(0);
  5457. }}};
  5458. }
  5459. /// Select a "register plus unscaled signed 9-bit immediate" address. This
  5460. /// should only match when there is an offset that is not valid for a scaled
  5461. /// immediate addressing mode. The "Size" argument is the size in bytes of the
  5462. /// memory reference, which is needed here to know what is valid for a scaled
  5463. /// immediate.
  5464. InstructionSelector::ComplexRendererFns
  5465. AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
  5466. unsigned Size) const {
  5467. MachineRegisterInfo &MRI =
  5468. Root.getParent()->getParent()->getParent()->getRegInfo();
  5469. if (!Root.isReg())
  5470. return None;
  5471. if (!isBaseWithConstantOffset(Root, MRI))
  5472. return None;
  5473. MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  5474. if (!RootDef)
  5475. return None;
  5476. MachineOperand &OffImm = RootDef->getOperand(2);
  5477. if (!OffImm.isReg())
  5478. return None;
  5479. MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  5480. if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
  5481. return None;
  5482. int64_t RHSC;
  5483. MachineOperand &RHSOp1 = RHS->getOperand(1);
  5484. if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
  5485. return None;
  5486. RHSC = RHSOp1.getCImm()->getSExtValue();
  5487. // If the offset is valid as a scaled immediate, don't match here.
  5488. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
  5489. return None;
  5490. if (RHSC >= -256 && RHSC < 256) {
  5491. MachineOperand &Base = RootDef->getOperand(1);
  5492. return {{
  5493. [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
  5494. [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
  5495. }};
  5496. }
  5497. return None;
  5498. }
  5499. InstructionSelector::ComplexRendererFns
  5500. AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
  5501. unsigned Size,
  5502. MachineRegisterInfo &MRI) const {
  5503. if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
  5504. return None;
  5505. MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  5506. if (Adrp.getOpcode() != AArch64::ADRP)
  5507. return None;
  5508. // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  5509. auto Offset = Adrp.getOperand(1).getOffset();
  5510. if (Offset % Size != 0)
  5511. return None;
  5512. auto GV = Adrp.getOperand(1).getGlobal();
  5513. if (GV->isThreadLocal())
  5514. return None;
  5515. auto &MF = *RootDef.getParent()->getParent();
  5516. if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
  5517. return None;
  5518. unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  5519. MachineIRBuilder MIRBuilder(RootDef);
  5520. Register AdrpReg = Adrp.getOperand(0).getReg();
  5521. return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
  5522. [=](MachineInstrBuilder &MIB) {
  5523. MIB.addGlobalAddress(GV, Offset,
  5524. OpFlags | AArch64II::MO_PAGEOFF |
  5525. AArch64II::MO_NC);
  5526. }}};
  5527. }
  5528. /// Select a "register plus scaled unsigned 12-bit immediate" address. The
  5529. /// "Size" argument is the size in bytes of the memory reference, which
  5530. /// determines the scale.
  5531. InstructionSelector::ComplexRendererFns
  5532. AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
  5533. unsigned Size) const {
  5534. MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  5535. MachineRegisterInfo &MRI = MF.getRegInfo();
  5536. if (!Root.isReg())
  5537. return None;
  5538. MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  5539. if (!RootDef)
  5540. return None;
  5541. if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
  5542. return {{
  5543. [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
  5544. [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  5545. }};
  5546. }
  5547. CodeModel::Model CM = MF.getTarget().getCodeModel();
  5548. // Check if we can fold in the ADD of small code model ADRP + ADD address.
  5549. if (CM == CodeModel::Small) {
  5550. auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
  5551. if (OpFns)
  5552. return OpFns;
  5553. }
  5554. if (isBaseWithConstantOffset(Root, MRI)) {
  5555. MachineOperand &LHS = RootDef->getOperand(1);
  5556. MachineOperand &RHS = RootDef->getOperand(2);
  5557. MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
  5558. MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
  5559. if (LHSDef && RHSDef) {
  5560. int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
  5561. unsigned Scale = Log2_32(Size);
  5562. if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
  5563. if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
  5564. return {{
  5565. [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
  5566. [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
  5567. }};
  5568. return {{
  5569. [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
  5570. [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
  5571. }};
  5572. }
  5573. }
  5574. }
  5575. // Before falling back to our general case, check if the unscaled
  5576. // instructions can handle this. If so, that's preferable.
  5577. if (selectAddrModeUnscaled(Root, Size).hasValue())
  5578. return None;
  5579. return {{
  5580. [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
  5581. [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  5582. }};
  5583. }
  5584. /// Given a shift instruction, return the correct shift type for that
  5585. /// instruction.
  5586. static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  5587. switch (MI.getOpcode()) {
  5588. default:
  5589. return AArch64_AM::InvalidShiftExtend;
  5590. case TargetOpcode::G_SHL:
  5591. return AArch64_AM::LSL;
  5592. case TargetOpcode::G_LSHR:
  5593. return AArch64_AM::LSR;
  5594. case TargetOpcode::G_ASHR:
  5595. return AArch64_AM::ASR;
  5596. case TargetOpcode::G_ROTR:
  5597. return AArch64_AM::ROR;
  5598. }
  5599. }
  5600. /// Select a "shifted register" operand. If the value is not shifted, set the
  5601. /// shift operand to a default value of "lsl 0".
  5602. InstructionSelector::ComplexRendererFns
  5603. AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
  5604. bool AllowROR) const {
  5605. if (!Root.isReg())
  5606. return None;
  5607. MachineRegisterInfo &MRI =
  5608. Root.getParent()->getParent()->getParent()->getRegInfo();
  5609. // Check if the operand is defined by an instruction which corresponds to
  5610. // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  5611. MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  5612. if (!ShiftInst)
  5613. return None;
  5614. AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  5615. if (ShType == AArch64_AM::InvalidShiftExtend)
  5616. return None;
  5617. if (ShType == AArch64_AM::ROR && !AllowROR)
  5618. return None;
  5619. if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
  5620. return None;
  5621. // Need an immediate on the RHS.
  5622. MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  5623. auto Immed = getImmedFromMO(ShiftRHS);
  5624. if (!Immed)
  5625. return None;
  5626. // We have something that we can fold. Fold in the shift's LHS and RHS into
  5627. // the instruction.
  5628. MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  5629. Register ShiftReg = ShiftLHS.getReg();
  5630. unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  5631. unsigned Val = *Immed & (NumBits - 1);
  5632. unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
  5633. return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
  5634. [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
  5635. }
  5636. AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
  5637. MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  5638. unsigned Opc = MI.getOpcode();
  5639. // Handle explicit extend instructions first.
  5640. if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
  5641. unsigned Size;
  5642. if (Opc == TargetOpcode::G_SEXT)
  5643. Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  5644. else
  5645. Size = MI.getOperand(2).getImm();
  5646. assert(Size != 64 && "Extend from 64 bits?");
  5647. switch (Size) {
  5648. case 8:
  5649. return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
  5650. case 16:
  5651. return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
  5652. case 32:
  5653. return AArch64_AM::SXTW;
  5654. default:
  5655. return AArch64_AM::InvalidShiftExtend;
  5656. }
  5657. }
  5658. if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
  5659. unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  5660. assert(Size != 64 && "Extend from 64 bits?");
  5661. switch (Size) {
  5662. case 8:
  5663. return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
  5664. case 16:
  5665. return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
  5666. case 32:
  5667. return AArch64_AM::UXTW;
  5668. default:
  5669. return AArch64_AM::InvalidShiftExtend;
  5670. }
  5671. }
  5672. // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  5673. // on the RHS.
  5674. if (Opc != TargetOpcode::G_AND)
  5675. return AArch64_AM::InvalidShiftExtend;
  5676. Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  5677. if (!MaybeAndMask)
  5678. return AArch64_AM::InvalidShiftExtend;
  5679. uint64_t AndMask = *MaybeAndMask;
  5680. switch (AndMask) {
  5681. default:
  5682. return AArch64_AM::InvalidShiftExtend;
  5683. case 0xFF:
  5684. return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  5685. case 0xFFFF:
  5686. return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  5687. case 0xFFFFFFFF:
  5688. return AArch64_AM::UXTW;
  5689. }
  5690. }
  5691. Register AArch64InstructionSelector::moveScalarRegClass(
  5692. Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  5693. MachineRegisterInfo &MRI = *MIB.getMRI();
  5694. auto Ty = MRI.getType(Reg);
  5695. assert(!Ty.isVector() && "Expected scalars only!");
  5696. if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
  5697. return Reg;
  5698. // Create a copy and immediately select it.
  5699. // FIXME: We should have an emitCopy function?
  5700. auto Copy = MIB.buildCopy({&RC}, {Reg});
  5701. selectCopy(*Copy, TII, MRI, TRI, RBI);
  5702. return Copy.getReg(0);
  5703. }
  5704. /// Select an "extended register" operand. This operand folds in an extend
  5705. /// followed by an optional left shift.
  5706. InstructionSelector::ComplexRendererFns
  5707. AArch64InstructionSelector::selectArithExtendedRegister(
  5708. MachineOperand &Root) const {
  5709. if (!Root.isReg())
  5710. return None;
  5711. MachineRegisterInfo &MRI =
  5712. Root.getParent()->getParent()->getParent()->getRegInfo();
  5713. uint64_t ShiftVal = 0;
  5714. Register ExtReg;
  5715. AArch64_AM::ShiftExtendType Ext;
  5716. MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  5717. if (!RootDef)
  5718. return None;
  5719. if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
  5720. return None;
  5721. // Check if we can fold a shift and an extend.
  5722. if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
  5723. // Look for a constant on the RHS of the shift.
  5724. MachineOperand &RHS = RootDef->getOperand(2);
  5725. Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
  5726. if (!MaybeShiftVal)
  5727. return None;
  5728. ShiftVal = *MaybeShiftVal;
  5729. if (ShiftVal > 4)
  5730. return None;
  5731. // Look for a valid extend instruction on the LHS of the shift.
  5732. MachineOperand &LHS = RootDef->getOperand(1);
  5733. MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  5734. if (!ExtDef)
  5735. return None;
  5736. Ext = getExtendTypeForInst(*ExtDef, MRI);
  5737. if (Ext == AArch64_AM::InvalidShiftExtend)
  5738. return None;
  5739. ExtReg = ExtDef->getOperand(1).getReg();
  5740. } else {
  5741. // Didn't get a shift. Try just folding an extend.
  5742. Ext = getExtendTypeForInst(*RootDef, MRI);
  5743. if (Ext == AArch64_AM::InvalidShiftExtend)
  5744. return None;
  5745. ExtReg = RootDef->getOperand(1).getReg();
  5746. // If we have a 32 bit instruction which zeroes out the high half of a
  5747. // register, we get an implicit zero extend for free. Check if we have one.
  5748. // FIXME: We actually emit the extend right now even though we don't have
  5749. // to.
  5750. if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
  5751. MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
  5752. if (ExtInst && isDef32(*ExtInst))
  5753. return None;
  5754. }
  5755. }
  5756. // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  5757. // copy.
  5758. MachineIRBuilder MIB(*RootDef);
  5759. ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
  5760. return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
  5761. [=](MachineInstrBuilder &MIB) {
  5762. MIB.addImm(getArithExtendImm(Ext, ShiftVal));
  5763. }}};
  5764. }
  5765. void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
  5766. const MachineInstr &MI,
  5767. int OpIdx) const {
  5768. const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  5769. assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
  5770. "Expected G_CONSTANT");
  5771. Optional<int64_t> CstVal =
  5772. getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  5773. assert(CstVal && "Expected constant value");
  5774. MIB.addImm(CstVal.getValue());
  5775. }
  5776. void AArch64InstructionSelector::renderLogicalImm32(
  5777. MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  5778. assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
  5779. "Expected G_CONSTANT");
  5780. uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  5781. uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  5782. MIB.addImm(Enc);
  5783. }
  5784. void AArch64InstructionSelector::renderLogicalImm64(
  5785. MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  5786. assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
  5787. "Expected G_CONSTANT");
  5788. uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  5789. uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  5790. MIB.addImm(Enc);
  5791. }
  5792. void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
  5793. const MachineInstr &MI,
  5794. int OpIdx) const {
  5795. assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
  5796. "Expected G_FCONSTANT");
  5797. MIB.addImm(
  5798. AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
  5799. }
  5800. void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
  5801. const MachineInstr &MI,
  5802. int OpIdx) const {
  5803. assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
  5804. "Expected G_FCONSTANT");
  5805. MIB.addImm(
  5806. AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
  5807. }
  5808. void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
  5809. const MachineInstr &MI,
  5810. int OpIdx) const {
  5811. assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
  5812. "Expected G_FCONSTANT");
  5813. MIB.addImm(
  5814. AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
  5815. }
  5816. bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
  5817. const MachineInstr &MI, unsigned NumBytes) const {
  5818. if (!MI.mayLoadOrStore())
  5819. return false;
  5820. assert(MI.hasOneMemOperand() &&
  5821. "Expected load/store to have only one mem op!");
  5822. return (*MI.memoperands_begin())->getSize() == NumBytes;
  5823. }
  5824. bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  5825. const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  5826. if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
  5827. return false;
  5828. // Only return true if we know the operation will zero-out the high half of
  5829. // the 64-bit register. Truncates can be subregister copies, which don't
  5830. // zero out the high bits. Copies and other copy-like instructions can be
  5831. // fed by truncates, or could be lowered as subregister copies.
  5832. switch (MI.getOpcode()) {
  5833. default:
  5834. return true;
  5835. case TargetOpcode::COPY:
  5836. case TargetOpcode::G_BITCAST:
  5837. case TargetOpcode::G_TRUNC:
  5838. case TargetOpcode::G_PHI:
  5839. return false;
  5840. }
  5841. }
  5842. // Perform fixups on the given PHI instruction's operands to force them all
  5843. // to be the same as the destination regbank.
  5844. static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
  5845. const AArch64RegisterBankInfo &RBI) {
  5846. assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  5847. Register DstReg = MI.getOperand(0).getReg();
  5848. const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  5849. assert(DstRB && "Expected PHI dst to have regbank assigned");
  5850. MachineIRBuilder MIB(MI);
  5851. // Go through each operand and ensure it has the same regbank.
  5852. for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
  5853. if (!MO.isReg())
  5854. continue;
  5855. Register OpReg = MO.getReg();
  5856. const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
  5857. if (RB != DstRB) {
  5858. // Insert a cross-bank copy.
  5859. auto *OpDef = MRI.getVRegDef(OpReg);
  5860. const LLT &Ty = MRI.getType(OpReg);
  5861. MachineBasicBlock &OpDefBB = *OpDef->getParent();
  5862. // Any instruction we insert must appear after all PHIs in the block
  5863. // for the block to be valid MIR.
  5864. MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
  5865. if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
  5866. InsertPt = OpDefBB.getFirstNonPHI();
  5867. MIB.setInsertPt(*OpDef->getParent(), InsertPt);
  5868. auto Copy = MIB.buildCopy(Ty, OpReg);
  5869. MRI.setRegBank(Copy.getReg(0), *DstRB);
  5870. MO.setReg(Copy.getReg(0));
  5871. }
  5872. }
  5873. }
  5874. void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  5875. // We're looking for PHIs, build a list so we don't invalidate iterators.
  5876. MachineRegisterInfo &MRI = MF.getRegInfo();
  5877. SmallVector<MachineInstr *, 32> Phis;
  5878. for (auto &BB : MF) {
  5879. for (auto &MI : BB) {
  5880. if (MI.getOpcode() == TargetOpcode::G_PHI)
  5881. Phis.emplace_back(&MI);
  5882. }
  5883. }
  5884. for (auto *MI : Phis) {
  5885. // We need to do some work here if the operand types are < 16 bit and they
  5886. // are split across fpr/gpr banks. Since all types <32b on gpr
  5887. // end up being assigned gpr32 regclasses, we can end up with PHIs here
  5888. // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
  5889. // be selecting heterogenous regbanks for operands if possible, but we
  5890. // still need to be able to deal with it here.
  5891. //
  5892. // To fix this, if we have a gpr-bank operand < 32b in size and at least
  5893. // one other operand is on the fpr bank, then we add cross-bank copies
  5894. // to homogenize the operand banks. For simplicity the bank that we choose
  5895. // to settle on is whatever bank the def operand has. For example:
  5896. //
  5897. // %endbb:
  5898. // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
  5899. // =>
  5900. // %bb2:
  5901. // ...
  5902. // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
  5903. // ...
  5904. // %endbb:
  5905. // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
  5906. bool HasGPROp = false, HasFPROp = false;
  5907. for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
  5908. if (!MO.isReg())
  5909. continue;
  5910. const LLT &Ty = MRI.getType(MO.getReg());
  5911. if (!Ty.isValid() || !Ty.isScalar())
  5912. break;
  5913. if (Ty.getSizeInBits() >= 32)
  5914. break;
  5915. const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
  5916. // If for some reason we don't have a regbank yet. Don't try anything.
  5917. if (!RB)
  5918. break;
  5919. if (RB->getID() == AArch64::GPRRegBankID)
  5920. HasGPROp = true;
  5921. else
  5922. HasFPROp = true;
  5923. }
  5924. // We have heterogenous regbanks, need to fixup.
  5925. if (HasGPROp && HasFPROp)
  5926. fixupPHIOpBanks(*MI, MRI, RBI);
  5927. }
  5928. }
  5929. namespace llvm {
  5930. InstructionSelector *
  5931. createAArch64InstructionSelector(const AArch64TargetMachine &TM,
  5932. AArch64Subtarget &Subtarget,
  5933. AArch64RegisterBankInfo &RBI) {
  5934. return new AArch64InstructionSelector(TM, Subtarget, RBI);
  5935. }
  5936. }