//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PowerPC implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCHazardRecognizers.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-instr-info"

#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
STATISTIC(NumStoreSPILLVSRRCAsVec,
          "Number of spillvsrrc spilled to stack as vec");
STATISTIC(NumStoreSPILLVSRRCAsGpr,
          "Number of spillvsrrc spilled to stack as gpr");
STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
STATISTIC(CmpIselsConverted,
          "Number of ISELs that depend on comparison of constants converted");
STATISTIC(MissedConvertibleImmediateInstrs,
          "Number of compare-immediate instructions fed by constants");
STATISTIC(NumRcRotatesConvertedToRcAnd,
          "Number of record-form rotates converted to record-form andi");

static cl::opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis",
                                        cl::Hidden,
                                        cl::desc("Disable analysis for CTR loops"));

static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
    cl::desc("Disable compare instruction optimization"), cl::Hidden);

static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
    cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
    cl::Hidden);

static cl::opt<bool>
    UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
                      cl::desc("Use the old (incorrect) instruction latency calculation"));

static cl::opt<float>
    FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),
                cl::desc("register pressure factor for the transformations."));

static cl::opt<bool> EnableFMARegPressureReduction(
    "ppc-fma-rp-reduction", cl::Hidden, cl::init(true),
    cl::desc("enable register pressure reduce in machine combiner pass."));
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}

PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
    : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
                      /* CatchRetOpcode */ -1,
                      STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
      Subtarget(STI), RI(STI.getTargetMachine()) {}

/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                           const ScheduleDAG *DAG) const {
  unsigned Directive =
      static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
  if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
      Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
    const InstrItineraryData *II =
        static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG);
  }

  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
/// to use for this target when scheduling the DAG.
ScheduleHazardRecognizer *
PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                                 const ScheduleDAG *DAG) const {
  unsigned Directive =
      DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();

  // FIXME: Leaving this as-is until we have POWER9 scheduling info
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
    return new PPCDispatchGroupSBHazardRecognizer(II, DAG);

  // Most subtargets use a PPC970 recognizer.
  if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
      Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
    assert(DAG->TII && "No InstrInfo?");

    return new PPCHazardRecognizer970(*DAG);
  }

  return new ScoreboardHazardRecognizer(II, DAG);
}
unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                       const MachineInstr &MI,
                                       unsigned *PredCost) const {
  if (!ItinData || UseOldLatencyCalc)
    return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);

  // The default implementation of getInstrLatency calls getStageLatency, but
  // getStageLatency does not do the right thing for us. While we have
  // itineraries, most cores are fully pipelined, so the itineraries only
  // express the first part of the pipeline, not every stage. Instead, we need
  // to use the listed output operand cycle number (using operand 0 here, which
  // is an output).

  unsigned Latency = 1;
  unsigned DefClass = MI.getDesc().getSchedClass();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isDef() || MO.isImplicit())
      continue;

    int Cycle = ItinData->getOperandCycle(DefClass, i);
    if (Cycle < 0)
      continue;

    Latency = std::max(Latency, (unsigned) Cycle);
  }

  return Latency;
}
int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr &DefMI, unsigned DefIdx,
                                    const MachineInstr &UseMI,
                                    unsigned UseIdx) const {
  int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
                                                   UseMI, UseIdx);

  if (!DefMI.getParent())
    return Latency;

  const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
  Register Reg = DefMO.getReg();

  bool IsRegCR;
  if (Reg.isVirtual()) {
    const MachineRegisterInfo *MRI =
        &DefMI.getParent()->getParent()->getRegInfo();
    IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
              MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
  } else {
    IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
              PPC::CRBITRCRegClass.contains(Reg);
  }

  if (UseMI.isBranch() && IsRegCR) {
    if (Latency < 0)
      Latency = getInstrLatency(ItinData, DefMI);

    // On some cores, there is an additional delay between writing to a
    // condition register and using it from a branch.
    unsigned Directive = Subtarget.getCPUDirective();
    switch (Directive) {
    default: break;
    case PPC::DIR_7400:
    case PPC::DIR_750:
    case PPC::DIR_970:
    case PPC::DIR_E5500:
    case PPC::DIR_PWR4:
    case PPC::DIR_PWR5:
    case PPC::DIR_PWR5X:
    case PPC::DIR_PWR6:
    case PPC::DIR_PWR6X:
    case PPC::DIR_PWR7:
    case PPC::DIR_PWR8:
    // FIXME: Is this needed for POWER9?
      Latency += 2;
      break;
    }
  }

  return Latency;
}
/// This is an architecture-specific helper function of reassociateOps.
/// Set special operand attributes for new instructions after reassociation.
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
                                         MachineInstr &OldMI2,
                                         MachineInstr &NewMI1,
                                         MachineInstr &NewMI2) const {
  // Propagate FP flags from the original instructions.
  // But clear poison-generating flags because those may not be valid now.
  uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
  NewMI1.setFlags(IntersectedFlags);
  NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
  NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
  NewMI1.clearFlag(MachineInstr::MIFlag::IsExact);

  NewMI2.setFlags(IntersectedFlags);
  NewMI2.clearFlag(MachineInstr::MIFlag::NoSWrap);
  NewMI2.clearFlag(MachineInstr::MIFlag::NoUWrap);
  NewMI2.clearFlag(MachineInstr::MIFlag::IsExact);
}

void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
                                         uint16_t Flags) const {
  MI.setFlags(Flags);
  MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
  MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
  MI.clearFlag(MachineInstr::MIFlag::IsExact);
}
// This function does not list all associative and commutative operations, but
// only those worth feeding through the machine combiner in an attempt to
// reduce the critical path. Mostly, this means floating-point operations,
// because they have high latencies (>= 5) compared to other operations, such
// as and/or, which are also associative and commutative but have low
// latencies.
bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                               bool Invert) const {
  if (Invert)
    return false;
  switch (Inst.getOpcode()) {
  // Floating point:
  // FP Add:
  case PPC::FADD:
  case PPC::FADDS:
  // FP Multiply:
  case PPC::FMUL:
  case PPC::FMULS:
  // Altivec Add:
  case PPC::VADDFP:
  // VSX Add:
  case PPC::XSADDDP:
  case PPC::XVADDDP:
  case PPC::XVADDSP:
  case PPC::XSADDSP:
  // VSX Multiply:
  case PPC::XSMULDP:
  case PPC::XVMULDP:
  case PPC::XVMULSP:
  case PPC::XSMULSP:
    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(MachineInstr::MIFlag::FmNsz);
  // Fixed point:
  // Multiply:
  case PPC::MULHD:
  case PPC::MULLD:
  case PPC::MULHW:
  case PPC::MULLW:
    return true;
  default:
    return false;
  }
}
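
// Illustrative note (not part of the original source): with the FmReassoc and
// FmNsz flags present on every instruction, the generic machine combiner may
// rebalance a serial chain such as
//   %t1 = XSADDDP %a, %b
//   %t2 = XSADDDP killed %t1, %c
//   %t3 = XSADDDP killed %t2, %d
// into the shallower tree
//   %t1 = XSADDDP %a, %b
//   %t2 = XSADDDP %c, %d
//   %t3 = XSADDDP killed %t1, killed %t2
// cutting the critical path from three dependent adds to two.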
#define InfoArrayIdxFMAInst 0
#define InfoArrayIdxFAddInst 1
#define InfoArrayIdxFMULInst 2
#define InfoArrayIdxAddOpIdx 3
#define InfoArrayIdxMULOpIdx 4
#define InfoArrayIdxFSubInst 5

// Array keeps info for FMA instructions:
// Index 0 (InfoArrayIdxFMAInst): FMA instruction;
// Index 1 (InfoArrayIdxFAddInst): ADD instruction associated with FMA;
// Index 2 (InfoArrayIdxFMULInst): MUL instruction associated with FMA;
// Index 3 (InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
// Index 4 (InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
//         the second MUL operand index is this plus 1;
// Index 5 (InfoArrayIdxFSubInst): SUB instruction associated with FMA.
static const uint16_t FMAOpIdxInfo[][6] = {
    // FIXME: Add more FMA instructions like XSNMADDADP and so on.
    {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},
    {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},
    {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},
    {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},
    {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},
    {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};

// Check if an opcode is an FMA instruction. If it is, return the index in
// array FMAOpIdxInfo. Otherwise, return -1.
int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
  for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)
    if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
      return I;
  return -1;
}
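
// Worked example (illustrative): for Opcode == PPC::XSMADDADP the function
// returns 0, so FMAOpIdxInfo[0][InfoArrayIdxAddOpIdx] == 1 and
// FMAOpIdxInfo[0][InfoArrayIdxMULOpIdx] == 2. That is, in
//   %dst = XSMADDADP %addend, %mul1, %mul2
// the addend is operand 1 and the multiplicands are operands 2 and 3, while
// for the non-VSX PPC::FMADD row the addend is operand 3 and the
// multiplicands are operands 1 and 2.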
// On the PowerPC target, we have two kinds of patterns related to FMA:
// 1: Improve ILP.
// Try to reassociate FMA chains like below:
//
// Pattern 1:
//   A = FADD X, Y          (Leaf)
//   B = FMA  A, M21, M22   (Prev)
//   C = FMA  B, M31, M32   (Root)
// -->
//   A = FMA  X, M21, M22
//   B = FMA  Y, M31, M32
//   C = FADD A, B
//
// Pattern 2:
//   A = FMA  X, M11, M12   (Leaf)
//   B = FMA  A, M21, M22   (Prev)
//   C = FMA  B, M31, M32   (Root)
// -->
//   A = FMUL M11, M12
//   B = FMA  X, M21, M22
//   D = FMA  A, M31, M32
//   C = FADD B, D
//
// This breaks the dependency between A and B, allowing the FMAs to execute in
// parallel (or back-to-back in a pipeline) instead of depending on each other.
//
// 2: Reduce register pressure.
// Try to reassociate FMA with FSUB and a constant like below:
// C is a floating point const.
//
// Pattern 1:
//   A = FSUB X, Y       (Leaf)
//   D = FMA  B, C, A    (Root)
// -->
//   A = FMA  B, Y, -C
//   D = FMA  A, X, C
//
// Pattern 2:
//   A = FSUB X, Y       (Leaf)
//   D = FMA  B, A, C    (Root)
// -->
//   A = FMA  B, Y, -C
//   D = FMA  A, X, C
//
// Before the transformation, A must be assigned to a different hardware
// register than D. After the transformation, A and D must be assigned to the
// same hardware register due to the TIE attribute of FMA instructions.
//
bool PPCInstrInfo::getFMAPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  MachineBasicBlock *MBB = Root.getParent();
  const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
    for (const auto &MO : Instr.explicit_operands())
      if (!(MO.isReg() && MO.getReg().isVirtual()))
        return false;
    return true;
  };

  auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
                                    unsigned OpType) {
    if (Instr.getOpcode() !=
        FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])
      return false;

    // Instruction can be reassociated.
    // Fast-math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands must be virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    // For register pressure reassociation, the FSub must have only one use as
    // we want to delete the sub to save its def.
    if (OpType == InfoArrayIdxFSubInst &&
        !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))
      return false;

    return true;
  };

  auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
                               int16_t &MulOpIdx, bool IsLeaf) {
    int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());
    if (Idx < 0)
      return false;

    // Instruction can be reassociated.
    // Fast-math flags may prohibit reassociation.
    if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&
          Instr.getFlag(MachineInstr::MIFlag::FmNsz)))
      return false;

    // Instruction operands must be virtual registers for reassociation.
    if (!IsAllOpsVirtualReg(Instr))
      return false;

    MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
    if (IsLeaf)
      return true;

    AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];

    const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx);
    MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());
    // If the 'add' operand's def is not in the current block, don't do the
    // ILP-related optimization.
    if (!MIAdd || MIAdd->getParent() != MBB)
      return false;

    // If this is not a leaf FMA instruction, its 'add' operand should have
    // only one use, as this FMA will be changed later.
    return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());
  };

  int16_t AddOpIdx = -1;
  int16_t MulOpIdx = -1;

  bool IsUsedOnceL = false;
  bool IsUsedOnceR = false;
  MachineInstr *MULInstrL = nullptr;
  MachineInstr *MULInstrR = nullptr;

  auto IsRPReductionCandidate = [&]() {
    // Currently, we only support float and double.
    // FIXME: add support for other types.
    unsigned Opcode = Root.getOpcode();
    if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
      return false;

    // Root must be a valid FMA-like instruction.
    // Treat it as a leaf, as we don't care about its add operand here.
    if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {
      assert((MulOpIdx >= 0) && "mul operand index not right!");
      Register MULRegL = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx).getReg(), MRI);
      Register MULRegR = TRI->lookThruSingleUseCopyChain(
          Root.getOperand(MulOpIdx + 1).getReg(), MRI);
      if (!MULRegL && !MULRegR)
        return false;

      if (MULRegL && !MULRegR) {
        MULRegR =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);
        IsUsedOnceL = true;
      } else if (!MULRegL && MULRegR) {
        MULRegL =
            TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);
        IsUsedOnceR = true;
      } else {
        IsUsedOnceL = true;
        IsUsedOnceR = true;
      }

      if (!MULRegL.isVirtual() || !MULRegR.isVirtual())
        return false;

      MULInstrL = MRI->getVRegDef(MULRegL);
      MULInstrR = MRI->getVRegDef(MULRegR);
      return true;
    }
    return false;
  };

  // Register pressure fma reassociation patterns.
  if (DoRegPressureReduce && IsRPReductionCandidate()) {
    assert((MULInstrL && MULInstrR) &&
           "wrong register pressure reduction candidate!");
    // Register pressure pattern 1
    if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&
        IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA);
      return true;
    }

    // Register pressure pattern 2
    if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&
         IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {
      LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
      Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC);
      return true;
    }
  }

  // ILP fma reassociation patterns.
  // Root must be a valid FMA-like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegB = Root.getOperand(AddOpIdx).getReg();
  MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);

  // Prev must be a valid FMA-like instruction.
  AddOpIdx = -1;
  if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))
    return false;

  assert((AddOpIdx >= 0) && "add operand index not right!");

  Register RegA = Prev->getOperand(AddOpIdx).getReg();
  MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);
  AddOpIdx = -1;
  if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {
    Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
    return true;
  }
  if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {
    Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM);
    LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
    return true;
  }
  return false;
}
void PPCInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, MachineCombinerPattern &P,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");

  MachineFunction *MF = Root.getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineConstantPool *MCP = MF->getConstantPool();

  int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());
  if (Idx < 0)
    return;

  uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];

  // For now we only need to fix up the placeholder for the register pressure
  // reduction patterns.
  Register ConstReg = 0;
  switch (P) {
  case MachineCombinerPattern::REASSOC_XY_BCA:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);
    break;
  case MachineCombinerPattern::REASSOC_XY_BAC:
    ConstReg =
        TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);
    break;
  default:
    // Not register pressure reduction patterns.
    return;
  }

  MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);
  // Get the const value from the constant pool.
  const Constant *C = getConstantFromConstantPool(ConstDefInstr);
  assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");

  // Get the negated fp const.
  APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());
  F1.changeSign();
  Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);
  Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());

  // Put the negated fp const into the constant pool.
  unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);

  MachineOperand *Placeholder = nullptr;
  // Record the placeholder PPC::ZERO8 we added in reassociateFMA.
  for (auto *Inst : InsInstrs) {
    for (MachineOperand &Operand : Inst->explicit_operands()) {
      assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
      if (Operand.getReg() == PPC::ZERO8) {
        Placeholder = &Operand;
        break;
      }
    }
  }

  assert(Placeholder && "Placeholder does not exist!");

  // Generate instructions to load the const fp from the constant pool.
  // We only support PPC64 and the medium code model.
  Register LoadNewConst =
      generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);

  // Fill the placeholder with the new load from the constant pool.
  Placeholder->setReg(LoadNewConst);
}
bool PPCInstrInfo::shouldReduceRegisterPressure(
    const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {
  if (!EnableFMARegPressureReduction)
    return false;

  // Currently, we only enable register pressure reduction in the machine
  // combiner for: 1: PPC64; 2: the medium code model; 3: subtargets with
  // Power9 vector support.
  //
  // So we need the following instructions to access a TOC entry:
  //
  // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
  // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
  //   killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
  //
  // FIXME: add more supported targets, like Small and Large code model, PPC32,
  // AIX.
  if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
        Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineFunction *MF = MBB->getParent();
  const MachineRegisterInfo *MRI = &MF->getRegInfo();

  auto GetMBBPressure =
      [&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {
    RegionPressure Pressure;
    RegPressureTracker RPTracker(Pressure);

    // Initialize the register pressure tracker.
    RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),
                   /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

    for (const auto &MI : reverse(*MBB)) {
      if (MI.isDebugValue() || MI.isDebugLabel())
        continue;
      RegisterOperands RegOpers;
      RegOpers.collect(MI, *TRI, *MRI, false, false);
      RPTracker.recedeSkipDebugValues();
      assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
      RPTracker.recede(RegOpers);
    }

    // Close the RPTracker to finalize live ins.
    RPTracker.closeRegion();

    return RPTracker.getPressure().MaxSetPressure;
  };

  // For now we only care about float- and double-typed FMAs.
  unsigned VSSRCLimit = TRI->getRegPressureSetLimit(
      *MBB->getParent(), PPC::RegisterPressureSets::VSSRC);

  // Only reduce register pressure when pressure is high.
  return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >
         (float)VSSRCLimit * FMARPFactor;
}
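
// Illustrative arithmetic (not from the original source): with the default
// FMARPFactor of 1.5 and a hypothetical VSSRC pressure-set limit of 32, the
// transformation is considered only when the block's maximum VSSRC pressure
// exceeds 48, i.e. when spilling of vector-scalar registers is likely.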
bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {
  // I must have exactly one memory operand, a load from the constant pool.
  if (!I->hasOneMemOperand())
    return false;

  MachineMemOperand *Op = I->memoperands()[0];
  return Op->isLoad() && Op->getPseudoValue() &&
         Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
}
Register PPCInstrInfo::generateLoadForNewConst(
    unsigned Idx, MachineInstr *MI, Type *Ty,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  // For now we only support PPC64 with the medium code model and P9 vector
  // support. There is a fixed instruction pattern for accessing the constant
  // pool; see function shouldReduceRegisterPressure.
  assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
          Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
         "Target not supported!\n");

  MachineFunction *MF = MI->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();

  // Generate ADDIStocHA8
  Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
  MachineInstrBuilder TOCOffset =
      BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)
          .addReg(PPC::X2)
          .addConstantPoolIndex(Idx);

  assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&
         "Only float and double are supported!");

  unsigned LoadOpcode;
  // Should be float type or double type.
  if (Ty->isFloatTy())
    LoadOpcode = PPC::DFLOADf32;
  else
    LoadOpcode = PPC::DFLOADf64;

  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  Register VReg2 = MRI->createVirtualRegister(RC);
  MachineMemOperand *MMO = MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,
      Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));

  // Generate the load from the constant pool.
  MachineInstrBuilder Load =
      BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)
          .addConstantPoolIndex(Idx)
          .addReg(VReg1, getKillRegState(true))
          .addMemOperand(MMO);

  Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);

  // Insert the TOC load instructions into InsInstrs.
  InsInstrs.insert(InsInstrs.begin(), Load);
  InsInstrs.insert(InsInstrs.begin(), TOCOffset);
  return VReg2;
}
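
// For reference (illustrative, mirroring the comment in
// shouldReduceRegisterPressure): for a float constant, the pair of
// instructions built above looks like
//   %vreg1:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.N
//   %vreg2:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.N,
//     killed %vreg1 :: (load 4 from constant-pool)
// i.e. a TOC-relative high-part adjustment followed by a low-part load.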
// This function returns the const value in the constant pool if \p I is a load
// from the constant pool.
const Constant *
PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {
  MachineFunction *MF = I->getMF();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  MachineConstantPool *MCP = MF->getConstantPool();
  assert(I->mayLoad() && "Should be a load instruction.\n");
  for (auto MO : I->uses()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isVirtual())
      continue;
    // Find the TOC address.
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    for (auto MO2 : DefMI->uses())
      if (MO2.isCPI())
        return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
  }
  return nullptr;
}
bool PPCInstrInfo::getMachineCombinerPatterns(
    MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
    bool DoRegPressureReduce) const {
  // Using the machine combiner in this way is potentially expensive, so
  // restrict to when aggressive optimizations are desired.
  if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
    return false;

  if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
    return true;

  return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
                                                     DoRegPressureReduce);
}
void PPCInstrInfo::genAlternativeCodeSequence(
    MachineInstr &Root, MachineCombinerPattern Pattern,
    SmallVectorImpl<MachineInstr *> &InsInstrs,
    SmallVectorImpl<MachineInstr *> &DelInstrs,
    DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  switch (Pattern) {
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XY_BCA:
  case MachineCombinerPattern::REASSOC_XY_BAC:
    reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
    break;
  default:
    // Reassociate default patterns.
    TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
                                                DelInstrs, InstrIdxForVirtReg);
    break;
  }
}
  688. void PPCInstrInfo::reassociateFMA(
  689. MachineInstr &Root, MachineCombinerPattern Pattern,
  690. SmallVectorImpl<MachineInstr *> &InsInstrs,
  691. SmallVectorImpl<MachineInstr *> &DelInstrs,
  692. DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
  693. MachineFunction *MF = Root.getMF();
  694. MachineRegisterInfo &MRI = MF->getRegInfo();
  695. const TargetRegisterInfo *TRI = &getRegisterInfo();
  696. MachineOperand &OpC = Root.getOperand(0);
  697. Register RegC = OpC.getReg();
  698. const TargetRegisterClass *RC = MRI.getRegClass(RegC);
  699. MRI.constrainRegClass(RegC, RC);
  700. unsigned FmaOp = Root.getOpcode();
  701. int16_t Idx = getFMAOpIdxInfo(FmaOp);
  702. assert(Idx >= 0 && "Root must be a FMA instruction");
  703. bool IsILPReassociate =
  704. (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) ||
  705. (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM);
  706. uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
  707. uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
  708. MachineInstr *Prev = nullptr;
  709. MachineInstr *Leaf = nullptr;
  710. switch (Pattern) {
  711. default:
  712. llvm_unreachable("not recognized pattern!");
  713. case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
  714. case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
  715. Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());
  716. Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());
  717. break;
  718. case MachineCombinerPattern::REASSOC_XY_BAC: {
  719. Register MULReg =
  720. TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);
  721. Leaf = MRI.getVRegDef(MULReg);
  722. break;
  723. }
  724. case MachineCombinerPattern::REASSOC_XY_BCA: {
  725. Register MULReg = TRI->lookThruCopyLike(
  726. Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);
  727. Leaf = MRI.getVRegDef(MULReg);
  728. break;
  729. }
  730. }
  731. uint16_t IntersectedFlags = 0;
  732. if (IsILPReassociate)
  733. IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
  734. else
  735. IntersectedFlags = Root.getFlags() & Leaf->getFlags();
  736. auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
  737. bool &KillFlag) {
  738. Reg = Operand.getReg();
  739. MRI.constrainRegClass(Reg, RC);
  740. KillFlag = Operand.isKill();
  741. };
  742. auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
  743. Register &MulOp2, Register &AddOp,
  744. bool &MulOp1KillFlag, bool &MulOp2KillFlag,
  745. bool &AddOpKillFlag) {
  746. GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);
  747. GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);
  748. GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);
  749. };
  750. Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
  751. RegA21, RegB;
  752. bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
  753. KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
  754. KillA11 = false, KillA21 = false, KillB = false;
  755. GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
  756. if (IsILPReassociate)
  757. GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
  758. if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
  759. GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
  760. GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);
  761. } else if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) {
  762. GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
  763. GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  764. } else {
  765. // Get FSUB instruction info.
  766. GetOperandInfo(Leaf->getOperand(1), RegX, KillX);
  767. GetOperandInfo(Leaf->getOperand(2), RegY, KillY);
  768. }
  // Create new virtual registers for the new results instead of recycling the
  // old ones, because the MachineCombiner's computation of the critical path
  // requires a new register definition rather than an existing one.
  // For register-pressure reassociation, we only need to create one virtual
  // register for the new FMA.
  Register NewVRA = MRI.createVirtualRegister(RC);
  InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));

  Register NewVRB = 0;
  if (IsILPReassociate) {
    NewVRB = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));
  }

  Register NewVRD = 0;
  if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
    NewVRD = MRI.createVirtualRegister(RC);
    InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));
  }
  auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,
                                Register RegMul1, bool KillRegMul1,
                                Register RegMul2, bool KillRegMul2) {
    MI->getOperand(AddOpIdx).setReg(RegAdd);
    MI->getOperand(AddOpIdx).setIsKill(KillAdd);
    MI->getOperand(FirstMulOpIdx).setReg(RegMul1);
    MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);
    MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);
    MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);
  };
  MachineInstrBuilder NewARegPressure, NewCRegPressure;
  switch (Pattern) {
  default:
    llvm_unreachable("not recognized pattern!");
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM: {
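    // The rewrite performed here, as a sketch in pseudo machine IR (register
    // names illustrative):
    //   before: Leaf: %x = FADD %a, %b
    //           Prev: %p = FMA  %x, %m21, %m22
    //           Root: %r = FMA  %p, %m31, %m32
    //   after:  NewB: %nb = FMA  %a, %m21, %m22
    //           NewA: %na = FMA  %b, %m31, %m32
    //           NewC: %r  = FADD %nb, %na
    // The two FMAs are now independent, shortening the critical path.
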
    // Create new instructions for insertion.
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
            .addReg(RegY, getKillRegState(KillY))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRA, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewC);
    break;
  }
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
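    // Rewrite sketch for this pattern, in pseudo machine IR (register names
    // illustrative):
    //   before: Leaf: %l = FMA  %x, %m11, %m12
    //           Prev: %p = FMA  %l, %m21, %m22
    //           Root: %r = FMA  %p, %m31, %m32
    //   after:  NewA: %na = FMUL %m11, %m12
    //           NewB: %nb = FMA  %x, %m21, %m22
    //           NewD: %nd = FMA  %na, %m31, %m32
    //           NewC: %r  = FADD %nb, %nd
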
    assert(NewVRD && "new FMA register not created!");
    // Create new instructions for insertion.
    MachineInstrBuilder MINewA =
        BuildMI(*MF, Leaf->getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)
            .addReg(RegM11, getKillRegState(KillM11))
            .addReg(RegM12, getKillRegState(KillM12));
    MachineInstrBuilder MINewB =
        BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)
            .addReg(RegX, getKillRegState(KillX))
            .addReg(RegM21, getKillRegState(KillM21))
            .addReg(RegM22, getKillRegState(KillM22));
    MachineInstrBuilder MINewD =
        BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)
            .addReg(NewVRA, getKillRegState(true))
            .addReg(RegM31, getKillRegState(KillM31))
            .addReg(RegM32, getKillRegState(KillM32));
    // If AddOpIdx is not 1, adjust the order.
    if (AddOpIdx != 1) {
      AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
      AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,
                         KillM32);
    }

    MachineInstrBuilder MINewC =
        BuildMI(*MF, Root.getDebugLoc(),
                get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)
            .addReg(NewVRB, getKillRegState(true))
            .addReg(NewVRD, getKillRegState(true));

    // Update flags for newly created instructions.
    setSpecialOperandAttr(*MINewA, IntersectedFlags);
    setSpecialOperandAttr(*MINewB, IntersectedFlags);
    setSpecialOperandAttr(*MINewD, IntersectedFlags);
    setSpecialOperandAttr(*MINewC, IntersectedFlags);

    // Record new instructions for insertion.
    InsInstrs.push_back(MINewA);
    InsInstrs.push_back(MINewB);
    InsInstrs.push_back(MINewD);
    InsInstrs.push_back(MINewC);
    break;
  }
  case MachineCombinerPattern::REASSOC_XY_BAC:
  case MachineCombinerPattern::REASSOC_XY_BCA: {
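    // Rewrite sketch for the register-pressure patterns (pseudo machine IR,
    // names illustrative; shown for REASSOC_XY_BAC, BCA is the same with
    // Root's multiply operands swapped). Here %c holds a constant C from the
    // constant pool, and %negc stands for the load of -C that
    // finalizeInsInstrs later installs in place of the ZERO8 placeholder:
    //   before: Leaf: %l = FSUB %x, %y
    //           Root: %r = FMA  %b, %l, %c      ; b + (x - y) * C
    //   after:  NewA: %na = FMA %b, %y, %negc   ; b - y * C
    //           NewC: %r  = FMA %na, %x, %c     ; b - y * C + x * C
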
    Register VarReg;
    bool KillVarReg = false;
    if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) {
      VarReg = RegM31;
      KillVarReg = KillM31;
    } else {
      VarReg = RegM32;
      KillVarReg = KillM32;
    }
    // We don't want to materialize the negated constant from the constant
    // pool too early, since the created entry will not be deleted even if it
    // ends up with no users. Because all operands of Leaf and Root are
    // virtual registers, we use the zero register here as a placeholder; when
    // this InsInstrs sequence is selected by the MachineCombiner,
    // finalizeInsInstrs replaces the zero register with a virtual register
    // holding a load from the constant pool.
    NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)
                          .addReg(RegB, getKillRegState(KillB))
                          .addReg(RegY, getKillRegState(KillY))
                          .addReg(PPC::ZERO8);
    NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)
                          .addReg(NewVRA, getKillRegState(true))
                          .addReg(RegX, getKillRegState(KillX))
                          .addReg(VarReg, getKillRegState(KillVarReg));
    // For now, we only support xsmaddadp/xsmaddasp, whose add operands are
    // both at index 1, so no adjustment is needed.
    // FIXME: When support for more FMA instructions (e.g., fma/fmas) is
    // added, adjust the operand index here.
    break;
  }
  }

  if (!IsILPReassociate) {
    setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);
    setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);

    InsInstrs.push_back(NewARegPressure);
    InsInstrs.push_back(NewCRegPressure);
  }

  assert(!InsInstrs.empty() &&
         "Insertion instructions set should not be empty!");

  // Record old instructions for deletion.
  DelInstrs.push_back(Leaf);
  if (IsILPReassociate)
    DelInstrs.push_back(Prev);
  DelInstrs.push_back(&Root);
}

// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                         Register &SrcReg, Register &DstReg,
                                         unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: return false;
  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64:
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SubIdx = PPC::sub_32;
    return true;
  }
}

unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                           int &FrameIndex) const {
  if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {
    // Check for the operands added by addFrameReference (the immediate is the
    // offset which defaults to 0).
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}

// For opcodes with the ReMaterializable flag set, this function is called to
// verify that the instruction is really trivially rematerializable.
bool PPCInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default:
    // This function should only be called for opcodes with the
    // ReMaterializable flag set.
    llvm_unreachable("Unknown rematerializable operation!");
    break;
  case PPC::LI:
  case PPC::LI8:
  case PPC::PLI:
  case PPC::PLI8:
  case PPC::LIS:
  case PPC::LIS8:
  case PPC::ADDIStocHA:
  case PPC::ADDIStocHA8:
  case PPC::ADDItocL:
  case PPC::LOAD_STACK_GUARD:
  case PPC::XXLXORz:
  case PPC::XXLXORspz:
  case PPC::XXLXORdpz:
  case PPC::XXLEQVOnes:
  case PPC::XXSPLTI32DX:
  case PPC::XXSPLTIW:
  case PPC::XXSPLTIDP:
  case PPC::V_SET0B:
  case PPC::V_SET0H:
  case PPC::V_SET0:
  case PPC::V_SETALLONESB:
  case PPC::V_SETALLONESH:
  case PPC::V_SETALLONES:
  case PPC::CRSET:
  case PPC::CRUNSET:
  case PPC::XXSETACCZ:
  case PPC::XXSETACCZW:
    return true;
  }
  return false;
}

unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                          int &FrameIndex) const {
  if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {
    if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&
        MI.getOperand(2).isFI()) {
      FrameIndex = MI.getOperand(2).getIndex();
      return MI.getOperand(0).getReg();
    }
  }
  return 0;
}

MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                   unsigned OpIdx1,
                                                   unsigned OpIdx2) const {
  MachineFunction &MF = *MI.getParent()->getParent();

  // Normal instructions can be commuted the obvious way.
  if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
    return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
  // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
  // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
  // changing the relative order of the mask operands might change what happens
  // to the high-bits of the mask (and, thus, the result).

  // Cannot commute if it has a non-zero rotate count.
  if (MI.getOperand(3).getImm() != 0)
    return nullptr;

  // If we have a zero rotate count, we have:
  //   M = mask(MB,ME)
  //   Op0 = (Op1 & ~M) | (Op2 & M)
  // Change this to:
  //   M = mask((ME+1)&31, (MB-1)&31)
  //   Op0 = (Op2 & ~M) | (Op1 & M)
  // Swap op1/op2.
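  // For example (values illustrative): with a zero rotate count, MB = 8 and
  // ME = 27, the commuted form uses mask((27 + 1) & 31, (8 - 1) & 31), i.e.
  // mask(28, 7), with the two source operands swapped.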
  assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
         "Only operands 1 and 2 can be swapped in RLWIMI/RLWIMI_rec.");
  Register Reg0 = MI.getOperand(0).getReg();
  Register Reg1 = MI.getOperand(1).getReg();
  Register Reg2 = MI.getOperand(2).getReg();
  unsigned SubReg1 = MI.getOperand(1).getSubReg();
  unsigned SubReg2 = MI.getOperand(2).getSubReg();
  bool Reg1IsKill = MI.getOperand(1).isKill();
  bool Reg2IsKill = MI.getOperand(2).isKill();
  bool ChangeReg0 = false;
  // If machine instrs are no longer in two-address forms, update
  // destination register as well.
  if (Reg0 == Reg1) {
    // Must be two address instruction!
    assert(MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
           "Expecting a two-address instruction!");
    assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
    Reg2IsKill = false;
    ChangeReg0 = true;
  }

  // Masks.
  unsigned MB = MI.getOperand(4).getImm();
  unsigned ME = MI.getOperand(5).getImm();

  // We can't commute a trivial mask (there is no way to represent an all-zero
  // mask).
  if (MB == 0 && ME == 31)
    return nullptr;

  if (NewMI) {
    // Create a new instruction.
    Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(0).getReg();
    bool Reg0IsDead = MI.getOperand(0).isDead();
    return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())
        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
        .addReg(Reg2, getKillRegState(Reg2IsKill))
        .addReg(Reg1, getKillRegState(Reg1IsKill))
        .addImm((ME + 1) & 31)
        .addImm((MB - 1) & 31);
  }

  if (ChangeReg0) {
    MI.getOperand(0).setReg(Reg2);
    MI.getOperand(0).setSubReg(SubReg2);
  }
  MI.getOperand(2).setReg(Reg1);
  MI.getOperand(1).setReg(Reg2);
  MI.getOperand(2).setSubReg(SubReg1);
  MI.getOperand(1).setSubReg(SubReg2);
  MI.getOperand(2).setIsKill(Reg1IsKill);
  MI.getOperand(1).setIsKill(Reg2IsKill);

  // Swap the mask around.
  MI.getOperand(4).setImm((ME + 1) & 31);
  MI.getOperand(5).setImm((MB - 1) & 31);
  return &MI;
}

bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
                                         unsigned &SrcOpIdx1,
                                         unsigned &SrcOpIdx2) const {
  // For VSX A-Type FMA instructions, it is the first two operands that can be
  // commuted, however, because the non-encoded tied input operand is listed
  // first, the operands to swap are actually the second and third.
  int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
  if (AltOpc == -1)
    return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);

  // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
  // and SrcOpIdx2.
  return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}

void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI) const {
  // This function is used for scheduling, and the nop wanted here is the type
  // that terminates dispatch groups on the POWER cores.
  unsigned Directive = Subtarget.getCPUDirective();
  unsigned Opcode;
  switch (Directive) {
  default:            Opcode = PPC::NOP; break;
  case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
  case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
  // FIXME: Update when the POWER8 scheduling model is ready.
  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break;
  // FIXME: Update when the POWER9 scheduling model is ready.
  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
  }

  DebugLoc DL;
  BuildMI(MBB, MI, DL, get(Opcode));
}

/// Return the noop instruction to use for a noop.
MCInst PPCInstrInfo::getNop() const {
  MCInst Nop;
  Nop.setOpcode(PPC::NOP);
  return Nop;
}

// Branch analysis.
// Note: If the condition register is set to CTR or CTR8 then this is a
// BDNZ (imm == 1) or BDZ (imm == 0) branch.
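// On success, Cond encodes the branch condition as two operands; a rough
// sketch (operand values illustrative):
//   BCC  %pred, %cr, %bb -> Cond = { imm(%pred), %cr }
//   BC   %crbit, %bb     -> Cond = { imm(PRED_BIT_SET), %crbit }
//   BDNZ %bb             -> Cond = { imm(1), CTR or CTR8 }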
bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  bool isPPC64 = Subtarget.isPPC64();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  if (!isUnpredicatedTerminator(*I))
    return false;

  if (AllowModify) {
    // If the BB ends with an unconditional branch to the fallthrough BB,
    // we eliminate the branch instruction.
    if (I->getOpcode() == PPC::B &&
        MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
      I->eraseFromParent();

      // Update the iterator after deleting the last branch.
      I = MBB.getLastNonDebugInstr();
      if (I == MBB.end() || !isUnpredicatedTerminator(*I))
        return false;
    }
  }

  // Get the last instruction in the block.
  MachineInstr &LastInst = *I;

  // If there is only one terminator instruction, process it.
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (LastInst.getOpcode() == PPC::B) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      return false;
    } else if (LastInst.getOpcode() == PPC::BCC) {
      if (!LastInst.getOperand(2).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(2).getMBB();
      Cond.push_back(LastInst.getOperand(0));
      Cond.push_back(LastInst.getOperand(1));
      return false;
    } else if (LastInst.getOpcode() == PPC::BC) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BCn) {
      if (!LastInst.getOperand(1).isMBB())
        return true;
      // Block ends with fall-through condbranch.
      TBB = LastInst.getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
      Cond.push_back(LastInst.getOperand(0));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
               LastInst.getOpcode() == PPC::BDNZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(1));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    } else if (LastInst.getOpcode() == PPC::BDZ8 ||
               LastInst.getOpcode() == PPC::BDZ) {
      if (!LastInst.getOperand(0).isMBB())
        return true;
      if (DisableCTRLoopAnal)
        return true;
      TBB = LastInst.getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                               true));
      return false;
    }

    // Otherwise, don't know what this is.
    return true;
  }
  // Get the instruction before it if it's a terminator.
  MachineInstr &SecondLastInst = *I;

  // If there are three terminators, we don't know what sort of block this is.
  if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with PPC::B and PPC::BCC, handle it.
  if (SecondLastInst.getOpcode() == PPC::BCC &&
      LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(2).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(2).getMBB();
    Cond.push_back(SecondLastInst.getOperand(0));
    Cond.push_back(SecondLastInst.getOperand(1));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BC &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if (SecondLastInst.getOpcode() == PPC::BCn &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(1).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(1).getMBB();
    Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
    Cond.push_back(SecondLastInst.getOperand(0));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
              SecondLastInst.getOpcode() == PPC::BDNZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(1));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
              SecondLastInst.getOpcode() == PPC::BDZ) &&
             LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB() ||
        !LastInst.getOperand(0).isMBB())
      return true;
    if (DisableCTRLoopAnal)
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    Cond.push_back(MachineOperand::CreateImm(0));
    Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
                                             true));
    FBB = LastInst.getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two PPC::Bs, handle it. The second one is not
  // executed, so remove it.
  if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
    if (!SecondLastInst.getOperand(0).isMBB())
      return true;
    TBB = SecondLastInst.getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                    int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (I->getOpcode() != PPC::BCC &&
      I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
      I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
      I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *TBB,
                                    MachineBasicBlock *FBB,
                                    ArrayRef<MachineOperand> Cond,
                                    const DebugLoc &DL,
                                    int *BytesAdded) const {
  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "PPC branch conditions have two components!");
  assert(!BytesAdded && "code size not handled");
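
  // Emission sketch: a one-way branch becomes a single B/BC/BCn/BCC or
  // BDNZ/BDZ (return value 1); a two-way branch becomes the conditional
  // branch to TBB followed by an unconditional B to FBB (return value 2).
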
  bool isPPC64 = Subtarget.isPPC64();

  // One-way branch.
  if (!FBB) {
    if (Cond.empty()) // Unconditional branch
      BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
    else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
      BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                                (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                                (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
      BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
    else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
      BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
    else // Conditional branch
      BuildMI(&MBB, DL, get(PPC::BCC))
          .addImm(Cond[0].getImm())
          .add(Cond[1])
          .addMBB(TBB);
    return 1;
  }

  // Two-way conditional branch.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    BuildMI(&MBB, DL, get(Cond[0].getImm() ?
                              (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
                              (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
    BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
  else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
    BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
  else
    BuildMI(&MBB, DL, get(PPC::BCC))
        .addImm(Cond[0].getImm())
        .add(Cond[1])
        .addMBB(TBB);
  BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
  return 2;
}

// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                   ArrayRef<MachineOperand> Cond,
                                   Register DstReg, Register TrueReg,
                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  if (Cond.size() != 2)
    return false;

  // If this is really a bdnz-like condition, then it cannot be turned into a
  // select.
  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
    return false;

  // If the conditional branch uses a physical register, then it cannot be
  // turned into a select.
  if (Cond[1].getReg().isPhysical())
    return false;

  // Check register classes.
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  if (!RC)
    return false;

  // isel is for regular integer GPRs only.
  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
      !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
      !PPC::G8RCRegClass.hasSubClassEq(RC) &&
      !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
    return false;

  // FIXME: These numbers are for the A2; how well they work for other cores
  // is an open question. On the A2, the isel instruction has a 2-cycle latency
  // but single-cycle throughput. These numbers are used in combination with
  // the MispredictPenalty setting from the active SchedMachineModel.
  CondCycles = 1;
  TrueCycles = 1;
  FalseCycles = 1;

  return true;
}

void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                const DebugLoc &dl, Register DestReg,
                                ArrayRef<MachineOperand> Cond, Register TrueReg,
                                Register FalseReg) const {
  assert(Cond.size() == 2 &&
         "PPC branch conditions have two components!");

  // Get the register classes.
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *RC =
      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
  assert(RC && "TrueReg and FalseReg must have overlapping register classes");

  bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
                 PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
  assert((Is64Bit ||
          PPC::GPRCRegClass.hasSubClassEq(RC) ||
          PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
         "isel is for regular integer GPRs only");

  unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
  auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());

  unsigned SubIdx = 0;
  bool SwapOps = false;
  switch (SelectPred) {
  case PPC::PRED_EQ:
  case PPC::PRED_EQ_MINUS:
  case PPC::PRED_EQ_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = false; break;
  case PPC::PRED_NE:
  case PPC::PRED_NE_MINUS:
  case PPC::PRED_NE_PLUS:
    SubIdx = PPC::sub_eq; SwapOps = true; break;
  case PPC::PRED_LT:
  case PPC::PRED_LT_MINUS:
  case PPC::PRED_LT_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = false; break;
  case PPC::PRED_GE:
  case PPC::PRED_GE_MINUS:
  case PPC::PRED_GE_PLUS:
    SubIdx = PPC::sub_lt; SwapOps = true; break;
  case PPC::PRED_GT:
  case PPC::PRED_GT_MINUS:
  case PPC::PRED_GT_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = false; break;
  case PPC::PRED_LE:
  case PPC::PRED_LE_MINUS:
  case PPC::PRED_LE_PLUS:
    SubIdx = PPC::sub_gt; SwapOps = true; break;
  case PPC::PRED_UN:
  case PPC::PRED_UN_MINUS:
  case PPC::PRED_UN_PLUS:
    SubIdx = PPC::sub_un; SwapOps = false; break;
  case PPC::PRED_NU:
  case PPC::PRED_NU_MINUS:
  case PPC::PRED_NU_PLUS:
    SubIdx = PPC::sub_un; SwapOps = true; break;
  case PPC::PRED_BIT_SET:   SubIdx = 0; SwapOps = false; break;
  case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
  }
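
  // For example: PRED_NE maps to SubIdx = sub_eq with SwapOps = true,
  // because isel selects its first source operand when the tested bit is
  // set; for NE the EQ-set case must therefore select FalseReg.
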
  Register FirstReg = SwapOps ? FalseReg : TrueReg,
           SecondReg = SwapOps ? TrueReg : FalseReg;

  // The first input register of isel cannot be r0. If it is a member
  // of a register class that can be r0, then copy it first (the
  // register allocator should eliminate the copy).
  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
    const TargetRegisterClass *FirstRC =
        MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
            &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
    Register OldFirstReg = FirstReg;
    FirstReg = MRI.createVirtualRegister(FirstRC);
    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
        .addReg(OldFirstReg);
  }

  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
      .addReg(FirstReg).addReg(SecondReg)
      .addReg(Cond[1].getReg(), 0, SubIdx);
}

static unsigned getCRBitValue(unsigned CRBit) {
  unsigned Ret = 4;
  if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
      CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
      CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
      CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
    Ret = 3;
  if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
      CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
      CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
      CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
    Ret = 2;
  if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
      CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
      CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
      CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
    Ret = 1;
  if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
      CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
      CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
      CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
    Ret = 0;

  assert(Ret != 4 && "Invalid CR bit register");
  return Ret;
}

void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               const DebugLoc &DL, MCRegister DestReg,
                               MCRegister SrcReg, bool KillSrc) const {
  // We can end up with self copies and similar things as a result of VSX copy
  // legalization. Promote them here.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  if (PPC::F8RCRegClass.contains(DestReg) &&
      PPC::VSRCRegClass.contains(SrcReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && SrcReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    DestReg = SuperReg;
  } else if (PPC::F8RCRegClass.contains(SrcReg) &&
             PPC::VSRCRegClass.contains(DestReg)) {
    MCRegister SuperReg =
        TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);

    if (VSXSelfCopyCrash && DestReg == SuperReg)
      llvm_unreachable("nop VSX copy");

    SrcReg = SuperReg;
  }

  // Copies between registers of different classes.
  if (PPC::CRBITRCRegClass.contains(SrcReg) &&
      PPC::GPRCRegClass.contains(DestReg)) {
    MCRegister CRReg = getCRFromCRBit(SrcReg);
    BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);
    getKillRegState(KillSrc);

    // Rotate the CR bit in the CR fields to be the least significant bit and
    // then mask with 0x1 (MB = ME = 31).
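    // For example (registers illustrative): copying CR2EQ, CRReg is CR2
    // (encoding 2) and getCRBitValue(CR2EQ) is 1, so the rotate amount is
    // 2 * 4 + (4 - 1) = 11, which brings CR2's EQ bit down to bit 31.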
    BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
        .addImm(31)
        .addImm(31);
    return;
  } else if (PPC::CRRCRegClass.contains(SrcReg) &&
             (PPC::G8RCRegClass.contains(DestReg) ||
              PPC::GPRCRegClass.contains(DestReg))) {
    bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);
    unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
    unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
    unsigned CRNum = TRI->getEncodingValue(SrcReg);
    BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    if (CRNum == 7)
      return;
    // Shift the CR bits so the CR field ends up in the lowest 4 bits of the
    // GPR.
    BuildMI(MBB, I, DL, get(ShCode), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(CRNum * 4 + 4)
        .addImm(28)
        .addImm(31);
    return;
  } else if (PPC::G8RCRegClass.contains(SrcReg) &&
             PPC::VSFRCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);
    NumGPRtoVSRSpill++;
    getKillRegState(KillSrc);
    return;
  } else if (PPC::VSFRCRegClass.contains(SrcReg) &&
             PPC::G8RCRegClass.contains(DestReg)) {
    assert(Subtarget.hasDirectMove() &&
           "Subtarget doesn't support directmove, don't know how to copy.");
    BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::SPERCRegClass.contains(SrcReg) &&
             PPC::GPRCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  } else if (PPC::GPRCRegClass.contains(SrcReg) &&
             PPC::SPERCRegClass.contains(DestReg)) {
    BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);
    getKillRegState(KillSrc);
    return;
  }
  unsigned Opc;
  if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR;
  else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::OR8;
  else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::FMR;
  else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::MCRF;
  else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::VOR;
  else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
    // There are two different ways this can be done:
    //   1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
    //      issue in VSU pipeline 0.
    //   2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
    //      can go to either pipeline.
    // We'll always use xxlor here, because in practically all cases where
    // copies are generated, they are close enough to some use that the
    // lower-latency form is preferable.
    Opc = PPC::XXLOR;
  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
    Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
  else if (Subtarget.pairedVectorMemops() &&
           PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
    // A VSRp pair is copied as its two underlying registers: pairs 0-15
    // overlap VSL0-VSL31 and pairs 16-31 overlap V0-V31, two consecutive
    // registers per pair.
    if (SrcReg > PPC::VSRp15)
      SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
    else
      SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
    if (DestReg > PPC::VSRp15)
      DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
    else
      DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg)
        .addReg(SrcReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1)
        .addReg(SrcReg + 1)
        .addReg(SrcReg + 1, getKillRegState(KillSrc));
    return;
  } else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::CROR;
  else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
    Opc = PPC::EVOR;
  else if ((PPC::ACCRCRegClass.contains(DestReg) ||
            PPC::UACCRCRegClass.contains(DestReg)) &&
           (PPC::ACCRCRegClass.contains(SrcReg) ||
            PPC::UACCRCRegClass.contains(SrcReg))) {
    // If primed, de-prime the source register, copy the individual registers
    // and prime the destination if needed. The vector subregisters are
    // vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the
    // source is primed, we need to re-prime it after the copy as well.
    PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
    bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);
    bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);
    MCRegister VSLSrcReg =
        PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    MCRegister VSLDestReg =
        PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;
    if (SrcPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
    for (unsigned Idx = 0; Idx < 4; Idx++)
      BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)
          .addReg(VSLSrcReg + Idx)
          .addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));
    if (DestPrimed)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);
    if (SrcPrimed && !KillSrc)
      BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
    return;
  } else if (PPC::G8pRCRegClass.contains(DestReg) &&
             PPC::G8pRCRegClass.contains(SrcReg)) {
    // TODO: Handle G8RC to G8pRC (and vice versa) copy.
    unsigned DestRegIdx = DestReg - PPC::G8p0;
    MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;
    MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;
    unsigned SrcRegIdx = SrcReg - PPC::G8p0;
    MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;
    MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)
        .addReg(SrcRegSub0)
        .addReg(SrcRegSub0, getKillRegState(KillSrc));
    BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)
        .addReg(SrcRegSub1)
        .addReg(SrcRegSub1, getKillRegState(KillSrc));
    return;
  } else
    llvm_unreachable("Impossible reg-to-reg copy");

  const MCInstrDesc &MCID = get(Opc);
  if (MCID.getNumOperands() == 3)
    BuildMI(MBB, I, DL, MCID, DestReg)
        .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
  else
    BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}

unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
  int OpcodeIndex = 0;

  if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
      PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Int4Spill;
  } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
             PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Int8Spill;
  } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Float8Spill;
  } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_Float4Spill;
  } else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_SPESpill;
  } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_CRSpill;
  } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_CRBitSpill;
  } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VRVectorSpill;
  } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VSXVectorSpill;
  } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VectorFloat8Spill;
  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_VectorFloat4Spill;
  } else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_SpillToVSR;
  } else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_AccumulatorSpill;
  } else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_UAccumulatorSpill;
  } else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_WAccumulatorSpill;
  } else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
    assert(Subtarget.pairedVectorMemops() &&
           "Register unexpected when paired memops are disabled.");
    OpcodeIndex = SOK_PairedVecSpill;
  } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
    OpcodeIndex = SOK_PairedG8Spill;
  } else {
    llvm_unreachable("Unknown regclass!");
  }
  return OpcodeIndex;
}

unsigned
PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {
  ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}

unsigned
PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {
  ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
  return OpcodesForSpill[getSpillIndex(RC)];
}

void PPCInstrInfo::StoreRegToStackSlot(
    MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    SmallVectorImpl<MachineInstr *> &NewMIs) const {
  unsigned Opcode = getStoreOpcodeForSpill(RC);
  DebugLoc DL;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

  NewMIs.push_back(addFrameReference(
      BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),
      FrameIdx));

  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}

void PPCInstrInfo::storeRegToStackSlotNoUpd(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  SmallVector<MachineInstr *, 4> NewMIs;

  StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);

  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}

void PPCInstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
    bool isKill, int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI, Register VReg) const {
  // We need to avoid a situation in which the value from a VRRC register is
  // spilled using an Altivec instruction and reloaded into a VSRC register
  // using a VSX instruction. The issue with this is that the VSX
  // load/store instructions swap the doublewords in the vector and the Altivec
  // ones don't. The register classes on the spill/reload may be different if
  // the register is defined using an Altivec instruction and is then used by a
  // VSX instruction.
  RC = updatedRC(RC);
  storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
}

void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
                                        unsigned DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        SmallVectorImpl<MachineInstr *> &NewMIs)
    const {
  unsigned Opcode = getLoadOpcodeForSpill(RC);
  NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),
                                     FrameIdx));

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  if (PPC::CRRCRegClass.hasSubClassEq(RC) ||
      PPC::CRBITRCRegClass.hasSubClassEq(RC))
    FuncInfo->setSpillsCR();

  if (isXFormMemOp(Opcode))
    FuncInfo->setHasNonRISpills();
}

void PPCInstrInfo::loadRegFromStackSlotNoUpd(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
    int FrameIdx, const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  SmallVector<MachineInstr *, 4> NewMIs;
  DebugLoc DL;
  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasSpills();

  LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);

  for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
    MBB.insert(MI, NewMIs[i]);

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIdx),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
      MFI.getObjectAlign(FrameIdx));
  NewMIs.back()->addMemOperand(MF, MMO);
}

void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        Register DestReg, int FrameIdx,
                                        const TargetRegisterClass *RC,
                                        const TargetRegisterInfo *TRI,
                                        Register VReg) const {
  // As with storeRegToStackSlot above, use the updated register class so that
  // a value is never spilled with an Altivec instruction and reloaded with a
  // VSX one (or vice versa); the two swap doublewords differently.
  RC = updatedRC(RC);
  loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
}

bool PPCInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
  if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
    Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
  else
    // Leave the CR# the same, but invert the condition.
    Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
  return false;
}

// For some instructions, it is legal to fold ZERO into the RA register field.
// This function performs that fold by replacing the operand with PPC::ZERO;
// it does not check whether the defining load-immediate has become dead.
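// For example (a sketch; registers illustrative): given
//   %zero = LI 0
//   %val  = LWZX %zero, %base
// the use of %zero can be rewritten to PPC::ZERO, since an RA field of zero
// in an X-form memory access means a literal base of 0 rather than r0.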
bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg) const {
  // A zero immediate should always be loaded with a single li.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
    return false;
  if (!DefMI.getOperand(1).isImm())
    return false;
  if (DefMI.getOperand(1).getImm() != 0)
    return false;

  // Note that we cannot here invert the arguments of an isel in order to fold
  // a ZERO into what is presented as the second argument. All we have here
  // is the condition bit, and that might come from a CR-logical bit operation.

  const MCInstrDesc &UseMCID = UseMI.getDesc();

  // Only fold into real machine instructions.
  if (UseMCID.isPseudo())
    return false;

  // We need to find which of the User's operands is to be folded; that will
  // be the operand that matches the given register ID.
  unsigned UseIdx;
  for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)
    if (UseMI.getOperand(UseIdx).isReg() &&
        UseMI.getOperand(UseIdx).getReg() == Reg)
      break;

  assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");

  const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];

  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
  // register (which might also be specified as a pointer class kind).
  if (UseInfo->isLookupPtrRegClass()) {
    if (UseInfo->RegClass /* Kind */ != 1)
      return false;
  } else {
    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
      return false;
  }

  // Make sure this is not tied to an output register (or otherwise
  // constrained). This is true for ST?UX registers, for example, which
  // are tied to their output registers.
  if (UseInfo->Constraints != 0)
    return false;

  MCRegister ZeroReg;
  if (UseInfo->isLookupPtrRegClass()) {
    bool isPPC64 = Subtarget.isPPC64();
    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
  } else {
    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
              PPC::ZERO8 : PPC::ZERO;
  }

  LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
  LLVM_DEBUG(UseMI.dump());
  UseMI.getOperand(UseIdx).setReg(ZeroReg);
  LLVM_DEBUG(dbgs() << "Into: ");
  LLVM_DEBUG(UseMI.dump());
  return true;
}

// Fold a load-immediate zero into instructions that also accept an immediate
// zero in the corresponding register operand. If the defining load immediate
// is left with no other users, it is deleted.
bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg, MachineRegisterInfo *MRI) const {
  bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
  if (MRI->use_nodbg_empty(Reg))
    DefMI.eraseFromParent();
  return Changed;
}

static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : MBB)
    if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
      return true;
  return false;
}

// We should make sure that, if we're going to predicate both sides of a
// condition (a diamond), that both sides don't define the counter register.
// We can predicate counter-decrement-based branches, but while that
// predicates the branching, it does not predicate the counter decrement. If
// we tried to merge the two sides of the diamond into one predicated block,
// we'd decrement the counter twice.
bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                       unsigned NumT, unsigned ExtraT,
                                       MachineBasicBlock &FMBB,
                                       unsigned NumF, unsigned ExtraF,
                                       BranchProbability Probability) const {
  return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
}

bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
  // The predicated branches are identified by their type, not really by the
  // explicit presence of a predicate. Furthermore, some of them can be
  // predicated more than once. Because if-conversion won't try to predicate
  // any instruction which already claims to be predicated (by returning true
  // here), always return false. In doing so, we let isPredicable() be the
  // final word on whether or not the instruction can be (further) predicated.
  return false;
}

bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                        const MachineBasicBlock *MBB,
                                        const MachineFunction &MF) const {
  // Set MFFS and MTFSF as scheduling boundaries to avoid unexpected code
  // motion across them, since some FP operations may change the contents of
  // the FPSCR.
  // TODO: Model the FPSCR in the PPC instruction definitions and remove this
  // workaround.
  if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF)
    return true;
  return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
}
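
// Rewrite a branch-like instruction into its predicated form. For example
// (a sketch; operands illustrative): BLR predicated on { imm(PRED_EQ), %cr0 }
// becomes BCCLR with the predicate and CR operand appended, while B
// predicated on the CTR condition becomes BDNZ/BDZ with implicit CTR
// def/use operands added.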
bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
                                        ArrayRef<MachineOperand> Pred) const {
  unsigned OpC = MI.getOpcode();
  if (OpC == PPC::BLR || OpC == PPC::BLR8) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
                                      : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
      // Need to add a Def and Use for the CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MI.setDesc(get(PPC::BCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MI.setDesc(get(PPC::BCLRn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      MI.setDesc(get(PPC::BCCLR));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }
    return true;
  } else if (OpC == PPC::B) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
      bool isPPC64 = Subtarget.isPPC64();
      MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                      : (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
      // Need to add a Def and Use for the CTR implicit operand.
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(Pred[1].getReg(), RegState::Implicit)
          .addReg(Pred[1].getReg(), RegState::ImplicitDefine);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCn));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .add(Pred[1])
          .addMBB(MBB);
    } else {
      MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
      MI.removeOperand(0);

      MI.setDesc(get(PPC::BCC));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1])
          .addMBB(MBB);
    }
    return true;
  } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
             OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
             OpC == PPC::BCTRL8_RM) {
    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
      llvm_unreachable("Cannot predicate bctr[l] on the ctr register");

    bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
                 OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
    bool isPPC64 = Subtarget.isPPC64();

    if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
                             : (setLR ? PPC::BCCTRL : PPC::BCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
                             : (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);
    } else {
      MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
                             : (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addImm(Pred[0].getImm())
          .add(Pred[1]);
    }

    // Need to add a Def and Use for the LR implicit operand.
    if (setLR)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
          .addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
    if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
      MachineInstrBuilder(*MI.getParent()->getParent(), MI)
          .addReg(PPC::RM, RegState::ImplicitDefine);

    return true;
  }

  return false;
}

bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                     ArrayRef<MachineOperand> Pred2) const {
  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
  assert(Pred2.size() == 2 && "Invalid PPC second predicate");

  if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
    return false;
  if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
    return false;

  // P1 can only subsume P2 if they test the same condition register.
  if (Pred1[1].getReg() != Pred2[1].getReg())
    return false;

  PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
  PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();

  if (P1 == P2)
    return true;

  // Does P1 subsume P2, e.g. GE subsumes GT.
  if (P1 == PPC::PRED_LE &&
      (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
    return true;
  if (P1 == PPC::PRED_GE &&
      (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
    return true;

  return false;
}

bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred,
                                     bool SkipDead) const {
  // Note: At the present time, the contents of Pred from this function are
  // unused by IfConversion. This implementation follows ARM by pushing the
  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
  // predicate, instructions defining CTR or CTR8 are also included as
  // predicate-defining instructions.

  const TargetRegisterClass *RCs[] =
    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };

  bool Found = false;
  for (const MachineOperand &MO : MI.operands()) {
    for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {
      const TargetRegisterClass *RC = RCs[c];
      if (MO.isReg()) {
        if (MO.isDef() && RC->contains(MO.getReg())) {
          Pred.push_back(MO);
          Found = true;
        }
      } else if (MO.isRegMask()) {
        for (MCPhysReg R : *RC)
          if (MO.clobbersPhysReg(R)) {
            Pred.push_back(MO);
            Found = true;
          }
      }
    }
  }

  return Found;
}

bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                  Register &SrcReg2, int64_t &Mask,
                                  int64_t &Value) const {
  unsigned Opc = MI.getOpcode();

  switch (Opc) {
  default: return false;
  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = 0;
    Value = MI.getOperand(2).getImm();
    Mask = 0xFFFF;
    return true;
  case PPC::CMPW:
  case PPC::CMPLW:
  case PPC::CMPD:
  case PPC::CMPLD:
  case PPC::FCMPUS:
  case PPC::FCMPUD:
    SrcReg = MI.getOperand(1).getReg();
    SrcReg2 = MI.getOperand(2).getReg();
    Value = 0;
    Mask = 0;
    return true;
  }
}

bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                                        Register SrcReg2, int64_t Mask,
                                        int64_t Value,
                                        const MachineRegisterInfo *MRI) const {
  if (DisableCmpOpt)
    return false;

  int OpC = CmpInstr.getOpcode();
  Register CRReg = CmpInstr.getOperand(0).getReg();

  // FP record forms set CR1 based on the exception status bits, not a
  // comparison with zero.
  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // The record forms set the condition register based on a signed comparison
  // with zero (so says the ISA manual). This is not as straightforward as it
  // seems, however, because this is always a 64-bit comparison on PPC64, even
  // for instructions that are 32-bit in nature (like slw for example).
  // So, on PPC32, for unsigned comparisons, we can use the record forms only
  // for equality checks (as those don't depend on the sign). On PPC64,
  // we are restricted to equality for unsigned 64-bit comparisons and for
  // signed 32-bit comparisons the applicability is more restricted.
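  // For illustration: on PPC64, "slw r3, r4, r5" zero-extends its 32-bit
  // result, so folding a following "cmpwi r3, 0" into the record form
  // "slw. r3, r4, r5" would flip the sign of the comparison whenever bit 31
  // of the result is set; the sign/zero-extension checks below reject such
  // cases.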
  bool isPPC64 = Subtarget.isPPC64();
  bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;

  // Look through copies unless that gets us to a physical register.
  Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
  if (ActualSrc.isVirtual())
    SrcReg = ActualSrc;

  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  bool equalityOnly = false;
  bool noSub = false;
  if (isPPC64) {
    if (is32BitSignedCompare) {
      // We can perform this optimization only if SrcReg is sign-extending.
      if (isSignExtended(SrcReg, MRI))
        noSub = true;
      else
        return false;
    } else if (is32BitUnsignedCompare) {
      // We can perform this optimization, equality only, if SrcReg is
      // zero-extending.
      if (isZeroExtended(SrcReg, MRI)) {
        noSub = true;
        equalityOnly = true;
      } else
        return false;
    } else
      equalityOnly = is64BitUnsignedCompare;
  } else
    equalityOnly = is32BitUnsignedCompare;

  if (equalityOnly) {
    // We need to check the uses of the condition register in order to reject
    // non-equality comparisons.
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        // We ignore hint bits when checking for non-equality comparisons.
        if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
          return false;
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
        if (SubIdx != PPC::sub_eq)
          return false;
      } else
        return false;
    }
  }
  MachineBasicBlock::iterator I = CmpInstr;

  // Scan forward to find the first use of the compare.
  for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
       ++I) {
    bool FoundUse = false;
    for (MachineRegisterInfo::use_instr_iterator
         J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();
         J != JE; ++J)
      if (&*J == &*I) {
        FoundUse = true;
        break;
      }

    if (FoundUse)
      break;
  }

  SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
  SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;

  // There are two possible candidates which can be changed to set CR[01].
  // One is MI, the other is a SUB instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
  MachineInstr *Sub = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = nullptr;
  // FIXME: Conservatively refuse to convert an instruction which isn't in the
  // same BB as the comparison. This is to allow the check below to avoid calls
  // (and other explicit clobbers); instead we should really check for these
  // more explicitly (in at least a few predecessors).
  else if (MI->getParent() != CmpInstr.getParent())
    return false;
  else if (Value != 0) {
    // The record-form instructions set CR bits based on a signed comparison
    // against 0. We try to convert a compare against 1 or -1 into a compare
    // against 0 to exploit record-form instructions. For example, we change
    // the condition "greater than -1" into "greater than or equal to 0"
    // and "less than 1" into "less than or equal to 0".
    // Since we optimize the comparison based on a specific branch condition,
    // we don't optimize if the condition code has more than one use.
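    // For example (signed comparison against a GPR r3):
    //   cmpwi r3, -1 ; bgt  ==>  cmpwi r3, 0 ; bge
    //   cmpwi r3, 1  ; blt  ==>  cmpwi r3, 0 ; ble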
    if (equalityOnly || !MRI->hasOneUse(CRReg))
      return false;

    MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
    if (UseMI->getOpcode() != PPC::BCC)
      return false;

    PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
    unsigned PredCond = PPC::getPredicateCondition(Pred);
    unsigned PredHint = PPC::getPredicateHint(Pred);
    int16_t Immed = (int16_t)Value;

    // When modifying the condition in the predicate, we propagate hint bits
    // from the original predicate to the new one.
    if (Immed == -1 && PredCond == PPC::PRED_GT)
      // We convert "greater than -1" into "greater than or equal to 0",
      // since !equalityOnly implies we are assuming a signed comparison.
      Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);
    else if (Immed == -1 && PredCond == PPC::PRED_LE)
      // We convert "less than or equal to -1" into "less than 0".
      Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_LT)
      // We convert "less than 1" into "less than or equal to 0".
      Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);
    else if (Immed == 1 && PredCond == PPC::PRED_GE)
      // We convert "greater than or equal to 1" into "greater than 0".
      Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);
    else
      return false;

    // Convert the comparison and its user to a compare against zero with the
    // appropriate predicate on the branch. A zero comparison might provide
    // optimization opportunities post-RA (see the optimization in
    // PPCPreEmitPeephole.cpp).
    UseMI->getOperand(0).setImm(Pred);
    CmpInstr.getOperand(2).setImm(0);
  }
  // Search for Sub.
  --I;

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();

  for (; I != E && !noSub; --I) {
    const MachineInstr &Instr = *I;
    unsigned IOpC = Instr.getOpcode();

    if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||
                             Instr.readsRegister(PPC::CR0, TRI)))
      // This instruction modifies or uses the record condition register after
      // the one we want to change. While we could do this transformation, it
      // would likely not be profitable. This transformation removes one
      // instruction, and so even forcing RA to generate one move probably
      // makes it unprofitable.
      return false;

    // Check whether CmpInstr can be made redundant by the current instruction.
    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
        ((Instr.getOperand(1).getReg() == SrcReg &&
          Instr.getOperand(2).getReg() == SrcReg2) ||
         (Instr.getOperand(1).getReg() == SrcReg2 &&
          Instr.getOperand(2).getReg() == SrcReg))) {
      Sub = &*I;
      break;
    }

    if (I == B)
      // We reached the beginning of the block without finding a candidate.
      return false;
  }
  // Return false if no candidates exist.
  if (!MI && !Sub)
    return false;

  // The single candidate is called MI.
  if (!MI) MI = Sub;

  int NewOpC = -1;
  int MIOpC = MI->getOpcode();
  if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||
      MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)
    NewOpC = MIOpC;
  else {
    NewOpC = PPC::getRecordFormOpcode(MIOpC);
    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
      NewOpC = MIOpC;
  }

  // FIXME: On the non-embedded POWER architectures, only some of the record
  // forms are fast, and we should use only the fast ones.

  // The defining instruction has a record form (or is already a record
  // form). It is possible, however, that we'll need to reverse the condition
  // code of the users.
  if (NewOpC == -1)
    return false;

  // This transformation should not be performed if `nsw` is missing and this
  // is not an `equalityOnly` comparison: if there is overflow, sub_lt and
  // sub_gt in CRReg do not reflect the correct order. If `equalityOnly` is
  // true, sub_eq in CRReg still reflects whether the compared values are
  // equal, so the optimization remains valid.
  if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&
      Sub && !Sub->getFlag(MachineInstr::NoSWrap))
    return false;

  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
  // needs to be updated to be based on SUB. Push the condition code
  // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
  // condition code of these operands will be modified.
  // Here, Value == 0 means we haven't converted a comparison against 1 or -1
  // into a comparison against 0, which may modify the predicate.
  bool ShouldSwap = false;
  if (Sub && Value == 0) {
    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
      Sub->getOperand(2).getReg() == SrcReg;

    // The operands to subf are the opposite of sub, so only in the
    // fixed-point case, invert the order.
    ShouldSwap = !ShouldSwap;
  }
  if (ShouldSwap)
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        assert((!equalityOnly ||
                PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
               "Invalid predicate for equality-only optimization");
        (void)PredCond; // To suppress warning in release build.
        PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
                                PPC::getSwappedPredicate(Pred)));
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
               "Invalid CR bit for equality-only optimization");

        if (NewSubReg == PPC::sub_lt)
          NewSubReg = PPC::sub_gt;
        else if (NewSubReg == PPC::sub_gt)
          NewSubReg = PPC::sub_lt;

        SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
                                                 NewSubReg));
      } else // We need to abort on a user we don't understand.
        return false;
    }
  assert(!(Value != 0 && ShouldSwap) &&
         "Non-zero immediate support and ShouldSwap "
         "may conflict in updating predicate");

  // Create a new virtual register to hold the value of the CR set by the
  // record-form instruction. If the instruction was not previously in
  // record form, then set the kill flag on the CR.
  CmpInstr.eraseFromParent();

  MachineBasicBlock::iterator MII = MI;
  BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
          get(TargetOpcode::COPY), CRReg)
      .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);

  // Even if the CR0 register was dead before, it is alive now since the
  // instruction we just built uses it.
  MI->clearRegisterDeads(PPC::CR0);

  if (MIOpC != NewOpC) {
    // We need to be careful here: we're replacing one instruction with
    // another, and we need to make sure that we get all of the right
    // implicit uses and defs. On the other hand, the caller may be holding
    // an iterator to this instruction, and so we can't delete it (this is
    // specifically the case if this is the instruction directly after the
    // compare).

    // Rotates are expensive instructions. If we're emitting a record-form
    // rotate that can just be an andi/andis, we should just emit that.
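    // For example (mask fully contained in the low halfword, no rotation):
    //   rlwinm. r3, r4, 0, 24, 31  ==>  andi. r3, r4, 255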
    if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
      Register GPRRes = MI->getOperand(0).getReg();
      int64_t SH = MI->getOperand(2).getImm();
      int64_t MB = MI->getOperand(3).getImm();
      int64_t ME = MI->getOperand(4).getImm();
      // We can only do this if both the start and end of the mask are in the
      // same halfword.
      bool MBInLoHWord = MB >= 16;
      bool MEInLoHWord = ME >= 16;
      uint64_t Mask = ~0LLU;

      if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
        Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
        // The mask value needs to shift right 16 if we're emitting andis.
        Mask >>= MBInLoHWord ? 0 : 16;
        NewOpC = MIOpC == PPC::RLWINM
                     ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
                     : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);

      } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
                 (ME - MB + 1 == SH) && (MB >= 16)) {
        // If we are rotating by the exact number of bits as are in the mask
        // and the mask is in the least significant bits of the register,
        // that's just an andis. (as long as the GPR result has no uses).
        Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
        Mask >>= 16;
        NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
      }

      // If we've set the mask, we can transform.
      if (Mask != ~0LLU) {
        MI->removeOperand(4);
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
      int64_t MB = MI->getOperand(3).getImm();
      if (MB >= 48) {
        uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
        NewOpC = PPC::ANDI8_rec;
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    }

    const MCInstrDesc &NewDesc = get(NewOpC);
    MI->setDesc(NewDesc);

    for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
      if (!MI->definesRegister(ImpDef)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpDef, true, true));
      }
    }
    for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
      if (!MI->readsRegister(ImpUse)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpUse, false, true));
      }
    }
  }
  assert(MI->definesRegister(PPC::CR0) &&
         "Record-form instruction does not define cr0?");

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);

  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);

  return true;
}

bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
  if (MRI->isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Try to optimize the comparison against 0.
  if (CmpValue || !CmpMask || SrcReg2)
    return false;

  // The record forms set the condition register based on a signed comparison
  // with zero (see the comments in optimizeCompareInstr). Since we can't do
  // the equality checks post-RA, we are more restricted for an unsigned
  // comparison.
  unsigned Opc = CmpMI.getOpcode();
  if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
    return false;

  // The record forms are always based on a 64-bit comparison on PPC64
  // (similarly, a 32-bit comparison on PPC32), while CMPWI is a 32-bit
  // comparison. Since we can't do the equality checks post-RA, we bail out
  // in this case.
  if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
    return false;

  // CmpMI can't be deleted if it has an implicit def.
  if (CmpMI.hasImplicitDef())
    return false;

  bool SrcRegHasOtherUse = false;
  MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
  if (!SrcMI || !SrcMI->definesRegister(SrcReg))
    return false;

  MachineOperand RegMO = CmpMI.getOperand(0);
  Register CRReg = RegMO.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between SrcMI and CmpMI.
  bool SeenUseOfCRReg = false;
  bool IsCRRegKilled = false;
  if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
                                 SeenUseOfCRReg) ||
      SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
    return false;

  int SrcMIOpc = SrcMI->getOpcode();
  int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
  if (NewOpC == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: ");
  LLVM_DEBUG(SrcMI->dump());

  const MCInstrDesc &NewDesc = get(NewOpC);
  SrcMI->setDesc(NewDesc);
  MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
      .addReg(CRReg, RegState::ImplicitDefine);
  SrcMI->clearRegisterDeads(CRReg);

  // Fix up the killed/dead flags for SrcReg after the transformation.
  if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
    fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);

  assert(SrcMI->definesRegister(PPC::CR0) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
  LLVM_DEBUG(CmpMI.dump());
  return true;
}

bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  const MachineOperand *BaseOp;
  OffsetIsScalable = false;
  if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
    return false;
  BaseOps.push_back(BaseOp);
  return true;
}

static bool isLdStSafeToCluster(const MachineInstr &LdSt,
                                const TargetRegisterInfo *TRI) {
  // If this is a volatile load/store, don't mess with it.
  if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)
    return false;

  if (LdSt.getOperand(2).isFI())
    return true;

  assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");
  // Can't cluster if the instruction modifies the base register
  // or is an update form, e.g. ld r2, 3(r2).
  if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
    return false;

  return true;
}

// Only cluster instruction pairs that have the same opcode and are
// clusterable according to the PowerPC specification.
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
                                     const PPCSubtarget &Subtarget) {
  switch (FirstOpc) {
  default:
    return false;
  case PPC::STD:
  case PPC::STFD:
  case PPC::STXSD:
  case PPC::DFSTOREf64:
    return FirstOpc == SecondOpc;
  // The PowerPC backend has the opcodes STW/STW8 for the "stw" instruction
  // to handle 32-bit and 64-bit instruction selection. They are a clusterable
  // pair even though they are different opcodes.
  case PPC::STW:
  case PPC::STW8:
    return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
  }
}

bool PPCInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // NumLoads is the number of loads that have already been clustered.
  // Don't cluster this memory op if at least two ops are already clustered.
  if (NumLoads > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcodes according to the PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster loads/stores that have ordered or volatile memory
  // references.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
  unsigned Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth returned an incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
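  // The two accesses cluster only when they are exactly adjacent, e.g. two
  // 8-byte stds at offsets 0 and 8 off the same base register.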
  return Offset1 + Width1 == Offset2;
}

/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    return get(Opcode).getSize();
  }
}

std::pair<unsigned, unsigned>
PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = PPCII::MO_ACCESS_MASK;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace PPCII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_LO, "ppc-lo"},
      {MO_HA, "ppc-ha"},
      {MO_TPREL_LO, "ppc-tprel-lo"},
      {MO_TPREL_HA, "ppc-tprel-ha"},
      {MO_DTPREL_LO, "ppc-dtprel-lo"},
      {MO_TLSLD_LO, "ppc-tlsld-lo"},
      {MO_TOC_LO, "ppc-toc-lo"},
      {MO_TLS, "ppc-tls"}};
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace PPCII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PLT, "ppc-plt"},
      {MO_PIC_FLAG, "ppc-pic"},
      {MO_PCREL_FLAG, "ppc-pcrel"},
      {MO_GOT_FLAG, "ppc-got"},
      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
      {MO_TLSGD_FLAG, "ppc-tlsgd"},
      {MO_TLSLD_FLAG, "ppc-tlsld"},
      {MO_TPREL_FLAG, "ppc-tprel"},
      {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
      {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
      {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
      {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"}};
  return ArrayRef(TargetFlags);
}

// Expand a VSX Memory Pseudo instruction to either a VSX or a FP instruction.
// The VSX versions have the advantage of a full 64-register target whereas
// the FP ones have the advantage of lower latency and higher throughput. So
// what we are after is using the faster instructions in low register pressure
// situations and using the larger register file in high register pressure
// situations.
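// For example, DFLOADf64 becomes lfd (the FP form) when the destination was
// allocated to a register overlapping F0-F31, and lxsd (the VSX form) when
// it landed in the upper half of the VSX register file.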
bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
  unsigned UpperOpcode, LowerOpcode;
  switch (MI.getOpcode()) {
  case PPC::DFLOADf32:
    UpperOpcode = PPC::LXSSP;
    LowerOpcode = PPC::LFS;
    break;
  case PPC::DFLOADf64:
    UpperOpcode = PPC::LXSD;
    LowerOpcode = PPC::LFD;
    break;
  case PPC::DFSTOREf32:
    UpperOpcode = PPC::STXSSP;
    LowerOpcode = PPC::STFS;
    break;
  case PPC::DFSTOREf64:
    UpperOpcode = PPC::STXSD;
    LowerOpcode = PPC::STFD;
    break;
  case PPC::XFLOADf32:
    UpperOpcode = PPC::LXSSPX;
    LowerOpcode = PPC::LFSX;
    break;
  case PPC::XFLOADf64:
    UpperOpcode = PPC::LXSDX;
    LowerOpcode = PPC::LFDX;
    break;
  case PPC::XFSTOREf32:
    UpperOpcode = PPC::STXSSPX;
    LowerOpcode = PPC::STFSX;
    break;
  case PPC::XFSTOREf64:
    UpperOpcode = PPC::STXSDX;
    LowerOpcode = PPC::STFDX;
    break;
  case PPC::LIWAX:
    UpperOpcode = PPC::LXSIWAX;
    LowerOpcode = PPC::LFIWAX;
    break;
  case PPC::LIWZX:
    UpperOpcode = PPC::LXSIWZX;
    LowerOpcode = PPC::LFIWZX;
    break;
  case PPC::STIWX:
    UpperOpcode = PPC::STXSIWX;
    LowerOpcode = PPC::STFIWX;
    break;
  default:
    llvm_unreachable("Unknown Operation!");
  }

  Register TargetReg = MI.getOperand(0).getReg();
  unsigned Opcode;
  if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
      (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
    Opcode = LowerOpcode;
  else
    Opcode = UpperOpcode;
  MI.setDesc(get(Opcode));
  return true;
}

static bool isAnImmediateOperand(const MachineOperand &MO) {
  return MO.isCPI() || MO.isGlobal() || MO.isImm();
}

bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  auto &MBB = *MI.getParent();
  auto DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  case PPC::BUILD_UACC: {
    MCRegister ACC = MI.getOperand(0).getReg();
    MCRegister UACC = MI.getOperand(1).getReg();
    if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
      MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
      MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
      // FIXME: This can easily be improved to look up to the top of the MBB
      // to see if the inputs are XXLOR's. If they are and SrcReg is killed,
      // we can just re-target any such XXLOR's to DstVSR + offset.
      for (int VecNo = 0; VecNo < 4; VecNo++)
        BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
            .addReg(SrcVSR + VecNo)
            .addReg(SrcVSR + VecNo);
    }
    // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
    // So after building the 4 copies, we can replace the BUILD_UACC
    // instruction with a NOP.
    [[fallthrough]];
  }
  case PPC::KILL_PAIR: {
    MI.setDesc(get(PPC::UNENCODED_NOP));
    MI.removeOperand(1);
    MI.removeOperand(0);
    return true;
  }
  case TargetOpcode::LOAD_STACK_GUARD: {
    assert(Subtarget.isTargetLinux() &&
           "Only Linux target is expected to contain LOAD_STACK_GUARD");
    const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;
    const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
    MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Offset)
        .addReg(Reg);
    return true;
  }
  case PPC::DFLOADf32:
  case PPC::DFLOADf64:
  case PPC::DFSTOREf32:
  case PPC::DFSTOREf64: {
    assert(Subtarget.hasP9Vector() &&
           "Invalid D-Form Pseudo-ops on Pre-P9 target.");
    assert(MI.getOperand(2).isReg() &&
           isAnImmediateOperand(MI.getOperand(1)) &&
           "D-form op must have register and immediate operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf32:
  case PPC::XFSTOREf32:
  case PPC::LIWAX:
  case PPC::LIWZX:
  case PPC::STIWX: {
    assert(Subtarget.hasP8Vector() &&
           "Invalid X-Form Pseudo-ops on Pre-P8 target.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::XFLOADf64:
  case PPC::XFSTOREf64: {
    assert(Subtarget.hasVSX() &&
           "Invalid X-Form Pseudo-ops on target that has no VSX.");
    assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&
           "X-form op must have register and register operands");
    return expandVSXMemPseudo(MI);
  }
  case PPC::SPILLTOVSR_LD: {
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg)) {
      MI.setDesc(get(PPC::DFLOADf64));
      return expandPostRAPseudo(MI);
    } else
      MI.setDesc(get(PPC::LD));
    return true;
  }
  case PPC::SPILLTOVSR_ST: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::DFSTOREf64));
      return expandPostRAPseudo(MI);
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STD));
    }
    return true;
  }
  case PPC::SPILLTOVSR_LDX: {
    Register TargetReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(TargetReg))
      MI.setDesc(get(PPC::LXSDX));
    else
      MI.setDesc(get(PPC::LDX));
    return true;
  }
  case PPC::SPILLTOVSR_STX: {
    Register SrcReg = MI.getOperand(0).getReg();
    if (PPC::VSFRCRegClass.contains(SrcReg)) {
      NumStoreSPILLVSRRCAsVec++;
      MI.setDesc(get(PPC::STXSDX));
    } else {
      NumStoreSPILLVSRRCAsGpr++;
      MI.setDesc(get(PPC::STDX));
    }
    return true;
  }
  // FIXME: Maybe we can expand it in the 'PowerPC Expand Atomic' pass.
  case PPC::CFENCE8: {
    auto Val = MI.getOperand(0).getReg();
    BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
    BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
        .addImm(PPC::PRED_NE_MINUS)
        .addReg(PPC::CR7)
        .addImm(1);
    MI.setDesc(get(PPC::ISYNC));
    MI.removeOperand(0);
    return true;
  }
  }
  return false;
}

// Essentially a compile-time implementation of a compare->isel sequence.
// It takes two constants to compare, along with the true/false registers
// and the comparison type (as a subreg to a CR field) and returns one
// of the true/false registers, depending on the comparison results.
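// For example, selectReg(5, 7, PPC::CMPWI, TrueReg, FalseReg, PPC::sub_lt)
// returns TrueReg, because the signed comparison 5 < 7 holds.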
static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
                          unsigned TrueReg, unsigned FalseReg,
                          unsigned CRSubReg) {
  // Signed comparisons. The immediates are assumed to be sign-extended.
  if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {
    switch (CRSubReg) {
    default: llvm_unreachable("Unknown integer comparison type.");
    case PPC::sub_lt:
      return Imm1 < Imm2 ? TrueReg : FalseReg;
    case PPC::sub_gt:
      return Imm1 > Imm2 ? TrueReg : FalseReg;
    case PPC::sub_eq:
      return Imm1 == Imm2 ? TrueReg : FalseReg;
    }
  }
  // Unsigned comparisons.
  else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {
    switch (CRSubReg) {
    default: llvm_unreachable("Unknown integer comparison type.");
    case PPC::sub_lt:
      return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
    case PPC::sub_gt:
      return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
    case PPC::sub_eq:
      return Imm1 == Imm2 ? TrueReg : FalseReg;
    }
  }
  return PPC::NoRegister;
}

void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
                                              unsigned OpNo,
                                              int64_t Imm) const {
  assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
  // Replace the REG with the Immediate.
  Register InUseReg = MI.getOperand(OpNo).getReg();
  MI.getOperand(OpNo).ChangeToImmediate(Imm);

  // We need to make sure that MI no longer has any implicit use of this
  // register. We cannot use MI.implicit_operands().empty() to return early,
  // because the calling context may have changed MI's MCID; that can change
  // the number of explicit operands and, with it, where the implicit
  // operands begin.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, false, TRI);
  if (UseOpIdx >= 0) {
    MachineOperand &MO = MI.getOperand(UseOpIdx);
    if (MO.isImplicit())
      // The operands must always be in the following order:
      // - explicit reg defs,
      // - other explicit operands (reg uses, immediates, etc.),
      // - implicit reg defs
      // - implicit reg uses
      // Therefore, removing the implicit operand won't change the explicit
      // operands' layout.
      MI.removeOperand(UseOpIdx);
  }
}

// Replace an instruction with one that materializes a constant (and sets
// CR0 if the original instruction was a record-form instruction).
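// For example, with LII = {Imm = 0, Is64Bit = false, SetCR = false} the
// rewritten instruction is "li <def>, 0"; with SetCR set it instead becomes
// the record-form "andi." so that CR0 is updated as well.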
void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
                                      const LoadImmediateInfo &LII) const {
  // Remove existing operands.
  int OperandToKeep = LII.SetCR ? 1 : 0;
  for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)
    MI.removeOperand(i);

  // Replace the instruction.
  if (LII.SetCR) {
    MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
    // Set the immediate.
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(LII.Imm)
        .addReg(PPC::CR0, RegState::ImplicitDefine);
    return;
  } else
    MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));

  // Set the immediate.
  MachineInstrBuilder(*MI.getParent()->getParent(), MI)
      .addImm(LII.Imm);
}

MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,
                                           bool &SeenIntermediateUse) const {
  assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
         "Should be called after register allocation.");
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
  It++;
  SeenIntermediateUse = false;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, TRI))
      return &*It;
    if (It->readsRegister(Reg, TRI))
      SeenIntermediateUse = true;
  }
  return nullptr;
}

void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, Register Reg,
                                        int64_t Imm) const {
  assert(!MBB.getParent()->getRegInfo().isSSA() &&
         "Register should be in non-SSA form after RA");
  bool isPPC64 = Subtarget.isPPC64();
  // FIXME: Materialization here is not optimal.
  // For some special bit patterns we can use fewer instructions.
  // See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
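  // The general 64-bit pattern emitted below is (the two ori pieces are
  // skipped when the corresponding halfword of Imm is zero):
  //   lis    Reg, Imm[63:48]
  //   ori    Reg, Reg, Imm[47:32]
  //   rldicr Reg, Reg, 32, 31
  //   oris   Reg, Reg, Imm[31:16]
  //   ori    Reg, Reg, Imm[15:0]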
  if (isInt<16>(Imm)) {
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);
  } else if (isInt<32>(Imm)) {
    BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)
        .addImm(Imm >> 16);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::ORI8 : PPC::ORI), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  } else {
    assert(isPPC64 && "Materializing 64-bit immediate to single register is "
                      "only supported in PPC64");
    BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);
    if ((Imm >> 32) & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm((Imm >> 32) & 0xFFFF);
    BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(32)
        .addImm(31);
    BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm((Imm >> 16) & 0xFFFF);
    if (Imm & 0xFFFF)
      BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(Imm & 0xFFFF);
  }
}

MachineInstr *PPCInstrInfo::getForwardingDefMI(
    MachineInstr &MI, unsigned &OpNoForForwarding,
    bool &SeenIntermediateUse) const {
  OpNoForForwarding = ~0U;
  MachineInstr *DefMI = nullptr;
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // If we're in SSA, get the defs through the MRI. Otherwise, only look
  // within the basic block to see if the register is defined using an
  // LI/LI8/ADDI/ADDI8.
  if (MRI->isSSA()) {
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      if (!MI.getOperand(i).isReg())
        continue;
      Register Reg = MI.getOperand(i).getReg();
      if (!Reg.isVirtual())
        continue;
      Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);
      if (TrueReg.isVirtual()) {
        MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);
        if (DefMIForTrueReg->getOpcode() == PPC::LI ||
            DefMIForTrueReg->getOpcode() == PPC::LI8 ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI ||
            DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
          OpNoForForwarding = i;
          DefMI = DefMIForTrueReg;
          // Both an ADDI and an LI operand may exist in the same instruction.
          // We prefer to fold the LI operand, since LI has only one immediate
          // operand and is more likely to be convertible. So if the current
          // DefMI is an ADDI/ADDI8, we keep looking for a possible LI/LI8.
          if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)
            break;
        }
      }
    }
  } else {
    // Looking back through the definition for each operand could be
    // expensive, so exit early if this isn't an instruction that either has
    // an immediate form or is already an immediate form that we can handle.
    ImmInstrInfo III;
    unsigned Opc = MI.getOpcode();
    bool ConvertibleImmForm =
        Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||
        Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||
        Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||
        Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||
        Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||
        Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||
        Opc == PPC::RLWINM8_rec;
    bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())
                       ? isVFRegister(MI.getOperand(0).getReg())
                       : false;
    if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))
      return nullptr;

    // Don't convert or %X, %Y, %Y since that's just a register move.
    if ((Opc == PPC::OR || Opc == PPC::OR8) &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return nullptr;
    for (int i = 1, e = MI.getNumOperands(); i < e; i++) {
      MachineOperand &MO = MI.getOperand(i);
      SeenIntermediateUse = false;
      if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
        Register Reg = MI.getOperand(i).getReg();
        // If we see another use of this reg between the def and the MI,
        // we want to flag it so the def isn't deleted.
        MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
        if (DefMI) {
          // Is this register defined by some form of add-immediate (including
          // load-immediate) within this basic block?
          switch (DefMI->getOpcode()) {
          default:
            break;
          case PPC::LI:
          case PPC::LI8:
          case PPC::ADDItocL:
          case PPC::ADDI:
          case PPC::ADDI8:
            OpNoForForwarding = i;
            return DefMI;
          }
        }
      }
    }
  }
  return OpNoForForwarding == ~0U ? nullptr : DefMI;
}

unsigned PPCInstrInfo::getSpillTarget() const {
  // With P10, we may need to spill paired vector registers or accumulator
  // registers. MMA implies paired vectors, so we can just check that.
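  // The returned index selects a row of the spill opcode arrays below:
  // 0 = pre-P9, 1 = P9 vector, 2 = P10 variant, 3 = Future.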
  bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
  return Subtarget.isISAFuture()     ? 3
         : IsP10Variant              ? 2
         : Subtarget.hasP9Vector()   ? 1
                                     : 0;
}

ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
  return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
}

ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
  return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
}

void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
                                     unsigned RegNo) const {
  // Conservatively clear the kill flag for the register if the instructions
  // are in different basic blocks and in SSA form, because the kill flag may
  // no longer be right. There is no need to bother with dead flags since defs
  // with no uses will be handled by DCE.
  MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
  if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
    MRI.clearKillFlags(RegNo);
    return;
  }

  // Instructions between [StartMI, EndMI] should be in the same basic block.
  assert((StartMI->getParent() == EndMI->getParent()) &&
         "Instructions are not in same basic block");

  // Before RA, StartMI may be a def through a COPY, so we need to adjust it
  // to the real def. See function getForwardingDefMI.
  if (MRI.isSSA()) {
    bool Reads, Writes;
    std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
    if (!Reads && !Writes) {
      assert(Register::isVirtualRegister(RegNo) &&
             "Must be a virtual register");
      // Get the real def and ignore copies.
      StartMI = MRI.getVRegDef(RegNo);
    }
  }

  bool IsKillSet = false;

  auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
    MachineOperand &MO = MI.getOperand(Index);
    if (MO.isReg() && MO.isUse() && MO.isKill() &&
        getRegisterInfo().regsOverlap(MO.getReg(), RegNo))
      MO.setIsKill(false);
  };

  // Set the killed flag for EndMI.
  // No need to do anything if EndMI defines RegNo.
  int UseIndex =
      EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
  if (UseIndex != -1) {
    EndMI->getOperand(UseIndex).setIsKill(true);
    IsKillSet = true;
    // Clear the killed flag for other EndMI operands related to RegNo. In
    // some unexpected cases, killed may be set multiple times for the same
    // register operand in the same MI.
    for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
      if (i != UseIndex)
        clearOperandKillInfo(*EndMI, i);
  }

  // Walk the instructions in reverse order (EndMI -> StartMI].
  MachineBasicBlock::reverse_iterator It = *EndMI;
  MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
  // EndMI has been handled above, skip it here.
  It++;
  MachineOperand *MO = nullptr;
  for (; It != E; ++It) {
    // Skip instructions which could not be a def/use of RegNo.
    if (It->isDebugInstr() || It->isPosition())
      continue;

    // Clear the killed flag for all It operands related to RegNo. In some
    // unexpected cases, killed may be set multiple times for the same
    // register operand in the same MI.
    for (int i = 0, e = It->getNumOperands(); i != e; ++i)
      clearOperandKillInfo(*It, i);

    // If killed is not set, set killed for its last use or set dead for its
    // def if no use is found.
    if (!IsKillSet) {
      if ((MO = It->findRegisterUseOperand(RegNo, false, &getRegisterInfo()))) {
        // Use found, set it killed.
        IsKillSet = true;
        MO->setIsKill(true);
        continue;
      } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
                                                  &getRegisterInfo()))) {
        // No use found, set dead for its def.
        assert(&*It == StartMI && "No new def between StartMI and EndMI.");
        MO->setIsDead(true);
        break;
      }
    }

    if ((&*It) == StartMI)
      break;
  }
  // Ensure RegNo's liveness is killed after EndMI.
  assert((IsKillSet || (MO && MO->isDead())) &&
         "RegNo should be killed or dead");
}

// This opt tries to convert the following imm form to an index form to save
// an add for stack variables.
// Return false if no such pattern is found.
//
// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
// ADD instr:  ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
// Imm instr:  Reg            = op OffsetImm, ToBeDeletedReg(killed)
//
// can be converted to:
//
// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
// Index instr:    Reg            = opx ScaleReg, ToBeChangedReg(killed)
//
// In order to eliminate the ADD instr, make sure that:
// 1: (OffsetAddi + OffsetImm) must fit in int16, since this offset will be
//    used in the new ADDI instr and ADDI can only take an int16 Imm.
// 2: ToBeChangedReg must be killed in the ADD instr and there must be no
//    other use between the ADDI and ADD instr, since its original def in the
//    ADDI will be changed in the new ADDI instr. There should also be no new
//    def for it between the ADD and Imm instr, as ToBeChangedReg will be used
//    in the Index instr.
// 3: ToBeDeletedReg must be killed in the Imm instr and there must be no
//    other use between the ADD and Imm instr, since the ADD instr will be
//    eliminated.
// 4: ScaleReg must not be redefined between the ADD and Imm instr, since it
//    will be moved to the Index instr.
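//
// A concrete shape of the pattern (registers chosen for illustration only):
//   addi r3, r1, 16     ; ADDI instr
//   add  r4, r3, r5     ; ADD instr, r3 killed
//   lwz  r6, 8(r4)      ; Imm instr, r4 killed
// becomes:
//   addi r3, r1, 24
//   lwzx r6, r5, r3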
bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  // Do this opt after PEI, which is after RA. The reason is that stack slot
  // expansion in PEI may expose such opportunities, since it is in PEI that
  // stack slot offsets to the frame base (OffsetAddi) are determined.
  if (!PostRA)
    return false;

  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;

  // Check if the Imm instr meets the requirements.
  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
                                    III))
    return false;

  bool OtherIntermediateUse = false;
  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI,
                                       OtherIntermediateUse);

  // Exit if there is another use between the ADD and Imm instr or no def is
  // found.
  if (OtherIntermediateUse || !ADDMI)
    return false;

  // Check if the ADD instr meets the requirements.
  if (!isADDInstrEligibleForFolding(*ADDMI))
    return false;

  unsigned ScaleRegIdx = 0;
  int64_t OffsetAddi = 0;
  MachineInstr *ADDIMI = nullptr;

  // Check if there is a valid ToBeChangedReg in ADDMI.
  // 1: It must be killed.
  // 2: Its definition must be a valid ADDIMI.
  // 3: It must satisfy the int16 offset requirement.
  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 2;
  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 1;
  else
    return false;

  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
  Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
  Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
                       MachineBasicBlock::iterator End) {
    for (auto It = ++Start; It != End; It++)
      if (It->modifiesRegister(Reg, &getRegisterInfo()))
        return true;
    return false;
  };

  // We are trying to replace the ImmOpNo with ScaleReg. Give up if ScaleReg
  // is R0/X0, which would be treated as the special zero register in that
  // position.
  if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
      (ScaleReg == PPC::R0 || ScaleReg == PPC::X0))
    return false;

  // Make sure there is no other def for ToBeChangedReg and ScaleReg between
  // the ADD instr and the Imm instr.
  if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))
    return false;

  // Now start to do the transformation.
  LLVM_DEBUG(dbgs() << "Replace instruction: "
                    << "\n");
  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(ADDMI->dump());
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "with: "
                    << "\n");

  // Update the ADDI instr.
  ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);

  // Update the Imm instr.
  MI.setDesc(get(XFormOpcode));
  MI.getOperand(III.ImmOpNo)
      .ChangeToRegister(ScaleReg, false, false,
                        ADDMI->getOperand(ScaleRegIdx).isKill());

  MI.getOperand(III.OpNoForForwarding)
      .ChangeToRegister(ToBeChangedReg, false, false, true);

  // Eliminate the ADD instr.
  ADDMI->eraseFromParent();

  LLVM_DEBUG(ADDIMI->dump());
  LLVM_DEBUG(MI.dump());
  return true;
}
  3289. bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
  3290. int64_t &Imm) const {
  3291. unsigned Opc = ADDIMI.getOpcode();
  3292. // Exit if the instruction is not ADDI.
  3293. if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
  3294. return false;
  3295. // The operand may not necessarily be an immediate - it could be a relocation.
  3296. if (!ADDIMI.getOperand(2).isImm())
  3297. return false;
  3298. Imm = ADDIMI.getOperand(2).getImm();
  3299. return true;
  3300. }
  3301. bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
  3302. unsigned Opc = ADDMI.getOpcode();
  3303. // Exit if the instruction is not ADD.
  3304. return Opc == PPC::ADD4 || Opc == PPC::ADD8;
  3305. }
  3306. bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
  3307. unsigned &ToBeDeletedReg,
  3308. unsigned &XFormOpcode,
  3309. int64_t &OffsetImm,
  3310. ImmInstrInfo &III) const {
  3311. // Only handle load/store.
  3312. if (!MI.mayLoadOrStore())
  3313. return false;
  3314. unsigned Opc = MI.getOpcode();
  3315. XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);
  3316. // Exit if instruction has no index form.
  3317. if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
  3318. return false;
  3319. // TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
  3320. if (!instrHasImmForm(XFormOpcode, isVFRegister(MI.getOperand(0).getReg()),
  3321. III, true))
  3322. return false;
  3323. if (!III.IsSummingOperands)
  3324. return false;
  3325. MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);
  3326. MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);
  3327. // Only support imm operands, not relocation slots or others.
  3328. if (!ImmOperand.isImm())
  3329. return false;
  3330. assert(RegOperand.isReg() && "Instruction format is not right");
  3331. // There are other use for ToBeDeletedReg after Imm instr, can not delete it.
  3332. if (!RegOperand.isKill())
  3333. return false;
  3334. ToBeDeletedReg = RegOperand.getReg();
  3335. OffsetImm = ImmOperand.getImm();
  3336. return true;
  3337. }
  3338. bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,
  3339. MachineInstr *&ADDIMI,
  3340. int64_t &OffsetAddi,
  3341. int64_t OffsetImm) const {
  3342. assert((Index == 1 || Index == 2) && "Invalid operand index for add.");
  3343. MachineOperand &MO = ADDMI->getOperand(Index);
  3344. if (!MO.isKill())
  3345. return false;
  3346. bool OtherIntermediateUse = false;
  3347. ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);
  3348. // Currently handle only one "add + Imminstr" pair case, exit if other
  3349. // intermediate use for ToBeChangedReg found.
  3350. // TODO: handle the cases where there are other "add + Imminstr" pairs
  3351. // with same offset in Imminstr which is like:
  3352. //
  3353. // ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
  3354. // ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
  3355. // Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
  3356. // ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
  3357. // Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
  3358. //
  3359. // can be converted to:
  3360. //
  3361. // new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
  3362. // (OffsetAddi + OffsetImm)
  3363. // Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
  3364. // Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
  3365. if (OtherIntermediateUse || !ADDIMI)
  3366. return false;
  3367. // Check if ADDI instr meets requirement.
  3368. if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))
  3369. return false;
  3370. if (isInt<16>(OffsetAddi + OffsetImm))
  3371. return true;
  3372. return false;
  3373. }
  3374. // If this instruction has an immediate form and one of its operands is a
  3375. // result of a load-immediate or an add-immediate, convert it to
  3376. // the immediate form if the constant is in range.
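// For example (illustrative, pre-RA MIR):
//   %2:gprc = LI 5
//   %3:gprc = ADD4 %1, killed %2
// becomes
//   %3:gprc = ADDI %1, 5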
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
                                          MachineInstr **KilledDef) const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();
  bool SeenIntermediateUse = true;
  unsigned ForwardingOperand = ~0U;
  MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,
                                           SeenIntermediateUse);
  if (!DefMI)
    return false;
  assert(ForwardingOperand < MI.getNumOperands() &&
         "The forwarding operand needs to be valid at this point");
  bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();
  bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
  if (KilledDef && KillFwdDefMI)
    *KilledDef = DefMI;

  // If this is an imm instruction and its register operand is produced by
  // ADDI, put the imm into the imm instruction directly.
  if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=
          PPC::INSTRUCTION_LIST_END &&
      transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))
    return true;

  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? isVFRegister(MI.getOperand(0).getReg())
                     : false;
  bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);
  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by an add-immediate,
  // try to convert it.
  if (HasImmForm &&
      transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,
                                 KillFwdDefMI))
    return true;

  // If this is a reg+reg instruction that has a reg+imm form,
  // and one of the operands is produced by LI, convert it now.
  if (HasImmForm &&
      transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))
    return true;

  // If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
  // can be simplified to LI.
  if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))
    return true;

  return false;
}

bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
                                 MachineInstr **ToErase) const {
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  Register FoldingReg = MI.getOperand(1).getReg();
  if (!FoldingReg.isVirtual())
    return false;
  MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
  if (SrcMI->getOpcode() != PPC::RLWINM &&
      SrcMI->getOpcode() != PPC::RLWINM_rec &&
      SrcMI->getOpcode() != PPC::RLWINM8 &&
      SrcMI->getOpcode() != PPC::RLWINM8_rec)
    return false;
  assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
          MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
          SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
         "Invalid PPC::RLWINM Instruction!");
  uint64_t SHSrc = SrcMI->getOperand(2).getImm();
  uint64_t SHMI = MI.getOperand(2).getImm();
  uint64_t MBSrc = SrcMI->getOperand(3).getImm();
  uint64_t MBMI = MI.getOperand(3).getImm();
  uint64_t MESrc = SrcMI->getOperand(4).getImm();
  uint64_t MEMI = MI.getOperand(4).getImm();
  assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
         "Invalid PPC::RLWINM Instruction!");
  // If MBMI is bigger than MEMI, we can never get a run of ones.
  // RotatedSrcMask non-wrap:
  //                 0........31|32........63
  // RotatedSrcMask:     B---E        B---E
  // MaskMI:         -----------|--E  B------
  // Result:           -----          ---      (Bad candidate)
  //
  // RotatedSrcMask wrap:
  //                 0........31|32........63
  // RotatedSrcMask: --E   B----|--E    B----
  // MaskMI:         -----------|--E  B------
  // Result:         ---   -----|---    -----  (Bad candidate)
  //
  // One special case is RotatedSrcMask is a full set mask.
  // RotatedSrcMask full:
  //                 0........31|32........63
  // RotatedSrcMask: ------EB---|-------EB---
  // MaskMI:         -----------|--E  B------
  // Result:         -----------|---  -------  (Good candidate)

  // Mark special case.
  bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);

  // For other MBMI > MEMI cases, just return.
  if ((MBMI > MEMI) && !SrcMaskFull)
    return false;

  // Handle MBMI <= MEMI cases.
  APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
  // In MI, we only need the low 32 bits of SrcMI, so only consider the low
  // 32 bits of the SrcMI mask. Note that in APInt the lowest bit is at index
  // 0, while in the PowerPC ISA the lowest bit is at index 63.
  APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);
  APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
  APInt FinalMask = RotatedSrcMask & MaskMI;
  uint32_t NewMB, NewME;
  bool Simplified = false;

  // If final mask is 0, MI result should be 0 too.
  if (FinalMask.isZero()) {
    bool Is64Bit =
        (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Replace Instr: ");
    LLVM_DEBUG(MI.dump());

    if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
      // Replace MI with "LI 0".
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.removeOperand(2);
      MI.getOperand(1).ChangeToImmediate(0);
      MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));
    } else {
      // Replace MI with "ANDI_rec reg, 0".
      MI.removeOperand(4);
      MI.removeOperand(3);
      MI.getOperand(2).setImm(0);
      MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
      MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
      if (SrcMI->getOperand(1).isKill()) {
        MI.getOperand(1).setIsKill(true);
        SrcMI->getOperand(1).setIsKill(false);
      } else
        // About to replace MI.getOperand(1), clear its kill flag.
        MI.getOperand(1).setIsKill(false);
    }

    LLVM_DEBUG(dbgs() << "With: ");
    LLVM_DEBUG(MI.dump());

  } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&
              NewMB <= NewME) ||
             SrcMaskFull) {
    // Here we only handle the MBMI <= MEMI case, so NewMB must be no bigger
    // than NewME. Otherwise we would get a 64 bit value after folding, but MI
    // returns a 32 bit value.
    Simplified = true;
    LLVM_DEBUG(dbgs() << "Converting Instr: ");
    LLVM_DEBUG(MI.dump());

    uint16_t NewSH = (SHSrc + SHMI) % 32;
    MI.getOperand(2).setImm(NewSH);
    // If SrcMI mask is full, no need to update MBMI and MEMI.
    if (!SrcMaskFull) {
      MI.getOperand(3).setImm(NewMB);
      MI.getOperand(4).setImm(NewME);
    }
    MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
    if (SrcMI->getOperand(1).isKill()) {
      MI.getOperand(1).setIsKill(true);
      SrcMI->getOperand(1).setIsKill(false);
    } else
      // About to replace MI.getOperand(1), clear its kill flag.
      MI.getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "To: ");
    LLVM_DEBUG(MI.dump());
  }
  if (Simplified && MRI->use_nodbg_empty(FoldingReg) &&
      !SrcMI->hasImplicitDef()) {
    // If FoldingReg has no non-debug use and it has no implicit def (it
    // is not RLWINM_rec or RLWINM8_rec), it's safe to delete its def SrcMI.
    // Otherwise keep it.
    *ToErase = SrcMI;
    LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
    LLVM_DEBUG(SrcMI->dump());
  }
  return Simplified;
}
bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
                                   ImmInstrInfo &III, bool PostRA) const {
  // The vast majority of the instructions would need their operand 2 replaced
  // with an immediate when switching to the reg+imm form. A marked exception
  // are the update form loads/stores for which a constant operand 2 would need
  // to turn into a displacement and move operand 1 to the operand 2 position.
  III.ImmOpNo = 2;
  III.OpNoForForwarding = 2;
  III.ImmWidth = 16;
  III.ImmMustBeMultipleOf = 1;
  III.TruncateImmTo = 0;
  III.IsSummingOperands = false;
  switch (Opc) {
  default: return false;
  case PPC::ADD4:
  case PPC::ADD8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 1;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
    break;
  case PPC::ADDC:
  case PPC::ADDC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
    break;
  case PPC::ADDC_rec:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpcode = PPC::ADDIC_rec;
    break;
  case PPC::SUBFC:
  case PPC::SUBFC8:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
    break;
  case PPC::CMPW:
  case PPC::CMPD:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
    break;
  case PPC::CMPLW:
  case PPC::CMPLD:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
    break;
  case PPC::AND_rec:
  case PPC::AND8_rec:
  case PPC::OR:
  case PPC::OR8:
  case PPC::XOR:
  case PPC::XOR8:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = true;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::AND_rec:
      III.ImmOpcode = PPC::ANDI_rec;
      break;
    case PPC::AND8_rec:
      III.ImmOpcode = PPC::ANDI8_rec;
      break;
    case PPC::OR: III.ImmOpcode = PPC::ORI; break;
    case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
    case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
    case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
    }
    break;
  case PPC::RLWNM:
  case PPC::RLWNM8:
  case PPC::RLWNM_rec:
  case PPC::RLWNM8_rec:
  case PPC::SLW:
  case PPC::SLW8:
  case PPC::SLW_rec:
  case PPC::SLW8_rec:
  case PPC::SRW:
  case PPC::SRW8:
  case PPC::SRW_rec:
  case PPC::SRW8_rec:
  case PPC::SRAW:
  case PPC::SRAW_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||
        Opc == PPC::RLWNM8_rec)
      III.TruncateImmTo = 5;
    else
      III.TruncateImmTo = 6;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::RLWNM_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::RLWNM8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SLW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SLW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
    case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
    case PPC::SRW_rec:
      III.ImmOpcode = PPC::RLWINM_rec;
      break;
    case PPC::SRW8_rec:
      III.ImmOpcode = PPC::RLWINM8_rec;
      break;
    case PPC::SRAW:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI;
      break;
    case PPC::SRAW_rec:
      III.ImmWidth = 5;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRAWI_rec;
      break;
    }
    break;
  case PPC::RLDCL:
  case PPC::RLDCL_rec:
  case PPC::RLDCR:
  case PPC::RLDCR_rec:
  case PPC::SLD:
  case PPC::SLD_rec:
  case PPC::SRD:
  case PPC::SRD_rec:
  case PPC::SRAD:
  case PPC::SRAD_rec:
    III.SignedImm = false;
    III.ZeroIsSpecialOrig = 0;
    III.ZeroIsSpecialNew = 0;
    III.IsCommutative = false;
    // This isn't actually true, but the instructions ignore any of the
    // upper bits, so any immediate loaded with an LI is acceptable.
    // This does not apply to shift right algebraic because a value
    // out of range will produce a -1/0.
    III.ImmWidth = 16;
    if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||
        Opc == PPC::RLDCR_rec)
      III.TruncateImmTo = 6;
    else
      III.TruncateImmTo = 7;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::RLDCL_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::RLDCR_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
    case PPC::SLD_rec:
      III.ImmOpcode = PPC::RLDICR_rec;
      break;
    case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
    case PPC::SRD_rec:
      III.ImmOpcode = PPC::RLDICL_rec;
      break;
    case PPC::SRAD:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI;
      break;
    case PPC::SRAD_rec:
      III.ImmWidth = 6;
      III.TruncateImmTo = 0;
      III.ImmOpcode = PPC::SRADI_rec;
      break;
    }
    break;
  // Loads and stores:
  case PPC::LBZX:
  case PPC::LBZX8:
  case PPC::LHZX:
  case PPC::LHZX8:
  case PPC::LHAX:
  case PPC::LHAX8:
  case PPC::LWZX:
  case PPC::LWZX8:
  case PPC::LWAX:
  case PPC::LDX:
  case PPC::LFSX:
  case PPC::LFDX:
  case PPC::STBX:
  case PPC::STBX8:
  case PPC::STHX:
  case PPC::STHX8:
  case PPC::STWX:
  case PPC::STWX8:
  case PPC::STDX:
  case PPC::STFSX:
  case PPC::STFDX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
    case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
    case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
    case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
    case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
    case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
    case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
    case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
    case PPC::LWAX:
      III.ImmOpcode = PPC::LWA;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;
    case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
    case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
    case PPC::STBX: III.ImmOpcode = PPC::STB; break;
    case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
    case PPC::STHX: III.ImmOpcode = PPC::STH; break;
    case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
    case PPC::STWX: III.ImmOpcode = PPC::STW; break;
    case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
    case PPC::STDX:
      III.ImmOpcode = PPC::STD;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
    case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
    }
    break;
  case PPC::LBZUX:
  case PPC::LBZUX8:
  case PPC::LHZUX:
  case PPC::LHZUX8:
  case PPC::LHAUX:
  case PPC::LHAUX8:
  case PPC::LWZUX:
  case PPC::LWZUX8:
  case PPC::LDUX:
  case PPC::LFSUX:
  case PPC::LFDUX:
  case PPC::STBUX:
  case PPC::STBUX8:
  case PPC::STHUX:
  case PPC::STHUX8:
  case PPC::STWUX:
  case PPC::STWUX8:
  case PPC::STDUX:
  case PPC::STFSUX:
  case PPC::STFDUX:
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 2;
    III.ZeroIsSpecialNew = 3;
    III.IsCommutative = false;
    III.IsSummingOperands = true;
    III.ImmOpNo = 2;
    III.OpNoForForwarding = 3;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
    case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
    case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
    case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
    case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
    case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
    case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
    case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
    case PPC::LDUX:
      III.ImmOpcode = PPC::LDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
    case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
    case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
    case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
    case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
    case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
    case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
    case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
    case PPC::STDUX:
      III.ImmOpcode = PPC::STDU;
      III.ImmMustBeMultipleOf = 4;
      break;
    case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
    case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
    }
    break;
  // Power9 and up only. For some of these, the X-Form version has access to all
  // 64 VSR's whereas the D-Form only has access to the VR's. We replace those
  // with pseudo-ops pre-ra and for post-ra, we check that the register loaded
  // into or stored from is one of the VR registers.
  case PPC::LXVX:
  case PPC::LXSSPX:
  case PPC::LXSDX:
  case PPC::STXVX:
  case PPC::STXSSPX:
  case PPC::STXSDX:
  case PPC::XFLOADf32:
  case PPC::XFLOADf64:
  case PPC::XFSTOREf32:
  case PPC::XFSTOREf64:
    if (!Subtarget.hasP9Vector())
      return false;
    III.SignedImm = true;
    III.ZeroIsSpecialOrig = 1;
    III.ZeroIsSpecialNew = 2;
    III.IsCommutative = true;
    III.IsSummingOperands = true;
    III.ImmOpNo = 1;
    III.OpNoForForwarding = 2;
    III.ImmMustBeMultipleOf = 4;
    switch(Opc) {
    default: llvm_unreachable("Unknown opcode");
    case PPC::LXVX:
      III.ImmOpcode = PPC::LXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::LXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSSP;
        else {
          III.ImmOpcode = PPC::LFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf32:
      III.ImmOpcode = PPC::DFLOADf32;
      break;
    case PPC::LXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::LXSD;
        else {
          III.ImmOpcode = PPC::LFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFLOADf64:
      III.ImmOpcode = PPC::DFLOADf64;
      break;
    case PPC::STXVX:
      III.ImmOpcode = PPC::STXV;
      III.ImmMustBeMultipleOf = 16;
      break;
    case PPC::STXSSPX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSSP;
        else {
          III.ImmOpcode = PPC::STFS;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf32:
      III.ImmOpcode = PPC::DFSTOREf32;
      break;
    case PPC::STXSDX:
      if (PostRA) {
        if (IsVFReg)
          III.ImmOpcode = PPC::STXSD;
        else {
          III.ImmOpcode = PPC::STFD;
          III.ImmMustBeMultipleOf = 1;
        }
        break;
      }
      [[fallthrough]];
    case PPC::XFSTOREf64:
      III.ImmOpcode = PPC::DFSTOREf64;
      break;
    }
    break;
  }
  return true;
}
// Utility function for swapping two arbitrary operands of an instruction.
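// For example (illustrative), swapping operands 1 and 3 of
//   %0 = INST %a, %b, %c
// yields
//   %0 = INST %c, %b, %a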
static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
  assert(Op1 != Op2 && "Cannot swap operand with itself.");

  unsigned MaxOp = std::max(Op1, Op2);
  unsigned MinOp = std::min(Op1, Op2);
  MachineOperand MOp1 = MI.getOperand(MinOp);
  MachineOperand MOp2 = MI.getOperand(MaxOp);
  MI.removeOperand(std::max(Op1, Op2));
  MI.removeOperand(std::min(Op1, Op2));

  // If the operands we are swapping are the two at the end (the common case)
  // we can just remove both and add them in the opposite order.
  if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {
    MI.addOperand(MOp2);
    MI.addOperand(MOp1);
  } else {
    // Store all operands in a temporary vector, remove them and re-add in the
    // right order.
    SmallVector<MachineOperand, 2> MOps;
    unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.
    for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }
    // MOp2 needs to be added next.
    MI.addOperand(MOp2);
    // Now add the rest.
    for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
      if (i == MaxOp)
        MI.addOperand(MOp1);
      else {
        MI.addOperand(MOps.back());
        MOps.pop_back();
      }
    }
  }
}
// Check if the 'MI', whose operand at index OpNoForForwarding is being
// forwarded into, meets the requirements described in the ImmInstrInfo.
bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
                                               const ImmInstrInfo &III,
                                               unsigned OpNoForForwarding) const {
  // As the algorithm of checking for PPC::ZERO/PPC::ZERO8
  // would not work pre-RA, we can only do the check post RA.
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  // Cannot do the transform if MI isn't summing the operands.
  if (!III.IsSummingOperands)
    return false;

  // The instruction we are trying to replace must have the
  // ZeroIsSpecialOrig set.
  if (!III.ZeroIsSpecialOrig)
    return false;

  // We cannot do the transform if the operand we are trying to replace
  // isn't the same as the operand the instruction allows.
  if (OpNoForForwarding != III.OpNoForForwarding)
    return false;

  // Check if the instruction we are trying to transform really has
  // the special zero register as its operand.
  if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
      MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
    return false;

  // This machine instruction is convertible if it is:
  // 1. summing the operands.
  // 2. one of the operands is the special zero register.
  // 3. the operand we are trying to replace is allowed by the MI.
  return true;
}

// Check if the DefMI is the add inst and set the ImmMO and RegMO
// accordingly.
bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
                                               const ImmInstrInfo &III,
                                               MachineOperand *&ImmMO,
                                               MachineOperand *&RegMO) const {
  unsigned Opc = DefMI.getOpcode();
  if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  assert(DefMI.getNumOperands() >= 3 &&
         "Add inst must have at least three operands");
  RegMO = &DefMI.getOperand(1);
  ImmMO = &DefMI.getOperand(2);

  // Before RA, ADDI's first operand could be a frame index.
  if (!RegMO->isReg())
    return false;

  // This DefMI is eligible for forwarding if it is:
  // 1. add inst
  // 2. one of the operands is Imm/CPI/Global.
  return isAnImmediateOperand(*ImmMO);
}

bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x   ->   z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walk the insts in reverse order (MI --> DefMI) to get the last DEF of
  // the Reg.
  MachineBasicBlock::const_reverse_iterator It = MI;
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}

bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL) {
    // The operand for ADDItocL is a CPI, which isn't an immediate at compile
    // time. However, we know that it is 16 bits wide and has the alignment
    // of 4. Check if the instruction meets the requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
        III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load. A DForm load cannot be represented if it is a multiple of say 2.
    // XForm loads do not have this restriction.
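    // E.g. LD and LWA are DS-form instructions whose displacement must be a
    // multiple of 4, while LBZ takes any 16-bit displacement.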
    if (ImmMO.isGlobal()) {
      const DataLayout &DL = ImmMO.getGlobal()->getParent()->getDataLayout();
      if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
        return false;
    }

    return true;
  }

  if (ImmMO.isImm()) {
    // It is an Imm, we need to check if it fits the range.
    // Sign-extend to 64-bits.
    // DefMI may be folded with another imm form instruction, the result Imm is
    // the sum of Imm of DefMI and BaseImm which is from imm form instruction.
    APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);
    if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))
      return false;
    if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))
      return false;
    Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);

    if (Imm % III.ImmMustBeMultipleOf)
      return false;
    if (III.TruncateImmTo)
      Imm &= ((1 << III.TruncateImmTo) - 1);
  } else
    return false;

  // This ImmMO is forwarded if it meets the requirement described
  // in ImmInstrInfo.
  return true;
}

bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
                                unsigned OpNoForForwarding,
                                MachineInstr **KilledDef) const {
  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
      !DefMI.getOperand(1).isImm())
    return false;

  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  bool PostRA = !MRI->isSSA();

  int64_t Immediate = DefMI.getOperand(1).getImm();
  // Sign-extend to 64-bits.
  int64_t SExtImm = SignExtend64<16>(Immediate);

  bool IsForwardingOperandKilled = MI.getOperand(OpNoForForwarding).isKill();
  Register ForwardingOperandReg = MI.getOperand(OpNoForForwarding).getReg();

  bool ReplaceWithLI = false;
  bool Is64BitLI = false;
  int64_t NewImm = 0;
  bool SetCR = false;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    return false;

  // FIXME: Any branches conditional on such a comparison can be made
  // unconditional. At this time, this happens too infrequently to be worth
  // the implementation effort, but if that ever changes, we could convert
  // such a pattern here.
  case PPC::CMPWI:
  case PPC::CMPLWI:
  case PPC::CMPDI:
  case PPC::CMPLDI: {
    // Doing this post-RA would require dataflow analysis to reliably find uses
    // of the CR register set by the compare.
    // No need to fixup killed/dead flag since this transformation is only
    // valid before RA.
    if (PostRA)
      return false;
    // If a compare-immediate is fed by an immediate and is itself an input of
    // an ISEL (the most common case), convert the ISEL into a COPY of the
    // correct register.
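    // For example (illustrative, pre-RA MIR):
    //   %5 = LI 4
    //   %6 = CMPWI killed %5, 2
    //   %7 = ISEL %3, %4, %6.sub_gt
    // Since 4 > 2 is known at compile time, the ISEL becomes "%7 = COPY %3".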
    bool Changed = false;
    Register DefReg = MI.getOperand(0).getReg();
    int64_t Comparand = MI.getOperand(2).getImm();
    int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 0
                                ? (Comparand | 0xFFFFFFFFFFFF0000)
                                : Comparand;

    for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {
      unsigned UseOpc = CompareUseMI.getOpcode();
      if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
        continue;
      unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();
      Register TrueReg = CompareUseMI.getOperand(1).getReg();
      Register FalseReg = CompareUseMI.getOperand(2).getReg();
      unsigned RegToCopy =
          selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);
      if (RegToCopy == PPC::NoRegister)
        continue;
      // Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
      if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {
        CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
        replaceInstrOperandWithImm(CompareUseMI, 1, 0);
        CompareUseMI.removeOperand(3);
        CompareUseMI.removeOperand(2);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
      LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
      LLVM_DEBUG(dbgs() << "Is converted to:\n");
      // Convert to copy and remove unneeded operands.
      CompareUseMI.setDesc(get(PPC::COPY));
      CompareUseMI.removeOperand(3);
      CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);
      CmpIselsConverted++;
      Changed = true;
      LLVM_DEBUG(CompareUseMI.dump());
    }
    if (Changed)
      return true;
    // This may end up incremented multiple times since this function is called
    // during a fixed-point transformation, but it is only meant to indicate
    // the presence of this opportunity.
    MissedConvertibleImmediateInstrs++;
    return false;
  }

  // Immediate forms - may simply be convertible to an LI.
  case PPC::ADDI:
  case PPC::ADDI8: {
    // Does the sum fit in a 16-bit signed field?
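    // E.g. (illustrative) LI 100 feeding "ADDI x, 20" gives 120, which fits,
    // so the ADDI is rewritten as "LI 120" (LI8 in the ADDI8 case).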
    int64_t Addend = MI.getOperand(2).getImm();
    if (isInt<16>(Addend + SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ADDI8;
      NewImm = Addend + SExtImm;
      break;
    }
    return false;
  }
  case PPC::SUBFIC:
  case PPC::SUBFIC8: {
    // Only transform this if the CARRY implicit operand is dead.
    if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())
      return false;
    int64_t Minuend = MI.getOperand(2).getImm();
    if (isInt<16>(Minuend - SExtImm)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::SUBFIC8;
      NewImm = Minuend - SExtImm;
      break;
    }
    return false;
  }
  case PPC::RLDICL:
  case PPC::RLDICL_rec:
  case PPC::RLDICL_32:
  case PPC::RLDICL_32_64: {
    // Use APInt's rotate function.
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,
                SExtImm, true);
    InVal = InVal.rotl(SH);
    uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;
    InVal &= Mask;
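    // E.g. (illustrative) LI 255 feeding "RLDICL x, 4, 48":
    // rotl(0xFF, 4) = 0xFF0 and Mask = 0xFFFF, so InVal = 0xFF0 and the
    // user is rewritten as a load-immediate of 4080 (LI8 here).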
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDI_rec which won't sign extend, so that's safe.
    if (isUInt<15>(InVal.getSExtValue()) ||
        (Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {
      ReplaceWithLI = true;
      Is64BitLI = Opc != PPC::RLDICL_32;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLDICL_rec;
      break;
    }
    return false;
  }
  case PPC::RLWINM:
  case PPC::RLWINM8:
  case PPC::RLWINM_rec:
  case PPC::RLWINM8_rec: {
    int64_t SH = MI.getOperand(2).getImm();
    int64_t MB = MI.getOperand(3).getImm();
    int64_t ME = MI.getOperand(4).getImm();
    APInt InVal(32, SExtImm, true);
    InVal = InVal.rotl(SH);
    APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);
    InVal &= Mask;
    // Can't replace negative values with an LI as that will sign-extend
    // and not clear the left bits. If we're setting the CR bit, we will use
    // ANDI_rec which won't sign extend, so that's safe.
    bool ValueFits = isUInt<15>(InVal.getSExtValue());
    ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&
                  isUInt<16>(InVal.getSExtValue()));
    if (ValueFits) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;
      NewImm = InVal.getSExtValue();
      SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;
      break;
    }
    return false;
  }
  case PPC::ORI:
  case PPC::ORI8:
  case PPC::XORI:
  case PPC::XORI8: {
    int64_t LogicalImm = MI.getOperand(2).getImm();
    int64_t Result = 0;
    if (Opc == PPC::ORI || Opc == PPC::ORI8)
      Result = LogicalImm | SExtImm;
    else
      Result = LogicalImm ^ SExtImm;
    if (isInt<16>(Result)) {
      ReplaceWithLI = true;
      Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;
      NewImm = Result;
      break;
    }
    return false;
  }
  }

  if (ReplaceWithLI) {
    // We need to be careful with CR-setting instructions we're replacing.
    if (SetCR) {
      // We don't know anything about uses when we're out of SSA, so only
      // replace if the new immediate will be reproduced.
      bool ImmChanged = (SExtImm & NewImm) != NewImm;
      if (PostRA && ImmChanged)
        return false;

      if (!PostRA) {
        // If the defining load-immediate has no other uses, we can just
        // replace the immediate with the new immediate.
        if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))
          DefMI.getOperand(1).setImm(NewImm);

        // If we're not using the GPR result of the CR-setting instruction, we
        // just need to and with zero/non-zero depending on the new immediate.
        else if (MRI->use_empty(MI.getOperand(0).getReg())) {
          if (NewImm) {
            assert(Immediate && "Transformation converted zero to non-zero?");
            NewImm = Immediate;
          }
        } else if (ImmChanged)
          return false;
      }
    }

    LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
    LLVM_DEBUG(MI.dump());
    LLVM_DEBUG(dbgs() << "Fed by:\n");
    LLVM_DEBUG(DefMI.dump());
    LoadImmediateInfo LII;
    LII.Imm = NewImm;
    LII.Is64Bit = Is64BitLI;
    LII.SetCR = SetCR;
    // If we're setting the CR, the original load-immediate must be kept (as an
    // operand to ANDI_rec/ANDI8_rec).
    if (KilledDef && SetCR)
      *KilledDef = nullptr;
    replaceInstrWithLI(MI, LII);

    // Fixup killed/dead flag after transformation.
    // Pattern:
    //   ForwardingOperandReg = LI imm1
    //   y = op2 imm2, ForwardingOperandReg(killed)
    if (IsForwardingOperandKilled)
      fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);
    LLVM_DEBUG(dbgs() << "With:\n");
    LLVM_DEBUG(MI.dump());
    return true;
  }
  return false;
}
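// Fold the displacement of a reg+imm load/store with the immediate of the
// ADDI feeding its base register. For example (illustrative, pre-RA):
//   %x = ADDI %base, 16
//   %y = LWZ 4, %x
// becomes
//   %y = LWZ 20, %base
// (the feeding ADDI can then be deleted if it has no other uses).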
bool PPCInstrInfo::transformToNewImmFormFedByAdd(
    MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
  MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI->isSSA();
  // FIXME: extend this to post-ra. Need to do some change in
  // getForwardingDefMI for post-ra.
  if (PostRA)
    return false;

  // Only handle load/store.
  if (!MI.mayLoadOrStore())
    return false;

  unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());

  assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
         "MI must have x-form opcode");

  // Get the Imm form info.
  ImmInstrInfo III;
  bool IsVFReg = MI.getOperand(0).isReg()
                     ? isVFRegister(MI.getOperand(0).getReg())
                     : false;

  if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))
    return false;

  if (!III.IsSummingOperands)
    return false;

  if (OpNoForForwarding != III.OpNoForForwarding)
    return false;

  MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);
  if (!ImmOperandMI.isImm())
    return false;

  // Check DefMI.
  MachineOperand *ImmMO = nullptr;
  MachineOperand *RegMO = nullptr;
  if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
    return false;
  assert(ImmMO && RegMO && "Imm and Reg operand must have been set");

  // Check Imm.
  // Set ImmBase from the imm instruction as the base; the new Imm is computed
  // inside isImmElgibleForForwarding.
  int64_t ImmBase = ImmOperandMI.getImm();
  int64_t Imm = 0;
  if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))
    return false;

  // Get killed info in case fixup needed after transformation.
  unsigned ForwardKilledOperandReg = ~0U;
  if (MI.getOperand(III.OpNoForForwarding).isKill())
    ForwardKilledOperandReg = MI.getOperand(III.OpNoForForwarding).getReg();

  // Do the transform.
  LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by:\n");
  LLVM_DEBUG(DefMI.dump());

  MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());
  if (RegMO->isKill()) {
    MI.getOperand(III.OpNoForForwarding).setIsKill(true);
    // Clear the killed flag in RegMO. Doing this here can handle some cases
    // where DefMI and MI are not in the same basic block.
    RegMO->setIsKill(false);
  }
  MI.getOperand(III.ImmOpNo).setImm(Imm);

  // FIXME: fix kill/dead flag if MI and DefMI are not in same basic block.
  if (DefMI.getParent() == MI.getParent()) {
    // Check if reg is killed between MI and DefMI.
    auto IsKilledFor = [&](unsigned Reg) {
      MachineBasicBlock::const_reverse_iterator It = MI;
      MachineBasicBlock::const_reverse_iterator E = DefMI;
      It++;
      for (; It != E; ++It) {
        if (It->killsRegister(Reg))
          return true;
      }
      return false;
    };

    // Update kill flag.
    if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
      fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
    if (ForwardKilledOperandReg != ~0U)
      fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
  }

  LLVM_DEBUG(dbgs() << "With:\n");
  LLVM_DEBUG(MI.dump());
  return true;
}

// If an X-Form instruction is fed by an add-immediate and one of its operands
// is the literal zero, attempt to forward the source of the add-immediate to
// the corresponding D-Form instruction with the displacement coming from
// the immediate being added.
bool PPCInstrInfo::transformToImmFormFedByAdd(
    MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
    MachineInstr &DefMI, bool KillDefMI) const {
  //          RegMO  ImmMO
  //            |      |
  //   x = addi reg,  imm   <----- DefMI
  //   y = op    0,    x    <----- MI
  //                   |
  //          OpNoForForwarding
  // Check if the MI meets the requirement described in the III.
  if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
    return false;

  // Check if the DefMI meets the requirement
  // described in the III. If yes, set the ImmMO and RegMO accordingly.
  MachineOperand *ImmMO = nullptr;
  MachineOperand *RegMO = nullptr;
  if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
    return false;
  assert(ImmMO && RegMO && "Imm and Reg operand must have been set");

  // As we have the Imm operand now, we need to check if the ImmMO meets
  // the requirement described in the III. If yes, set the Imm.
  int64_t Imm = 0;
  if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))
    return false;

  bool IsFwdFeederRegKilled = false;
  bool SeenIntermediateUse = false;
  // Check if the RegMO can be forwarded to MI.
  if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
                                 IsFwdFeederRegKilled, SeenIntermediateUse))
    return false;

  // Get killed info in case fixup needed after transformation.
  unsigned ForwardKilledOperandReg = ~0U;
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI.isSSA();
  if (PostRA && MI.getOperand(OpNoForForwarding).isKill())
    ForwardKilledOperandReg = MI.getOperand(OpNoForForwarding).getReg();

  // We know that the MI and the DefMI both match the pattern, and the Imm
  // also meets the requirement of the new Imm-form.
  // It is safe to do the transformation now.
  LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by:\n");
  LLVM_DEBUG(DefMI.dump());

  // Update the base reg first.
  MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),
                                                        false, false,
                                                        RegMO->isKill());

  // Then, update the imm.
  if (ImmMO->isImm()) {
    // If the ImmMO is Imm, change the operand that has ZERO to that Imm
    // directly.
    replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);
  } else {
    // Otherwise, it is a Constant Pool Index (CPI) or a Global, which is a
    // relocation in fact. We need to replace the special zero register
    // with ImmMO.
    // Before that, we need to fix up the target flags for the imm.
    // For some reason, the flag is not set for the ImmMO if it is a CPI.
    if (DefMI.getOpcode() == PPC::ADDItocL)
      ImmMO->setTargetFlags(PPCII::MO_TOC_LO);

    // MachineInstr has no interface such as MI.setOperand(i), only
    // MI.getOperand(i). To replace the ZERO MachineOperand with ImmMO, we
    // need to remove the ZERO operand and all the operands behind it, add
    // the ImmMO, and then move back all the operands behind ZERO.
    SmallVector<MachineOperand, 2> MOps;
    for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig;
         i--) {
      MOps.push_back(MI.getOperand(i));
      MI.removeOperand(i);
    }

    // Remove the last MO in the list, which is the ZERO operand in fact.
    MOps.pop_back();
    // Add the imm operand.
    MI.addOperand(*ImmMO);
    // Now add the rest back.
    for (auto &MO : MOps)
      MI.addOperand(MO);
  }

  // Update the opcode.
  MI.setDesc(get(III.ImmOpcode));

  // Fix up killed/dead flag after transformation.
  // Pattern 1:
  //   x = ADD KilledFwdFeederReg, imm
  //   n = opn KilledFwdFeederReg(killed), regn
  //   y = XOP 0, x
  // Pattern 2:
  //   x = ADD reg(killed), imm
  //   y = XOP 0, x
  if (IsFwdFeederRegKilled || RegMO->isKill())
    fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
  // Pattern 3:
  //   ForwardKilledOperandReg = ADD reg, imm
  //   y = XOP 0, ForwardKilledOperandReg(killed)
  if (ForwardKilledOperandReg != ~0U)
    fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);

  LLVM_DEBUG(dbgs() << "With:\n");
  LLVM_DEBUG(MI.dump());
  return true;
}
bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
                                             const ImmInstrInfo &III,
                                             unsigned ConstantOpNo,
                                             MachineInstr &DefMI) const {
  // DefMI must be LI or LI8.
  if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||
      !DefMI.getOperand(1).isImm())
    return false;

  // Get the Imm operand and sign-extend to 64-bits.
  int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  bool PostRA = !MRI.isSSA();
  // Exit early if we can't convert this.
  if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
    return false;
  if (Imm % III.ImmMustBeMultipleOf)
    return false;
  if (III.TruncateImmTo)
    Imm &= ((1 << III.TruncateImmTo) - 1);
  if (III.SignedImm) {
    APInt ActualValue(64, Imm, true);
    if (!ActualValue.isSignedIntN(III.ImmWidth))
      return false;
  } else {
    uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
    if ((uint64_t)Imm > UnsignedMax)
      return false;
  }

  // If we're post-RA and the instructions don't agree on whether register
  // zero is special, we can transform this as long as the register operand
  // that will end up in the location where zero is special isn't R0.
  if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
                                  III.ZeroIsSpecialNew + 1;
    Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
    Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
    // If R0 is in the operand where zero is special for the new instruction,
    // it is unsafe to transform if the constant operand isn't that operand.
    if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
        ConstantOpNo != III.ZeroIsSpecialNew)
      return false;
    if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
        ConstantOpNo != PosForOrigZero)
      return false;
  }

  // Get killed info in case fixup needed after transformation.
  unsigned ForwardKilledOperandReg = ~0U;
  if (PostRA && MI.getOperand(ConstantOpNo).isKill())
    ForwardKilledOperandReg = MI.getOperand(ConstantOpNo).getReg();

  unsigned Opc = MI.getOpcode();
  bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
                        Opc == PPC::SRW || Opc == PPC::SRW_rec ||
                        Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
                        Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
  bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
                        Opc == PPC::SRD || Opc == PPC::SRD_rec;
  bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
               Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
  bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
                    Opc == PPC::SRD_rec;

  LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
  LLVM_DEBUG(DefMI.dump());

  MI.setDesc(get(III.ImmOpcode));
  if (ConstantOpNo == III.OpNoForForwarding) {
    // Converting shifts to immediate form is a bit tricky since they may do
    // one of three things:
    // 1. If the shift amount is between OpSize and 2*OpSize, the result is
    //    zero.
    // 2. If the shift amount is zero, the result is unchanged (save for maybe
    //    setting CR0).
    // 3. If the shift amount is in [1, OpSize), it's just a shift.
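    // E.g. (illustrative) SRW fed by "LI 3" is case 3 above and becomes
    // RLWINM with (SH, MB, ME) = (29, 3, 31).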
  4624. if (SpecialShift32 || SpecialShift64) {
  4625. LoadImmediateInfo LII;
  4626. LII.Imm = 0;
  4627. LII.SetCR = SetCR;
  4628. LII.Is64Bit = SpecialShift64;
  4629. uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
  4630. if (Imm & (SpecialShift32 ? 0x20 : 0x40))
  4631. replaceInstrWithLI(MI, LII);
  4632. // Shifts by zero don't change the value. If we don't need to set CR0,
  4633. // just convert this to a COPY. Can't do this post-RA since we've already
  4634. // cleaned up the copies.
  4635. else if (!SetCR && ShAmt == 0 && !PostRA) {
  4636. MI.removeOperand(2);
  4637. MI.setDesc(get(PPC::COPY));
  4638. } else {
  4639. // The 32 bit and 64 bit instructions are quite different.
  4640. if (SpecialShift32) {
  4641. // Left shifts use (N, 0, 31-N).
  4642. // Right shifts use (32-N, N, 31) if 0 < N < 32.
  4643. // use (0, 0, 31) if N == 0.
          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
          uint64_t MB = RightShift ? ShAmt : 0;
          uint64_t ME = RightShift ? 31 : 31 - ShAmt;
          replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
              .addImm(ME);
        } else {
          // Left shifts use (N, 63-N).
          // Right shifts use (64-N, N) if 0 < N < 64,
          //              and (0, 0)    if N == 0.
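          // For example, a left shift by 3 yields (SH=3, ME=60) for RLDICR,
          // and a right shift by 3 yields (SH=61, MB=3) for RLDICL, matching
          // the sldi/srdi extended mnemonics.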
          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
          uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
          replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
        }
      }
    } else
      replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
  }
  // Convert commutative instructions (switch the operands and convert the
  // desired one to an immediate).
  else if (III.IsCommutative) {
    replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
    swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
  } else
    llvm_unreachable("Should have exited early!");
  // For instructions for which the constant register replaces a different
  // operand than where the immediate goes, we need to swap them.
  if (III.OpNoForForwarding != III.ImmOpNo)
    swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);

  // If the special R0/X0 register index differs between the original and the
  // new instruction, we need to fix up the register class in the new
  // instruction.
  if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    if (III.ZeroIsSpecialNew) {
      // If the operand at III.ZeroIsSpecialNew is a physical register (e.g.
      // ZERO/ZERO8), there is no need to fix up the register class.
      Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
      if (RegToModify.isVirtual()) {
        const TargetRegisterClass *NewRC =
          MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
          &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
        MRI.setRegClass(RegToModify, NewRC);
      }
    }
  }
  // Fix up killed/dead flag after transformation.
  // Pattern:
  //   ForwardKilledOperandReg = LI imm
  //   y = XOP reg, ForwardKilledOperandReg(killed)
  if (ForwardKilledOperandReg != ~0U)
    fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);

  LLVM_DEBUG(dbgs() << "With: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "\n");
  return true;
}

const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
    return &PPC::VSRCRegClass;
  return RC;
}

int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
  return PPC::getRecordFormOpcode(Opcode);
}
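
// Pre-increment subword loads define two results: operand 0 is the loaded
// value and operand 1 is the updated base address, which is why the callers
// below check that operand 0 is the register being traced.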
static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
  return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
          Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
          Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
          Opcode == PPC::LHZUX8);
}

// This function checks for sign extension from 32 bits to 64 bits.
static bool definedBySignExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;
  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isSExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU is sign-extended.
  if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
    return true;

  // RLDICL generates sign-extended output if it clears at least
  // 33 bits from the left (MSB).
  if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
    return true;
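  // (Clearing 33 bits leaves at most the low 31 bits set, so the 32-bit sign
  // bit and everything above it are zero, making the result sign-extended in
  // both widths.)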
  // If the mask clears at least one bit from the left in the lower word
  // (i.e. MB > 0 with MB <= ME), bits 0 through 32 of the output are all
  // cleared, so the output is already sign-extended.
  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
      MI->getOperand(3).getImm() > 0 &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  // If the most significant bit of the immediate in ANDIS is zero, bits 0
  // through 32 of the result are all cleared.
  if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
    uint16_t Imm = MI->getOperand(2).getImm();
    if ((Imm & 0x8000) == 0)
      return true;
  }

  return false;
}

// This function checks the machine instruction that defines the input
// register Reg. If that machine instruction always outputs a value that has
// only zeros in the higher 32 bits, this function returns true.
static bool definedByZeroExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;
  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isZExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU/LWZU is zero-extended.
  if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
       Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
      MI->getOperand(0).getReg() == Reg)
    return true;

  // The 16-bit immediate is sign-extended in li/lis.
  // If the most significant bit is zero, all higher bits are zero.
  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
    int64_t Imm = MI->getOperand(1).getImm();
    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
      return true;
  }
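  // For instance, LI 100 yields a value with every bit above bit 6 clear,
  // whereas LI -1 sign-extends to all ones and is therefore not
  // zero-extended.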

  // We have some variations of rotate-and-mask instructions
  // that clear the higher 32 bits.
  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
       Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
       Opcode == PPC::RLDICL_32_64) &&
      MI->getOperand(3).getImm() >= 32)
    return true;

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
      MI->getOperand(3).getImm() >= 32 &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return true;

  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  return false;
}

// This function returns true if the input MachineInstr is a TOC save
// instruction.
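// On 64-bit ELF this is a store of the TOC pointer to its dedicated stack
// slot, e.g. STD $x2, 24($x1) under ELFv2; the exact offset is taken from
// the frame lowering, so other layouts (e.g. ELFv1's offset of 40) are
// handled the same way.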
bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
  if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
    return false;
  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
  unsigned StackOffset = MI.getOperand(1).getImm();
  Register StackReg = MI.getOperand(2).getReg();
  Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  if (StackReg == SPReg && StackOffset == TOCSaveOffset)
    return true;

  return false;
}

// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is
// incremented only when isSignOrZeroExtended calls itself more than once.
// This is done to prevent exponential recursion. There is no parameter to
// track linear recursion.
std::pair<bool, bool>
PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg,
                                   const unsigned BinOpDepth,
                                   const MachineRegisterInfo *MRI) const {
  if (!Register::isVirtualRegister(Reg))
    return std::pair<bool, bool>(false, false);

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return std::pair<bool, bool>(false, false);

  bool IsSExt = definedBySignExtendingOp(Reg, MRI);
  bool IsZExt = definedByZeroExtendingOp(Reg, MRI);

  // If we know the instruction always returns a sign- and zero-extended
  // result, return here.
  if (IsSExt && IsZExt)
    return std::pair<bool, bool>(IsSExt, IsZExt);

  switch (MI->getOpcode()) {
  case PPC::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();

    // In both the ELFv1 and ELFv2 ABIs, method parameters and the return
    // value are sign- or zero-extended.
    const MachineFunction *MF = MI->getMF();

    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
      // If this is a copy from another register, we recursively check the
      // source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // From here on, everything is the SVR4 ABI.
    const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
    // We check the ZExt/SExt flags for a method parameter.
    if (MI->getParent()->getBasicBlock() ==
        &MF->getFunction().getEntryBlock()) {
      Register VReg = MI->getOperand(0).getReg();
      if (MF->getRegInfo().isLiveIn(VReg)) {
        IsSExt |= FuncInfo->isLiveInSExt(VReg);
        IsZExt |= FuncInfo->isLiveInZExt(VReg);
        return std::pair<bool, bool>(IsSExt, IsZExt);
      }
    }

    if (SrcReg != PPC::X3) {
      // If this is a copy from another register, we recursively check the
      // source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // For a method return value, we check the ZExt/SExt flags in the
    // attributes. We assume the following code sequence for a method call:
    //   ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
    //   BL8_NOP @func,...
    //   ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
    //   %5 = COPY %x3; G8RC:%5
    const MachineBasicBlock *MBB = MI->getParent();
    std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
    MachineBasicBlock::const_instr_iterator II =
        MachineBasicBlock::const_instr_iterator(MI);
    if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
      return IsExtendPair;

    const MachineInstr &CallMI = *(--II);
    if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
      return IsExtendPair;

    const Function *CalleeFn =
        dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
    if (!CalleeFn)
      return IsExtendPair;
    const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
    if (IntTy && IntTy->getBitWidth() <= 32) {
      const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
      IsSExt |= Attrs.hasAttribute(Attribute::SExt);
      IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
      return std::pair<bool, bool>(IsSExt, IsZExt);
    }
    return IsExtendPair;
  }

  // OR and XOR with a 16-bit immediate do not change the upper 48 bits.
  // So, we track the operand register as we do for a register copy.
  case PPC::ORI:
  case PPC::XORI:
  case PPC::ORI8:
  case PPC::XORI8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                 SrcExt.second || IsZExt);
  }

  // OR and XOR with a shifted 16-bit immediate do not change the upper
  // 32 bits. So, we track the operand register for zero extension.
  // For sign extension, when the MSB of the immediate is zero, we also
  // track the operand register since the upper 33 bits are unchanged.
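  // For example, ORIS with an immediate whose MSB is set may flip the 32-bit
  // sign bit while leaving the upper word alone, so sign extension can no
  // longer be assumed, but the zero-extension status of the source still
  // carries over.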
  case PPC::ORIS:
  case PPC::XORIS:
  case PPC::ORIS8:
  case PPC::XORIS8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    uint16_t Imm = MI->getOperand(2).getImm();
    if (Imm & 0x8000)
      return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
    else
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
  }

  // If all incoming values are sign- or zero-extended,
  // the output of OR, ISEL or PHI is also sign- or zero-extended.
  case PPC::OR:
  case PPC::OR8:
  case PPC::ISEL:
  case PPC::PHI: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    // The input registers for PHI are operands 1, 3, ...
    // The input registers for the others are operands 1 and 2.
    unsigned OperandEnd = 3, OperandStride = 1;
    if (MI->getOpcode() == PPC::PHI) {
      OperandEnd = MI->getNumOperands();
      OperandStride = 2;
    }

    IsSExt = true;
    IsZExt = true;
    for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
      if (!MI->getOperand(I).isReg())
        return std::pair<bool, bool>(false, false);

      Register SrcReg = MI->getOperand(I).getReg();
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
      IsSExt &= SrcExt.first;
      IsZExt &= SrcExt.second;
    }
    return std::pair<bool, bool>(IsSExt, IsZExt);
  }

  // If at least one of the incoming values of an AND is zero-extended,
  // then the output is also zero-extended. If both of the incoming values
  // are sign-extended, then the output is also sign-extended.
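  // This holds because AND can only clear bits: a zero upper word in either
  // input forces a zero upper word in the result, while the sign-replicated
  // upper bits survive only when both inputs carry them.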
  case PPC::AND:
  case PPC::AND8: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    Register SrcReg1 = MI->getOperand(1).getReg();
    Register SrcReg2 = MI->getOperand(2).getReg();
    auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
    auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
    return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
                                 Src1Ext.second || Src2Ext.second);
  }

  default:
    break;
  }
  return std::pair<bool, bool>(IsSExt, IsZExt);
}

bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
  return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
}

namespace {
class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *Loop, *EndLoop, *LoopCount;
  MachineFunction *MF;
  const TargetInstrInfo *TII;
  int64_t TripCount;

public:
  PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
                       MachineInstr *LoopCount)
      : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
        MF(Loop->getParent()->getParent()),
        TII(MF->getSubtarget().getInstrInfo()) {
    // Inspect the Loop instruction up-front, as it may be deleted when we
    // call createTripCountGreaterCondition.
    if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
      TripCount = LoopCount->getOperand(1).getImm();
    else
      TripCount = -1;
  }

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Only ignore the terminator.
    return MI == EndLoop;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &Cond) override {
    if (TripCount == -1) {
      // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by
      // 1, we don't need to generate anything here.
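      // (The {imm 0, CTR} pair below is the condition form that PPC branch
      // analysis interprets as "decrement CTR and branch if zero", i.e.
      // BDZ/BDZ8.)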
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(
          MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
          true));
      return {};
    }
    return TripCount > TC;
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {
    // Do nothing. We want the LOOP setup instruction to stay in the *old*
    // preheader, so we can use BDZ in the prologs to adapt the loop trip
    // count.
  }

  void adjustTripCount(int TripCountAdjust) override {
    // If the loop trip count is a compile-time value, then just change the
    // value.
    if (LoopCount->getOpcode() == PPC::LI8 ||
        LoopCount->getOpcode() == PPC::LI) {
      int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
      LoopCount->getOperand(1).setImm(TripCount);
      return;
    }

    // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by 1,
    // we don't need to generate anything here.
  }

  void disposed() override {
    Loop->eraseFromParent();
    // Ensure the loop setup instruction is deleted too.
    LoopCount->eraseFromParent();
  }
};
} // namespace

std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  // We really "analyze" only hardware loops right now.
  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());
  MachineFunction *MF = Preheader->getParent();

  if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
    SmallPtrSet<MachineBasicBlock *, 8> Visited;
    if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
      Register LoopCountReg = LoopInst->getOperand(0).getReg();
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
      return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
    }
  }
  return nullptr;
}

MachineInstr *PPCInstrInfo::findLoopInstr(
    MachineBasicBlock &PreHeader,
    SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
  unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);

  // The loop set-up instruction should be in the preheader.
  for (auto &I : PreHeader.instrs())
    if (I.getOpcode() == LOOPi)
      return &I;
  return nullptr;
}

// Return true if we succeed in getting the base operand and byte offset of
// the given memory instruction, along with the memory width. Width is the
// size of memory that is being loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
    unsigned &Width, const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
    return false;

  // Handle only loads/stores with a base register followed by an immediate
  // offset.
  if (!LdSt.getOperand(1).isImm() ||
      (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
    return false;
  if (!LdSt.hasOneMemOperand())
    return false;

  Width = (*LdSt.memoperands_begin())->getSize();
  Offset = LdSt.getOperand(1).getImm();
  BaseReg = &LdSt.getOperand(2);
  return true;
}

bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width.
  // Width is the size of memory that is being loaded/stored (e.g. 1, 2, 4).
  // If the base registers are identical, and the offset of the lower memory
  // access plus its width does not reach the offset of the higher memory
  // access, then the memory accesses are disjoint.
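  // For instance, two 4-byte accesses at offsets 0 and 8 off the same base
  // register cannot overlap, since 0 + 4 <= 8.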
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  unsigned int WidthA = 0, WidthB = 0;
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowOffset + LowWidth <= HighOffset)
        return true;
    }
  }
  return false;
}