//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "arm-isel"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  StringRef getPassName() const override { return "ARM Instruction Selection"; }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true, as some complex patterns (such as those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check.
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check.
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }
  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);

  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);
  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///    the accumulator and the immediate operand, i.e. 0
  ///    for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if the node forms an integer ABS pattern.
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
} // end anonymous namespace
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;
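  // Any Thumb target with v6T2 operations is Thumb2, so after the early
  // return above, checking isThumb() here is sufficient.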
  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is a constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as a shifter
    // operand of 'add' and the 'and' and 'srl' would become a bit-field
    // extraction node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free; e.g. on
      // Swift, a left shift by 1 or 2 is free as a shifter operand, but other
      // shift amounts are not:
      //   ubfx  r3, r1, #16, #8
      //   ldr.w r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w r9, #1020
      //   and.w r2, r9, r1, lsr #14
      //   ldr   r2, [r0, r2]
      continue;
    And_imm >>= TZ;
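    // After shifting out the trailing zeros, the mask must be a contiguous
    // run of ones starting at bit 0 (i.e. of the form 2^n - 1);
    // x & (x + 1) == 0 tests exactly that.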
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure the first operand is not a shifter operand, which would
    // prevent folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());
    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass, e.g.:
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18-19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd
    // This adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
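  // Reposition M at N's place in the DAG's node list so that the selection
  // walk, which visits nodes in list order, still reaches M once it has
  // replaced N.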
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand, do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case. That is matched to a separate
  // lower-complexity pattern with an explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match the base-register-only case. That is matched to a separate
  // lower-complexity pattern with an explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
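  // If the two operands share no set bits, the OR can never produce a carry,
  // so it computes the same value an ADD would.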
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
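  // On Cortex-A9-like cores and Swift, folding a multiply that has other
  // users would duplicate its work, so only fold a single-use multiply there.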
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  748. int Val;
  749. if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
  750. if (AddSub == ARM_AM::sub) Val *= -1;
  751. Offset = CurDAG->getRegister(0, MVT::i32);
  752. Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
  753. return true;
  754. }
  755. return false;
  756. }
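
/// SelectAddrMode2OffsetImm - Select a 12-bit immediate offset for a
/// post-indexed load or store, packed into an AM2 opcode value together with
/// the add/sub direction.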
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
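
/// SelectAddrOffsetNone - Match an address with no offset; N itself is used
/// as the base.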
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
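
/// SelectAddrMode3 - Match a base register plus either a register offset or
/// a +/- 8-bit immediate offset (addressing mode 3).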
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
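
/// SelectAddrMode3Offset - Select the offset of a pre/post-indexed addressing
/// mode 3 load or store: an 8-bit immediate if possible, otherwise a register.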
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
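
/// IsAddressingMode5 - Shared helper for the addressing mode 5 selectors
/// below: base register plus an 8-bit immediate offset scaled by 4 (or by 2
/// for the FP16 form).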
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
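
/// SelectAddrMode6 - Select the address and alignment operands for a NEON
/// addressing mode 6 access; the alignment is derived from the parent memory
/// node.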
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    unsigned MMOAlign = MemN->getAlignment();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlignment();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}
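
/// SelectAddrModePC - Match a single-use ARMISD::PIC_ADD, splitting it into
/// its offset operand and its constant label operand.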
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so create the add node directly which will become a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
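
/// SelectThumbAddrModeRRSext - Match a register+register Thumb address; a
/// lone zero constant is also accepted, serving as both base and offset.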
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
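
/// SelectThumbAddrModeImm5S - Match a base register plus an unsigned 5-bit
/// immediate offset scaled by Scale bytes.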
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
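
/// SelectThumbAddrModeSP - Match a frame-index-based address with an unsigned
/// 8-bit offset scaled by 4, forcing 4-byte object alignment where needed.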
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4, the
        // object indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
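
/// SelectTAddrModeImm7 - Match a base register plus a signed 7-bit immediate
/// offset scaled by 2^Shift; falls back to a base-only match with a zero
/// offset.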
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
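
/// SelectT2AddrModeImm12 - Match a base register plus an unsigned 12-bit
/// immediate offset. Negative offsets are left for t2LDRi8 and constant pool
/// addresses for t2LDRpci.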
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
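
/// SelectT2AddrModeImm8 (scaled form) - Match a base register plus a signed
/// 8-bit immediate offset scaled by 2^Shift; falls back to a base-only match.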
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
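
/// SelectT2AddrModeImm8Offset - Select the unscaled 8-bit immediate offset of
/// a pre/post-indexed access, negated for decrementing modes.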
bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
                 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}
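
/// SelectT2AddrModeSoReg - Match a register base plus a register offset
/// optionally shifted left by up to 3 bits, leaving immediate offsets to the
/// imm12/imm8 forms.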
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
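
/// tryARMIndexedLoad - Try to select a pre/post-indexed ARM-mode load,
/// choosing the LDR/LDRB/LDRH/LDRSB/LDRSH form from the memory type, the
/// extension kind and whichever offset form matches.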
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                       CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}
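
/// tryT1IndexedLoad - Try to select a Thumb1 post-incremented i32 load; only
/// a post-increment of exactly 4 is supported.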
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load
  // to look, however, so we use a pseudo here and switch it for a tLDMIA_UPD
  // after ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
                   CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}
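
/// tryT2IndexedLoad - Try to select a pre/post-indexed Thumb2 load with an
/// 8-bit immediate offset.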
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}
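
/// tryMVEIndexedLoad - Try to select a pre/post-indexed MVE vector load or
/// masked load, matching element size, extension kind and alignment to a
/// VLDRB/VLDRH/VLDRW variant.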
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a
  // vldrb.8 as opposed to a vldrw.32). This can allow extra addressing modes
  // or alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}
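
// Returns true for VLD writeback opcodes that use a fixed (post-increment by
// the access size) stride rather than a register stride.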
static bool isVLDfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed: return true;
  case ARM::VLD1d16wb_fixed: return true;
  case ARM::VLD1d64Qwb_fixed: return true;
  case ARM::VLD1d32wb_fixed: return true;
  case ARM::VLD1d64wb_fixed: return true;
  case ARM::VLD1d8TPseudoWB_fixed: return true;
  case ARM::VLD1d16TPseudoWB_fixed: return true;
  case ARM::VLD1d32TPseudoWB_fixed: return true;
  case ARM::VLD1d64TPseudoWB_fixed: return true;
  case ARM::VLD1d8QPseudoWB_fixed: return true;
  case ARM::VLD1d16QPseudoWB_fixed: return true;
  case ARM::VLD1d32QPseudoWB_fixed: return true;
  case ARM::VLD1d64QPseudoWB_fixed: return true;
  case ARM::VLD1q8wb_fixed: return true;
  case ARM::VLD1q16wb_fixed: return true;
  case ARM::VLD1q32wb_fixed: return true;
  case ARM::VLD1q64wb_fixed: return true;
  case ARM::VLD1DUPd8wb_fixed: return true;
  case ARM::VLD1DUPd16wb_fixed: return true;
  case ARM::VLD1DUPd32wb_fixed: return true;
  case ARM::VLD1DUPq8wb_fixed: return true;
  case ARM::VLD1DUPq16wb_fixed: return true;
  case ARM::VLD1DUPq32wb_fixed: return true;
  case ARM::VLD2d8wb_fixed: return true;
  case ARM::VLD2d16wb_fixed: return true;
  case ARM::VLD2d32wb_fixed: return true;
  case ARM::VLD2q8PseudoWB_fixed: return true;
  case ARM::VLD2q16PseudoWB_fixed: return true;
  case ARM::VLD2q32PseudoWB_fixed: return true;
  case ARM::VLD2DUPd8wb_fixed: return true;
  case ARM::VLD2DUPd16wb_fixed: return true;
  case ARM::VLD2DUPd32wb_fixed: return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}
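
// Returns true for VST writeback opcodes that use a fixed stride rather than
// a register stride.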
static bool isVSTfixed(unsigned Opc) {
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed: return true;
  case ARM::VST1d16wb_fixed: return true;
  case ARM::VST1d32wb_fixed: return true;
  case ARM::VST1d64wb_fixed: return true;
  case ARM::VST1q8wb_fixed: return true;
  case ARM::VST1q16wb_fixed: return true;
  case ARM::VST1q32wb_fixed: return true;
  case ARM::VST1q64wb_fixed: return true;
  case ARM::VST1d8TPseudoWB_fixed: return true;
  case ARM::VST1d16TPseudoWB_fixed: return true;
  case ARM::VST1d32TPseudoWB_fixed: return true;
  case ARM::VST1d64TPseudoWB_fixed: return true;
  case ARM::VST1d8QPseudoWB_fixed: return true;
  case ARM::VST1d16QPseudoWB_fixed: return true;
  case ARM::VST1d32QPseudoWB_fixed: return true;
  case ARM::VST1d64QPseudoWB_fixed: return true;
  case ARM::VST2d8wb_fixed: return true;
  case ARM::VST2d16wb_fixed: return true;
  case ARM::VST2d32wb_fixed: return true;
  case ARM::VST2q8PseudoWB_fixed: return true;
  case ARM::VST2q16PseudoWB_fixed: return true;
  case ARM::VST2q32PseudoWB_fixed: return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed: return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed: return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed: return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed: return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed: return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed: return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}
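
/// SelectVLD - Select a NEON VLD1/2/3/4 instruction, handling both the
/// intrinsic and the post-indexed forms. VLD3/4 of quad registers is split
/// into two instructions loading the even and odd D subregisters.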
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
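
/// SelectVST - Select a NEON VST1/2/3/4 instruction, handling both the
/// intrinsic and the post-indexed forms. VST3/4 of quad registers is split
/// into two instructions storing the even and odd D subregisters.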
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  2102. if (isUpdating) {
  2103. SDValue Inc = N->getOperand(AddrOpIdx + 1);
  2104. assert(isa<ConstantSDNode>(Inc.getNode()) &&
  2105. "only constant post-increment update allowed for VST3/4");
  2106. (void)Inc;
  2107. Ops.push_back(Reg0);
  2108. }
  2109. Ops.push_back(RegSeq);
  2110. Ops.push_back(Pred);
  2111. Ops.push_back(Reg0);
  2112. Ops.push_back(Chain);
  2113. SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
  2114. Ops);
  2115. CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  2116. ReplaceNode(N, VStB);
  2117. }
  2118. void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
  2119. unsigned NumVecs,
  2120. const uint16_t *DOpcodes,
  2121. const uint16_t *QOpcodes) {
  2122. assert(Subtarget->hasNEON());
  2123. assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  2124. SDLoc dl(N);
  2125. SDValue MemAddr, Align;
  2126. bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
  2127. // nodes are not intrinsics.
  2128. unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  2129. unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  2130. if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
  2131. return;
  2132. MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  2133. SDValue Chain = N->getOperand(0);
  2134. unsigned Lane =
  2135. cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  2136. EVT VT = N->getOperand(Vec0Idx).getValueType();
  2137. bool is64BitVector = VT.is64BitVector();
  2138. unsigned Alignment = 0;
  2139. if (NumVecs != 3) {
  2140. Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  2141. unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
  2142. if (Alignment > NumBytes)
  2143. Alignment = NumBytes;
  2144. if (Alignment < 8 && Alignment < NumBytes)
  2145. Alignment = 0;
  2146. // Alignment must be a power of two; make sure of that.
  2147. Alignment = (Alignment & -Alignment);
  2148. if (Alignment == 1)
  2149. Alignment = 0;
  2150. }
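  // For example, a two-vector lane op on v4i16 touches NumBytes = 2 * 16 / 8
  // = 4 bytes, so a requested alignment of 8 is clamped down to 4, while a
  // requested alignment of 2 (below both 8 and NumBytes) is dropped to 0,
  // i.e. no extra alignment is encoded.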
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));
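  // Note that the machine node's first two results come in the opposite order
  // to the intrinsic's, hence the reversed value types above and the swapped
  // ReplaceUses calls below.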
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
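    // SatOp is the saturating bit width taken from the intrinsic; the
    // instruction encodes it as a single bit, 0 for 64-bit saturation and 1
    // for the narrower width (48 bits in the MVE long-shift instructions).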
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }
  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
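  // Bit 29 is where the carry flag sits in the FPSCR-shaped value these
  // intrinsics pass around. VADCI and VSBCI implicitly start with the carry
  // clear and set respectively, so a constant carry-in that is already in
  // that state lets us drop the carry operand and use the no-carry opcode.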
  2342. if (CarryInConstant &&
  2343. (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
  2344. Opcode = OpcodeWithNoCarry;
  2345. } else {
  2346. Ops.push_back(CarryIn);
  2347. Opcode = OpcodeWithCarry;
  2348. }
  2349. if (Predicated)
  2350. AddMVEPredicateToOps(Ops, Loc,
  2351. N->getOperand(FirstInputOp + 3), // predicate
  2352. N->getOperand(FirstInputOp - 1)); // inactive
  2353. else
  2354. AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));
  2355. CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
  2356. }
  2357. void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  2358. SDLoc Loc(N);
  2359. SmallVector<SDValue, 8> Ops;
  2360. // One vector input, followed by a 32-bit word of bits to shift in
  2361. // and then an immediate shift count
  2362. Ops.push_back(N->getOperand(1));
  2363. Ops.push_back(N->getOperand(2));
  2364. int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  2365. Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  2366. if (Predicated)
  2367. AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  2368. else
  2369. AddEmptyMVEPredicateToOps(Ops, Loc);
  2370. CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops));
  2371. }
  2372. static bool SDValueToConstBool(SDValue SDVal) {
  2373. assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  2374. ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  2375. uint64_t Value = SDValConstant->getZExtValue();
  2376. assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  2377. return Value;
  2378. }
  2379. void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
  2380. const uint16_t *OpcodesS,
  2381. const uint16_t *OpcodesU,
  2382. size_t Stride, size_t TySize) {
  2383. assert(TySize < Stride && "Invalid TySize");
  2384. bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  2385. bool IsSub = SDValueToConstBool(N->getOperand(2));
  2386. bool IsExchange = SDValueToConstBool(N->getOperand(3));
  2387. if (IsUnsigned) {
  2388. assert(!IsSub &&
  2389. "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
  2390. assert(!IsExchange &&
  2391. "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  2392. }
  2393. auto OpIsZero = [N](size_t OpNo) {
  2394. if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
  2395. if (OpConst->getZExtValue() == 0)
  2396. return true;
  2397. return false;
  2398. };
  2399. // If the input accumulator value is not zero, select an instruction with
  2400. // accumulator, otherwise select an instruction without accumulator
  2401. bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
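  // The opcode tables are flattened so that the variant lives at index
  // ((IsSub * 4 + IsExchange * 2 + IsAccum) * Stride) + TySize, which the
  // pointer bumps below compute incrementally. For example, with Stride == 2
  // the signed, exchanging, accumulating 32-bit form (vmlaldavax) is
  // OpcodesS[7].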
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }

  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
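  // Each stage instruction reads the tuple produced by the previous one
  // through its Data operand and defines an updated tuple, so Data and Chain
  // are threaded from one stage to the next; only the final stage, emitted
  // after this loop, may carry the writeback result.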
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++));   // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++);  // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCoproc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction.
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT,
                                                 SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;
  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;
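  // Past this check the two inserts fill exactly one 32-bit container: the
  // inner insert writes an even lane and the outer one the odd lane directly
  // above it, e.g. lanes 2 and 3 together form s-subregister ssub_1.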
  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
    if (VT == MVT::v8i16) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
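  // For example, a fixed-to-float multiply by 0.125 (2^-3) denotes a value
  // with n = 3 fractional bits: getExactInverse yields 8.0, whose log2 is 3,
  // so the replacement vcvt below uses #3.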
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }

  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}

bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
  // Transform a floating-point to fixed-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  EVT Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  unsigned int ScalarBits = Type.getScalarSizeInBits();

  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
                    N->getOpcode() == ISD::FP_TO_UINT_SAT;
  SDNode *Node = N->getOperand(0).getNode();

  // floating-point to fixed-point with one fractional bit gets turned into an
  // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
  if (Node->getOpcode() == ISD::FADD) {
    if (Node->getOperand(0) != Node->getOperand(1))
      return false;
    SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16 bit unsigned floats
    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
      return false;

    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      llvm_unreachable("unexpected scalar size in tryFP_TO_INT");
    }

    SmallVector<SDValue, 3> Ops{Node->getOperand(0),
                                CurDAG->getConstant(1, dl, MVT::i32)};
    AddEmptyMVEPredicateToOps(Ops, dl, Type);

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
    return true;
  }

  if (Node->getOpcode() != ISD::FMUL)
    return false;

  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
}

bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
  // Transform a fixed-point to floating-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  auto Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  auto LHS = N->getOperand(0);
  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
    return false;

  return transformFixedFloatingPointConversion(
      N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
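      // e.g. (srl (shl x, 8), 24) keeps bits 16..23 of x: Width = 32 - 24 - 1
      // = 7 encodes an 8-bit field, LSB = 24 - 8 = 16, so this becomes
      // UBFX x, #16, #8.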
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0,  X, -X
///   select_cc setgt    X, -1,  X, -X
///   select_cc setl[te] X,  0, -X,  X
///   select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
      XType.isInteger() && SRAConstant != nullptr &&
      Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
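  // e.g. A = 0x0ff0 gives FirstOne = 11 and LastOne = 4; its popcount of 8
  // equals 11 - 4 + 1, so the run is contiguous and (11, 4) is returned,
  // whereas A = 0b1011 fails the check below and yields None.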
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return Optional<std::pair<unsigned, unsigned>>();
  return std::make_pair(FirstOne, LastOne);
}
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
      And->getOpcode() != ISD::AND)
    return;

  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };
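  // For example, C = 0b0110 gives Range = (2, 1): the mask touches neither
  // bit 0 nor bit 31 and has more than one bit set, so on Thumb-1 it takes
  // case 4 below, LSLS #29 followed by LSRS #30. A single-bit mask such as
  // 0b0100 takes case 3 and flips the EQ/NE condition to PL/MI instead.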
  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);
      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::XOR:
    // Select special operations if XOR node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }

      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, Align(4));

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlign(FI) < Align(4))
        MFI.setObjectAlignment(FI, Align(4));
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (tryInsertVectorElt(N))
      return;
    break;
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_TO_SINT_SAT:
    if (tryFP_TO_INT(N, dl))
      return;
    break;
  case ISD::FMUL:
    if (tryFMULFixed(N, dl))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
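      // Multiplications by 2^n + 1 or 2^n - 1 become an add or a reverse
      // subtract of a shifted operand, e.g. mul x, 9 -> add x, (x lsl 3)
      // and mul x, 7 -> rsb x, (x lsl 3).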
  3339. if (isPowerOf2_32(RHSV-1)) { // 2^n+1?
  3340. unsigned ShImm = Log2_32(RHSV-1);
  3341. if (ShImm >= 32)
  3342. break;
  3343. SDValue V = N->getOperand(0);
  3344. ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
  3345. SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
  3346. SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  3347. if (Subtarget->isThumb()) {
  3348. SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
  3349. CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
  3350. return;
  3351. } else {
  3352. SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
  3353. Reg0 };
  3354. CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
  3355. return;
  3356. }
  3357. }
  3358. if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
  3359. unsigned ShImm = Log2_32(RHSV+1);
  3360. if (ShImm >= 32)
  3361. break;
  3362. SDValue V = N->getOperand(0);
  3363. ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
  3364. SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
  3365. SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  3366. if (Subtarget->isThumb()) {
  3367. SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
  3368. CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
  3369. return;
  3370. } else {
  3371. SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
  3372. Reg0 };
  3373. CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
  3374. return;
  3375. }
  3376. }
  3377. }
  3378. break;
  case ISD::AND: {
    // Check for an unsigned bitfield extract.
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the
    // immediate can be materialized more cheaply when negated. If that is the
    // case, we can negate the immediate and use a BIC instead.
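    // e.g. on Thumb1 the immediate in "and x, #0xffffff00" is costlier to
    // build than its complement 0xff (a single MOV), so 0xff is materialized
    // instead and a register BIC is emitted.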
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
        Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
            CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the
        // topological ordering so it is just before N. Otherwise, don't touch
        // its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }
    // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the lower
    // 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0, then the
    // top 16 bits of the result come entirely from c2 and the lower 16 bits
    // entirely from x. That is equal to (or (and x, 0xffff), (and c2,
    // 0xffff0000)), so select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
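    // e.g. (and (or x, 0x12340000), 0x1234ffff) becomes "movt x, #0x1234",
    // which keeps the low half of x and writes 0x1234 to the high half.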
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }
    break;
  }
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS:
    //   (sube a, hi(smul_lohi x, y), (subc 0, lo(smul_lohi x, y)))
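    // The whole expression is the top 32 bits of ((a << 32) - (x * y)),
    // which is exactly what SMMLS computes.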
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));

    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
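  // The next three cases select the low-overhead-loop pseudos. They map
  // directly onto the t2WhileLoopSetup/t2WhileLoopStart/t2LoopEnd machine
  // opcodes; later passes either finalize these into real WLS/LE
  // instructions or revert them to ordinary compare-and-branch code.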
  case ARMISD::WLSSETUP: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::WLS: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
                                         N->getOperand(1), N->getOperand(2),
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of LDRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::STRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(3);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of STRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDNode *RegPair =
        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0
    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();

    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();

        // Handle low-overhead loops.
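        // A (brcond (cmpz (loop_decrement_reg ...), 0)) is rewritten here
        // into a t2LoopDec feeding a t2LoopEnd, the pseudo form of the
        // low-overhead loop back-edge.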
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
              cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
              MVT::i32);
          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
              CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                     CurDAG->getVTList(MVT::i32, MVT::Other),
                                     Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
              CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);
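      // SelectCMPZ may have rewritten the compare so that the tested bit now
      // ends up in the N flag; in that case EQ/NE must be remapped to PL/MI.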
      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away, but we need to keep
    // it for its glue output.
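    // e.g. (CMPZ x, #-2) becomes (CMPZ (ADDS x, #2), #0): x + 2 and x - (-2)
    // set identical flags.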
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();
      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for
      // tADDi3. Outside that range we can just use a CMN which is 32-bit but
      // has a 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] =
            {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue),
                            Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);
    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
            (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VZIPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VZIPd16; break;
    case MVT::v2f32:
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VZIPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VZIPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VZIPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VUZPd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VUZPd16; break;
    case MVT::v2f32:
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VUZPq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VUZPq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VUZPq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    unsigned Opc = 0;
    EVT VT = N->getValueType(0);
    switch (VT.getSimpleVT().SimpleTy) {
    default: return;
    case MVT::v8i8:  Opc = ARM::VTRNd8; break;
    case MVT::v4f16:
    case MVT::v4i16: Opc = ARM::VTRNd16; break;
    case MVT::v2f32:
    case MVT::v2i32: Opc = ARM::VTRNd32; break;
    case MVT::v16i8: Opc = ARM::VTRNq8; break;
    case MVT::v8f16:
    case MVT::v8i16: Opc = ARM::VTRNq16; break;
    case MVT::v4f32:
    case MVT::v4i32: Opc = ARM::VTRNq32; break;
    }
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
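    // BUILD_VECTORs of FP elements are formed directly as register sequences:
    // two f64s become a D-register pair, two f32s an S-register pair, and
    // four f32s a quad of S registers spanning a Q register.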
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }
  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }
  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }
  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }
  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
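      // MVE has no single vld2 instruction; the load is split into VLD20 and
      // VLD21 stages, and the _wb form of the final stage performs the
      // writeback of the base pointer.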
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }
  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }
  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
      Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates; the top 4 bits
      // of the encoded instruction are always '1111'. Assembly language does
      // accept AL as a predicate on mrrc2, but it makes no difference to the
      // encoding, so no predicate operands are added here.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
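      // Thumb t2LDREXD yields the two halves as separate i32 results; ARM
      // LDREXD yields a GPRPair (Untyped) that is split with EXTRACT_SUBREG
      // below.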
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32,
                                                   SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A store-exclusive double returns an i32 value that is the status of
      // the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
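      // The writeback gathers also return the updated base vector; going by
      // the _qi_pre opcode names, this is presumably selected as the
      // pre-incrementing form of the load.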
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }
    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }
    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16,
                                                Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }
    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;
    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
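      // Note the asymmetry between the two tables: only the signed forms have
      // exchanging (x) and subtracting (VMLSLDAV) variants, so the unsigned
      // table covers a strict subset of the shapes in OpcodesS.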
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }
    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }
    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }
    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }
    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }
    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }
    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64bit), extract the integer operands from
// its fields and add them to the provided vector.
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}
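// Illustrative example (not taken from a test): given the 32-bit string
// "cp15:0:c13:c0:3", the helper above pushes the five i32 target constants
// {15, 0, 13, 0, 3}, which tryReadRegister/tryWriteRegister below turn into
// the coprocessor operands of an MRC/MCR (or t2MRC/t2MCR) node.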
// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}
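// For example, a banked-register name such as "r8_usr" or "spsr_fiq" is
// looked up by name in the generated banked-register table; any string not
// present in that table is rejected with -1.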
// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns
// a value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}
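// For example, "apsr_g" yields 0x1 and "apsr_nzcvqg" yields 0x3 once the
// flags suffix has been split off by the callers below; a bare register name
// with no suffix defaults to 0x2 (nzcvq).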
// Maps an M Class special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}
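// For example, on an M-profile target names such as "msp" and "psp" resolve
// to their SYSm encodings via the generated system-register table, while an
// unknown name, or one whose required subtarget features are missing, is
// rejected with -1.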
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}
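// Worked example: for Reg == "spsr" and Flags == "fc", the loop above sets
// the f (0x8) and c (0x1) field bits, and the R bit (0x10) is added because
// the register is spsr, giving a mask of 0x19. The same mask results from
// "spsr" with no flags, since empty flags default to "fc".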
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the predicate operands and the chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }
  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class, so we need to check if it is one
  // of the remaining possible values, which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}
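// Illustrative IR (hypothetical, for orientation only): a call such as
//   %v = call i32 @llvm.read_register.i32(metadata !0)   ; !0 = !{!"cpsr"}
// reaches the apsr/cpsr arm above and is lowered to an MRS (or t2MRS_AR)
// machine node whose i32 result carries the current program status register.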
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }
  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}
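// Illustrative IR (hypothetical): a call such as
//   call void @llvm.write_register.i32(metadata !0, i32 %v)  ; !0 = !{!"fpscr"}
// is matched by the VFP StringSwitch above and lowered to a VMSR machine
// node, while "cpsr_fc" falls through to the A/R-class path and becomes an
// MSR (or t2MSR_AR) with mask 0x9 (the f and c field bits).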
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;
    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
        && Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    }
    else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
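// Illustrative source-level trigger (hypothetical): for
//   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Ptr));
// the 64-bit "=&r" output is initially modeled as two i32 GPR defs; the
// rewrite above folds them into a single GPRPair virtual register so that
// register allocation can satisfy ldrexd's even/odd register-pair
// requirement.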
bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}
/// createARMISelDag - This pass converts a legalized DAG into a
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}