ARMISelDAGToDAG.cpp

//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  static char ID;

  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(ID, tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;
  /// Return true, as some complex patterns (like those that call
  /// canExtractShiftFromMul) can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }
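
  // Illustrative note: a "shifter operand" folds a shift into the data-
  // processing instruction itself, so e.g. (add X, (shl Y, 2)) can be
  // selected as a single instruction:
  //   add r0, r1, r2, lsl #2   ; r0 = r1 + (r2 << 2)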
  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }
  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }
  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }
  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }
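
  // Illustrative note: an ARM "so_imm" (modified immediate) is an 8-bit value
  // rotated right by an even amount, so e.g. 0x0003FC00 (0xFF ror 22) is
  // encodable but 0xFF0000FF is not. Thumb-2 additionally allows replicated
  // byte patterns such as 0x00XY00XY and 0xXYXYXYXY.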
  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  ///   2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  ///   1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array, which contains
  /// multiple opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);

  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                           const uint16_t *OpcodesS, const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);
  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///      the accumulator and the immediate operand, i.e. 0
  ///      for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2,
                              SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

} // end anonymous namespace

char ARMDAGToDAGISel::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}
/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
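
// Illustrative example: with Scale = 4, RangeMin = 0, RangeMax = 256, a
// constant of 1020 yields ScaledConstant = 255 and returns true, while 1021
// fails the divisibility check and 1024 falls outside the range.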
void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;
    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is a constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2 >> tz)), tz)) where tz is the
    // number of trailing zeros of c2. The left shift would be folded as a
    // shifter operand of 'add', and the 'and' and 'srl' would become a
    // bit-field extraction node (UBFX).
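    // For example, with c1 = 14 and c2 = 1020 (tz = 2):
    //   (add X1, (and (srl X2, 14), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 16), 255), 2))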
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = countTrailingZeros(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free; e.g. on
      // Swift, a left shifter operand of 1 or 2 is free, but others are not:
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure the first operand is not a shifter operand which would
    // prevent folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOpt::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}
bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse())
    return false;
  // Check if the multiply is by a constant.
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst)
    return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse())
    return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0)
    return false;

  // Find the largest power of 2 that MulConstVal is a multiple of.
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0)
      return false;
  }

  // Only optimise if the new cost is better.
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
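
// Illustrative example: a multiply by 40 can be split as (mul X, 5) << 3
// (PowerOfTwo = 3, NewMulConst = 5); the split is kept only when
// materializing 5 is strictly cheaper than materializing 40.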
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}
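
// Illustrative example: in (or (and X, 0xFFFF0000), (and Y, 0xFFFF)) the two
// operands have no common set bits, so the OR computes the same value as an
// ADD and can reuse ADD's more compact encoding and addressing forms.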
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
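
// Illustrative example: an imm12 operand covers ldr r0, [r1, #4095] and
// ldr r0, [r1, #-4095]; offsets outside the signed 12-bit window fall back
// to the base-only form, with the add/sub selected separately.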
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
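      // Illustrative example: a load from X * 5 = X + (X << 2) can be
      // selected as ldr r0, [rX, rX, lsl #2].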
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12.
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub =
      N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant; if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
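
// Illustrative example: a register offset with shift in a post-indexed form
// selects as ldr r0, [r1], r2, lsl #2 (the base register is updated by
// r2 << 2 after the access).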
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}
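
// SelectAddrMode2OffsetImm - Select the immediate form of a post-indexed
// addressing-mode 2 load or store, e.g. "ldr r0, [r1], #4". Unlike the
// pre-indexed form above, the add/sub sense is encoded via getAM2Opc rather
// than by negating the value.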
bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
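
// SelectAddrMode3 - Match addressing mode 3, used by halfword and signed-byte
// loads/stores: a base register plus either a register offset or a +/- 8-bit
// immediate, e.g. "ldrh r0, [r1, #-8]".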
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
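
// IsAddressingMode5 - Match the VFP load/store addressing mode: a base
// register plus an immediate that is a multiple of 4 in [-1020, 1020] (or a
// multiple of 2 in [-510, 510] for the FP16 variants), e.g.
// "vldr d0, [r1, #8]".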
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;
  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
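
// SelectAddrMode6 - Match the NEON VLD/VST addressing mode: just a base
// register, plus an explicit alignment operand (in bytes) derived from the
// parent memory node. Loads/stores clamp the alignment to what the
// instruction can encode; intrinsic users keep the raw value and refine it
// later.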
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                                      SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
//                         Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so that the add node is selected directly and becomes a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
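
// SelectThumbAddrModeRRSext / SelectThumbAddrModeRR - Match the Thumb1
// register-plus-register address form, e.g. "ldr r0, [r1, r2]". The RR
// variant defers to the ri form when the offset is a small negative constant
// (see shouldUseZeroOffsetLdSt above).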
bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
    if (!NC || !NC->isZero())
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead.
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead.
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
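
// SelectThumbAddrModeSP - Match the Thumb1 SP-relative address form: a frame
// index (or SP) plus an unsigned imm8 scaled by 4, e.g. "ldr r0, [sp, #16]".
// The fold is only done when the offset stays inside the frame object and the
// object can be made 4-byte aligned.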
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

//===----------------------------------------------------------------------===//
//                        Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//
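
// SelectT2AddrModeImm12 - Match a Thumb2 base plus unsigned 12-bit immediate
// address, e.g. "ldr.w r0, [r1, #4092]" (t2LDRi12). Negative offsets are left
// for the imm8 form (t2LDRi8) and constant-pool wrappers for t2LDRpci.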
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
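
// SelectT2AddrModeImm8 - Match a Thumb2 base plus negative 8-bit immediate
// address, e.g. "ldr r0, [r1, #-255]" (t2LDRi8). Non-negative offsets belong
// to the imm12 form above.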
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7-bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}
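
// SelectT2AddrModeSoReg - Match the Thumb2 register-plus-shifted-register
// address form: (R + R) or (R + (R << [1,2,3])), e.g.
// "ldr.w r0, [r1, r2, lsl #2]". Plain immediate offsets are left for the
// imm12/imm8 forms.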
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant; if not, we can't
    // fold it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand, do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
  return true;
}

//===--------------------------------------------------------------------===//

/// getAL - Returns an ARMCC::AL immediate node.
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
  return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}
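
// tryARMIndexedLoad - Try to select a pre- or post-indexed ARM-mode load
// (e.g. "ldr r0, [r1, #4]!" or "ldrb r0, [r1], r2") by matching one of the
// addrmode2/addrmode3 offset forms above. Returns true and replaces N on
// success.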
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;
  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
        ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
        : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                           MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;

  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load
  // to look, however, so we use a pseudo here and switch it for a tLDMIA_UPD
  // after ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d8TPseudoWB_fixed : return true;
  case ARM::VLD1d16TPseudoWB_fixed : return true;
  case ARM::VLD1d32TPseudoWB_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d8QPseudoWB_fixed : return true;
  case ARM::VLD1d16QPseudoWB_fixed : return true;
  case ARM::VLD1d32QPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d8TPseudoWB_fixed : return true;
  case ARM::VST1d16TPseudoWB_fixed : return true;
  case ARM::VST1d32TPseudoWB_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d8QPseudoWB_fixed : return true;
  case ARM::VST1d16QPseudoWB_fixed : return true;
  case ARM::VST1d32QPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}
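
// SelectVLD - Select a NEON VLD1/2/3/4, optionally with writeback. DOpcodes,
// QOpcodes0 and QOpcodes1 are opcode tables indexed by element size.
// D-register and VLD1/VLD2 Q-register forms map to a single instruction;
// VLD3/VLD4 of Q registers are split into two instructions loading the even
// and odd D subregs.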
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd
    // registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs. This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
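
// SelectVST - Select a NEON VST1/2/3/4, optionally with writeback; the mirror
// image of SelectVLD above. D-register and VST1/VST2 Q-register forms are a
// single instruction; VST3/VST4 of Q registers are split into two stores
// covering the even and odd D subregs.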
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd
  // registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

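/// Select a NEON load or store of a single lane across NumVecs registers
/// (VLDn-lane / VSTn-lane). The per-vector operands are bundled into a D/Q
/// register pair or quad REG_SEQUENCE, and for loads the individual vectors
/// are re-extracted from the wide result as subregisters.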
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane =
      cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

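// The helpers below append the standard MVE predication operands to an
// instruction's operand list: a vector condition code (ARMVCC::Then when a
// real predicate mask is supplied, ARMVCC::None for unpredicated forms), the
// mask itself (or a dummy register), the tail-predication register (always
// the dummy register here), and optionally a value supplying the inactive
// lanes.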
template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

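/// Select an MVE intrinsic with a writeback result, such as a gather load
/// that updates its vector of base addresses. Note that the machine node
/// returns the updated base vector and the data value in the opposite order
/// from the intrinsic's results, hence the swapped ReplaceUses calls below.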
void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));

  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
  if (HasSaturationOperand) {
    int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

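/// Select MVE VADC/VSBC. The carry-in operand carries the flag in bit 29
/// (the carry position within FPSCR); when the carry-in is a constant whose
/// bit 29 already matches what the carry-less variant implies (clear for an
/// add, set for a subtract), the carry operand is dropped and
/// OpcodeWithNoCarry is used instead.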
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

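/// Shared selection for the VMLALDAV/VMLSLDAV family. The OpcodesS/OpcodesU
/// tables are laid out so that the variant for a given combination of flags
/// lives at
///   Opcodes[(IsSub * 4 + IsExchange * 2 + IsAccum) * Stride + TySize],
/// which the incremental pointer arithmetic below walks to.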
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
      if (OpConst->getZExtValue() == 0)
        return true;
    return false;
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator; otherwise select an instruction without accumulator.
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

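/// Select an MVE VLD2/VLD4 structured load. The deinterleaving load is
/// emitted as NumVecs consecutive stage instructions, each one taking the
/// partially-filled register tuple produced by the previous stage; only the
/// final stage may also produce a pointer writeback.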
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add an MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last load may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}

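/// Select an MVE incrementing/decrementing vector-duplicate instruction
/// (VIDUP/VDDUP, or the wrapping VIWDUP/VDWDUP forms, which take an extra
/// limit operand); the step operand must be a compile-time constant.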
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++));   // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++);  // step
  int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

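/// Select a dual-register CDE intrinsic (the CX1D/CX2D/CX3D forms). The two
/// 32-bit halves of the accumulator, when present, are packed into a GPR
/// pair, and the instruction's untyped register-pair result is split back
/// into two i32 subregister values, with the halves swapped on big-endian
/// targets.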
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCoproc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCoproc)->getZExtValue();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants, copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand.
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, so add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction.
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the
  // dual-register CDE instruction is a register pair. We need to extract the
  // two subregisters and replace all uses of the original outputs with the
  // extracted subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}

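/// Select a NEON VLDn-dup (load one element and duplicate it to all lanes)
/// operation, optionally post-incrementing. As with the other VLDn forms,
/// quad-register VLD3/VLD4-dup is built from two instructions, and the
/// per-vector results are extracted as subregisters of the wide result.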
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                   OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else if (NumVecs == 2) {
    const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Chain = SDValue(VLdA, 1);
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx + Vec, dl, VT,
                                                 SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}

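/// For example, a pair of adjacent INSERT_VECTOR_ELTs writing lanes 2k+1 and
/// 2k of a v8f16/v8i16 vector can often be folded into a single 32-bit
/// (s-subregister) lane move, or into a VMOVX/VINS pair when full fp16 is
/// available; the checks below look for those shapes.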
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into an f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Otherwise, handle the v8i16 pattern of an extract and an insert, with
    // an optional VMOVX for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16-bit unsigned floats.
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
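  // For example, an FMUL by a splat of 256.0 (2^8) on 32-bit elements is a
  // float->fixed conversion with n == 8 fractional bits, while an FMUL by
  // 2^-8 is the corresponding fixed->float conversion.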
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }

  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}

bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
  // Transform a floating-point to fixed-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  EVT Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  unsigned int ScalarBits = Type.getScalarSizeInBits();

  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
                    N->getOpcode() == ISD::FP_TO_UINT_SAT;
  SDNode *Node = N->getOperand(0).getNode();

  // Floating-point to fixed-point with one fractional bit gets turned into an
  // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y)).
  if (Node->getOpcode() == ISD::FADD) {
    if (Node->getOperand(0) != Node->getOperand(1))
      return false;
    SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16-bit unsigned floats.
    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
      return false;

    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    default:
      // Guard against leaving Opcode uninitialized: only f16 and f32 vector
      // elements are expected to reach this point.
      llvm_unreachable("unexpected number of scalar bits");
    }
    SmallVector<SDValue, 3> Ops{Node->getOperand(0),
                                CurDAG->getConstant(1, dl, MVT::i32)};
    AddEmptyMVEPredicateToOps(Ops, dl, Type);

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
    return true;
  }

  if (Node->getOpcode() != ISD::FMUL)
    return false;

  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
}

bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
  // Transform a fixed-point to floating-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  auto Type = N->getValueType(0);
  if (!Type.isVector())
    return false;

  auto LHS = N->getOperand(0);
  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
    return false;

  return transformFixedFloatingPointConversion(
      N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

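/// Try to select a bitfield extract (SBFX/UBFX, or a cheaper plain shift)
/// from one of several DAG shapes: an AND of a right shift, a left shift
/// followed by a right shift, a right shift of a shifted mask, or a shift
/// combined with SIGN_EXTEND_INREG.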
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
      ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
      : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
              CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                        MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // The shift amount must be the same as the AND's LSB.
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}

/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0,  X, -X
///   select_cc setgt    X, -1,  X, -X
///   select_cc setl[te] X,  0, -X,  X
///   select_cc setlt    X,  1, -X,  X
/// which represent integer ABS into:
///   Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// an ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue SUBSrc0 = N->getOperand(0);
  SDValue SUBSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue XORSrc0 = SUBSrc0.getOperand(0);
  SDValue XORSrc1 = SUBSrc0.getOperand(1);
  SDValue SRASrc0 = SUBSrc1.getOperand(0);
  SDValue SRASrc1 = SUBSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
    return true;
  }

  return false;
}

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

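/// If A contains exactly one contiguous run of set bits, return the indices
/// of its most- and least-significant set bits (for example, 0x0ff0 gives
/// {11, 4}); otherwise return std::nullopt.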
static std::optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
  unsigned LastOne = A.countTrailingZeros();
  if (A.countPopulation() != (FirstOne - LastOne + 1))
    return std::nullopt;
  return std::make_pair(FirstOne, LastOne);
}

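/// Try to replace (cmpz (and X, C), #0), where C is a contiguous bit mask,
/// with flag-setting shifts that move the masked bits to one end of the
/// register. When only the sign-bit trick applies (case 3 below),
/// SwitchEQNEToPLMI tells the caller to rewrite EQ/NE users into PL/MI.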
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
      And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode * {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}

static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
                                       unsigned Opc128[3]) {
  assert((VT.is64BitVector() || VT.is128BitVector()) &&
         "Unexpected vector shuffle length");
  switch (VT.getScalarSizeInBits()) {
  default:
    llvm_unreachable("Unexpected vector shuffle element size");
  case 8:
    return VT.is64BitVector() ? Opc64[0] : Opc128[0];
  case 16:
    return VT.is64BitVector() ? Opc64[1] : Opc128[1];
  case 32:
    return VT.is64BitVector() ? Opc64[2] : Opc128[2];
  }
}

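/// The main instruction-selection entry point: the cases handled here are
/// matched in C++ before falling through to the TableGen-generated matcher
/// ("Other cases are autogenerated").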
void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::SUB:
    // Select special operations if the SUB node forms an integer ABS pattern.
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
    // If we can't materialize the constant, we need to use a literal pool.
    if (ConstantMaterializationCost(Val, Subtarget) > 2) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }

      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction &MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, Align(4));

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD.
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlign(FI) < Align(4))
        MFI.setObjectAlignment(FI, Align(4));
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (tryInsertVectorElt(N))
      return;
    break;
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_TO_SINT_SAT:
    if (tryFP_TO_INT(N, dl))
      return;
    break;
  case ISD::FMUL:
    if (tryFMULFixed(N, dl))
      return;
    break;
  3352. case ISD::MUL:
  3353. if (Subtarget->isThumb1Only())
  3354. break;
  3355. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
  3356. unsigned RHSV = C->getZExtValue();
  3357. if (!RHSV) break;
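      // Strength-reduce a multiply by 2^n+1 to x + (x << n), and by 2^n-1 to
      // (x << n) - x; e.g. x*9 becomes "add r, r, r, lsl #3" and x*7 becomes
      // "rsb r, r, r, lsl #3".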
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
  case ISD::AND: {
    // Check for an unsigned bitfield extract.
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    // If an immediate is used in an AND node, it is possible that the
    // immediate can be more optimally materialized when negated. If this is
    // the case, we can negate the immediate and use a BIC instead.
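    // For example, on Thumb1 (and x, 0xffffff00) is better as a BIC: the
    // negated immediate 0xff costs one instruction to materialize, while
    // 0xffffff00 costs two.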
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();
      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
          Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
            CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the
        // topological ordering so it is just before N. Otherwise, don't touch
        // its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }
    // (and (or x, c2), c1): if the top 16 bits of c1 and c2 match, the low
    // 16 bits of c1 are 0xffff, and the low 16 bits of c2 are 0, then the top
    // 16 bits of the result come entirely from c2 and the low 16 bits entirely
    // from x. That equals (or (and x, 0xffff), (and c2, 0xffff0000)).
    // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)".
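    // For example, c1 = 0xabcdffff and c2 = 0xabcd0000 meet all three
    // conditions, and the whole expression selects to "movt x, #0xabcd".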
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }
    break;
  }
  case ARMISD::UMAAL: {
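    // UMAAL computes {RdHi,RdLo} = Rn * Rm + RdHi + RdLo, i.e. an unsigned
    // 64-bit multiply with two 32-bit accumulands.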
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS:
    // (sube a, (smul_lohi a, b):hi, (subc 0, (smul_lohi a, b):lo))
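    // This computes the high word of the 64-bit subtraction (a << 32) - a*b:
    // SUBC produces the borrow out of the low-half subtract and SUBE consumes
    // it, which is exactly what a single SMMLS does.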
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;
    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");
    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
    if (!Zero || Zero->getZExtValue() != 0 ||
        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;
    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
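  // The following cases select low-overhead-loop pseudos; later passes either
  // finalize them into real WLS/LE hardware-loop instructions or revert them
  // to ordinary compare-and-branch sequences.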
  case ARMISD::WLSSETUP: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::WLS: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
                                         N->getOperand(1), N->getOperand(2),
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of LDRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::STRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(3);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of STRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDNode *RegPair =
        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0
    //
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0
    //
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0
    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InFlag = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);
    unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InFlag.getOperand(0);
        uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
        // Handle low-overhead loops.
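        // The intrinsic and this branch become t2LoopDec/t2LoopEnd pseudos,
        // which the low-overhead-loop pass later turns into a single LE (or
        // reverts to a subtract-and-branch sequence if it cannot).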
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(
              cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl,
              MVT::i32);
          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
              CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                     CurDAG->getVTList(MVT::i32, MVT::Other),
                                     Args);
          ReplaceUses(Int.getNode(), LoopDec);
          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
              CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InFlag.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }
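      // SelectCMPZ may rewrite (CMPZ (and X, 2^n), #0) into a shift that
      // moves the tested bit into the sign bit; when it does so, EQ/NE have
      // to be remapped to PL/MI.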
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      InFlag = N->getOperand(4);
      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }
    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InFlag = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InFlag);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away, but we need to keep
    // it for its glue output.
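    // For example, (CMPZ x, #-42) becomes (CMPZ (ADDS x, #42), #0): the ADDS
    // sets exactly the flags a CMN would, and #42 is a cheap immediate where
    // #-42 is not.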
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();
      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for
      // tADDi3. Outside that range we can just use a CMN which is 32-bit
      // but has a 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] =
            {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::CMOV: {
    SDValue InFlag = N->getOperand(4);
    if (InFlag.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC =
            (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    EVT VT = N->getValueType(0);
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
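    // With only two 32-bit lanes per D register, zip, unzip and transpose all
    // reduce to the same swap of Dd[1] with Dm[0], so VTRNd32 covers them.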
    unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    EVT VT = N->getValueType(0);
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    EVT VT = N->getValueType(0);
    unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }
  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }
  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }
  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }
  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }
  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }
  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
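      // Without NEON this must be MVE: a "vld2" executes as two stage
      // instructions (VLD20/VLD21), each loading half of the data for both
      // destination registers; the final stage carries the writeback (_wb).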
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }
  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }
  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }
  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }
  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }
  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    switch (IntNo) {
    default:
      break;
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;
      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); // coproc
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); // opc
      Ops.push_back(getI32Imm(
          cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); // CRm
      // The mrrc2 instruction in ARM doesn't allow predicates; the top 4 bits
      // of the encoded instruction are always '1111'. It is possible in
      // assembly language to specify AL as a predicate to mrrc2, but it makes
      // no difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }
      Ops.push_back(Chain);
      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
      // arm_ldrexd returns an i64 value in {i32, i32}.
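      // t2LDREXD writes two independent GPRs, whereas ARM-mode LDREXD writes
      // an even/odd GPRPair, modelled here as a single Untyped result that is
      // split up with EXTRACT_SUBREG below.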
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);
      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                   dl, MVT::i32, SDValue(Ld, 0),
                                                   SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);
      // Store-exclusive double returns an i32 value, which is the status of
      // the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};
      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);
      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);
      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
      ReplaceNode(N, St);
      return;
    }
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo,
                                           ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo,
                                           ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
                                         ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }
    case Intrinsic::arm_mve_vld2q: {
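      // MVE vld2q deinterleaves via two stage instructions (VLD20/VLD21),
      // each of which loads half of the data for both destination registers.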
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }
    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16,
                                           ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32,
                                           ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }
  4641. case ISD::INTRINSIC_WO_CHAIN: {
  4642. unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  4643. switch (IntNo) {
  4644. default:
  4645. break;
  4646. // Scalar f32 -> bf16
  4647. case Intrinsic::arm_neon_vcvtbfp2bf: {
  4648. SDLoc dl(N);
  4649. const SDValue &Src = N->getOperand(1);
  4650. llvm::EVT DestTy = N->getValueType(0);
  4651. SDValue Pred = getAL(CurDAG, dl);
  4652. SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  4653. SDValue Ops[] = { Src, Src, Pred, Reg0 };
  4654. CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
  4655. return;
  4656. }
  4657. // Vector v4f32 -> v4bf16
  4658. case Intrinsic::arm_neon_vcvtfp2bf: {
  4659. SDLoc dl(N);
  4660. const SDValue &Src = N->getOperand(1);
  4661. SDValue Pred = getAL(CurDAG, dl);
  4662. SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  4663. SDValue Ops[] = { Src, Pred, Reg0 };
  4664. CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
  4665. return;
  4666. }
    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }
  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64 bit), extract the integer value of each
// field, and append those values as operands to the provided vector.
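// For example, the string "cp15:0:c13:c0:3" splits into five fields and
// produces the i32 operands {15, 0, 13, 0, 3}. (Illustrative value; any
// string in the 32-bit format above is handled the same way.)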
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used, e.g.
// r8, and in which mode it is to be used, e.g. usr. Returns -1 to signify
// that the string was invalid.
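// For example, a name such as "r8_usr" (assuming the <reg>_<mode> naming
// convention used by the ACLE) selects banked r8 in User mode; the actual
// encoding values live in the ARMBankedReg table, not here.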
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores
// and those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an MClass special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
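// For example, on M-profile cores "apsr" encodes to SYSm value 0 and
// "primask" to 16; the authoritative encodings come from the
// ARMSysReg::lookupMClassSysRegByName table consulted below.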
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
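  // Worked example (derived from the code below): for "spsr_fc" the caller
  // passes Reg == "spsr" and Flags == "fc", so the flag bits give
  // 0x8 | 0x1 = 0x9, and setting the R bit yields a final mask of 0x19.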
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc"
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
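// Illustrative IR (an assumption, not taken from this file): a call such as
//   %v = call i32 @llvm.read_register.i32(metadata !0), with
//   !0 = !{!"cp15:0:c13:c0:3"}
// reaches this point with the register name in operand 1, decoded below.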
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on
  // the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
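// Illustrative IR (an assumption, not taken from this file): a call such as
//   call void @llvm.write_register.i32(metadata !0, i32 %x), with
//   !0 = !{!"fpscr"}
// supplies the value to write as operand 2 of the DAG node handled below.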
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number
    // of operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin() + 2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin() + 2, WriteValue, WriteValue + 2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These
  // values are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  unsigned Flag, Kind;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by H, Q, R modifiers, so we
  // still pack them into a GPRPair.
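  // Illustrative source-level usage (an assumption, not from this file):
  //   uint64_t V; uint64_t *P = ...;
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(V) : "r"(P));
  // Here %0/%H0 must name an even/odd register pair, which is what the
  // GPRPair rewrite below arranges.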
  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();
  SmallVector<bool, 8> OpChanged;
  // The glue node, if any, will be appended at the end.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
      Flag = C->getZExtValue();
      Kind = InlineAsm::getKind(Flag);
    } else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
    // the second is a constant with the value of the immediate. If we get
    // here and we have a Kind_Imm, skip the next operand, and continue.
    if (Kind == InlineAsm::Kind_Imm) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind_Mem followed by the input
    // operand. If we get here and we have a Kind_Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Kind == InlineAsm::Kind_Mem) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef &&
        Kind != InlineAsm::Kind_RegDefEarlyClobber)
      continue;

    unsigned RC;
    bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) ||
        NumRegs != 2)
      continue;

    assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i + 1);
    SDValue V1 = N->getOperand(i + 2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Kind == InlineAsm::Kind_RegDef ||
        Kind == InlineAsm::Kind_RegDefEarlyClobber) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
      else
        Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
                                CurDAG->getVTList(MVT::Other, MVT::Glue),
                                AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_o:
  case InlineAsm::Constraint_Q:
  case InlineAsm::Constraint_Um:
  case InlineAsm::Constraint_Un:
  case InlineAsm::Constraint_Uq:
  case InlineAsm::Constraint_Us:
  case InlineAsm::Constraint_Ut:
  case InlineAsm::Constraint_Uv:
  case InlineAsm::Constraint_Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new ARMDAGToDAGISel(TM, OptLevel);
}