
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true),
    cl::desc("Enable setting constant bits to reduce size of mask immediates"),
    cl::Hidden);

static cl::opt<bool> EnablePromoteAnyextLoad(
    "x86-promote-anyext-load", cl::init(true),
    cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden);

extern cl::opt<bool> IndirectBranchTracking;

//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValue's instead of register
  /// numbers for the leaves of the matched tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    Align Alignment;            // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*
    bool NegateIndex = false;

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), Disp(0), GV(nullptr),
          CP(nullptr), BlockAddr(nullptr), ES(nullptr), MCSym(nullptr), JT(-1),
          SymbolFlags(X86II::MO_NO_FLAG) {}
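    /// Return true if the addressing mode already carries a symbolic
    /// displacement (global value, constant pool entry, external symbol,
    /// MCSymbol, jump table or block address).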
    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }
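    /// Return true if a frame-index base, a base register or an index register
    /// has been set for this addressing mode.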
    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump(SelectionDAG *DAG = nullptr) {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump(DAG);
      else
        dbgs() << "nul\n";
      if (BaseType == FrameIndexBase)
        dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n';
      dbgs() << " Scale " << Scale << '\n'
             << "IndexReg ";
      if (NegateIndex)
        dbgs() << "negate ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump(DAG);
      else
        dbgs() << "nul\n";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

    /// Disable direct TLS access through segment registers.
    bool IndirectTlsSegRefs;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
          OptForMinSize(false), IndirectTlsSegRefs(false) {}

    StringRef getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      IndirectTlsSegRefs = MF.getFunction().hasFnAttribute(
                             "indirect-tls-seg-refs");

      // OptFor[Min]Size are used in pattern predicates that isel is matching.
      OptForMinSize = MF.getFunction().hasMinSize();
      assert((!OptForMinSize || MF.getFunction().hasOptSize()) &&
             "OptForMinSize implies OptForSize");

      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void emitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    void Select(SDNode *N) override;

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
                            bool AllowSegmentRegForX32 = false);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                       unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp,
                          SDValue ScaleOp, SDValue &Base, SDValue &Scale,
                          SDValue &Index, SDValue &Disp, SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectRelocImm(SDValue N, SDValue &Op);

    bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    // Convenience method where P is also root.
    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment) {
      return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment);
    }

    bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
                          SDValue &Base, SDValue &Scale,
                          SDValue &Index, SDValue &Disp,
                          SDValue &Segment);

    bool isProfitableToFormMaskedOp(SDNode *N) const;

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();
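    // Materialize the five machine address operands (Base, Scale, Index, Disp,
    // Segment) from a fully matched X86ISelAddressMode.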
    inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL,
                                   MVT VT, SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
        Base = CurDAG->getTargetFrameIndex(
            AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout()));
      else if (AM.Base_Reg.getNode())
        Base = AM.Base_Reg;
      else
        Base = CurDAG->getRegister(0, VT);

      Scale = getI8Imm(AM.Scale, DL);

      // Negate the index if needed.
      if (AM.NegateIndex) {
        unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
        SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
                                                     AM.IndexReg), 0);
        AM.IndexReg = Neg;
      }

      if (AM.IndexReg.getNode())
        Index = AM.IndexReg;
      else
        Index = CurDAG->getRegister(0, VT);

      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment,
                                             AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "oo");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i16);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size or not.
    // At a high level, we'd like to avoid such instructions when
    // we have similar constants used within the same basic block
    // that can be kept in a register.
    //
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!CurDAG->shouldOptForSize())
        return false;

      // Walk all the users of the immediate.
      for (const SDNode *User : N->uses()) {
        if (UseCount >= 2)
          break;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above). Those instructions won't
        // match in ISel, for now, and would be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // If this is a sign-extended 8-bit integer immediate used in an ALU
        // instruction, there is probably an opcode encoding to save space.
        auto *C = dyn_cast<ConstantSDNode>(N);
        if (C && isInt<8>(C->getSExtValue()))
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If we have more than 1 use, then recommend for hoisting.
      return (UseCount > 1);
    }

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// Return a target constant with the specified value, of type i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i64);
    }
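    // Convert the constant element index of an EXTRACT_SUBVECTOR node into the
    // lane immediate expected by the 128-bit/256-bit VEXTRACT instructions.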
    SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth,
                                        const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(1);
      MVT VecVT = N->getOperand(0).getSimpleValueType();
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }
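    // Convert the constant element index of an INSERT_SUBVECTOR node into the
    // lane immediate expected by the 128-bit/256-bit VINSERT instructions.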
    SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth,
                                      const SDLoc &DL) {
      assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(2);
      MVT VecVT = N->getSimpleValueType(0);
      return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
    }
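    // Immediate for a VPERM2X128 that performs the same 128-bit insertion as a
    // VINSERTF128, but with the two vector operands commuted (see the mapping
    // in the comments below).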
    SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
                                               const SDLoc &DL) {
      assert(VecWidth == 128 && "Unexpected vector width");
      uint64_t Index = N->getConstantOperandVal(2);
      MVT VecVT = N->getSimpleValueType(0);
      uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
      assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
      // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
      // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
      return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
    }

    // Helper to detect unneeded AND instructions on shift amounts. Called
    // from PatFrags in tablegen.
    bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
      assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
      const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();

      if (Val.countTrailingOnes() >= Width)
        return true;

      APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
      return Mask.countTrailingOnes() >= Width;
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }

    bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;

    // Indicates we should prefer to use a non-temporal load for this load.
    bool useNonTemporalLoad(LoadSDNode *N) const {
      if (!N->isNonTemporal())
        return false;

      unsigned StoreSize = N->getMemoryVT().getStoreSize();

      if (N->getAlignment() < StoreSize)
        return false;

      switch (StoreSize) {
      default: llvm_unreachable("Unsupported store size");
      case 4:
      case 8:
        return false;
      case 16:
        return Subtarget->hasSSE41();
      case 32:
        return Subtarget->hasAVX2();
      case 64:
        return Subtarget->hasAVX512();
      }
    }

    bool foldLoadStoreIntoMemOperand(SDNode *Node);
    MachineSDNode *matchBEXTRFromAndImm(SDNode *Node);
    bool matchBitExtract(SDNode *Node);
    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
    bool tryShrinkShlLogicImm(SDNode *N);
    bool tryVPTERNLOG(SDNode *N);
    bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
                        SDNode *ParentC, SDValue A, SDValue B, SDValue C,
                        uint8_t Imm);
    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
    bool tryMatchBitSelect(SDNode *N);

    MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node);
    MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                const SDLoc &dl, MVT VT, SDNode *Node,
                                SDValue &InFlag);

    bool tryOptimizeRem8Extend(SDNode *N);

    bool onlyUsesZeroFlag(SDValue Flags) const;
    bool hasNoSignFlagUses(SDValue Flags) const;
    bool hasNoCarryFlagUses(SDValue Flags) const;
  };
}

// Returns true if this masked compare can be implemented legally with this
// type.
static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM ||
      Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC ||
      Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) {
    // We can get 256-bit 8 element types here without VLX being enabled. When
    // this happens we will use 512-bit operations and the mask will not be
    // zero extended.
    EVT OpVT = N->getOperand(0).getValueType();
    // The first operand of X86ISD::STRICT_CMPM is chain, so we need to get the
    // second operand.
    if (Opcode == X86ISD::STRICT_CMPM)
      OpVT = N->getOperand(1).getValueType();
    if (OpVT.is256BitVector() || OpVT.is128BitVector())
      return Subtarget->hasVLX();

    return true;
  }
  // Scalar opcodes use 128 bit registers, but aren't subject to the VLX check.
  if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM ||
      Opcode == X86ISD::FSETCCM_SAE)
    return true;

  return false;
}

// Returns true if we can assume the writer of the mask has zero extended it
// for us.
bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const {
  // If this is an AND, check if we have a compare on either side. As long as
  // one side guarantees the mask is zero extended, the AND will preserve those
  // zeros.
  if (N->getOpcode() == ISD::AND)
    return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) ||
           isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget);

  return isLegalMaskCompare(N, Subtarget);
}
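// Decide whether folding operand N into its user U is profitable while
// selecting Root. Most of the checks below apply only when N is a load.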
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // Don't fold non-temporal loads if we have an instruction for them.
  if (useNonTemporalLoad(cast<LoadSDNode>(N)))
    return false;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::ADC:
    case X86ISD::SUB:
    case X86ISD::SBB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDCARRY:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. When the increment is 1, the saving
      // can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) {
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

        // If this is a 64-bit AND with an immediate that fits in 32-bits,
        // prefer using the smaller AND over folding the load. This is needed
        // to make sure immediates created by shrinkAndImmediate are always
        // folded. Ideally we would narrow the load during DAG combine and get
        // the best of both worlds.
        if (U->getOpcode() == ISD::AND &&
            Imm->getAPIntValue().getBitWidth() == 64 &&
            Imm->getAPIntValue().isIntN(32))
          return false;

        // If this is really a zext_inreg that can be represented with a movzx
        // instruction, prefer that.
        // TODO: We could shrink the load and fold if it is non-volatile.
        if (U->getOpcode() == ISD::AND &&
            (Imm->getAPIntValue() == UINT8_MAX ||
             Imm->getAPIntValue() == UINT16_MAX ||
             Imm->getAPIntValue() == UINT32_MAX))
          return false;

        // ADD/SUB can negate the immediate and use the opposite operation
        // to fit 128 into a sign extended 8 bit immediate.
        if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) &&
            (-Imm->getAPIntValue()).isSignedIntN(8))
          return false;

        if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) &&
            (-Imm->getAPIntValue()).isSignedIntN(8) &&
            hasNoCarryFlagUses(SDValue(U, 1)))
          return false;
      }

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl %gs:0, %eax
      // leal i@NTPOFF(%eax), %eax
      // instead of
      // movl $i@NTPOFF, %eax
      // addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }

      // Don't fold load if this matches the BTS/BTR/BTC patterns.
      // BTS: (or X, (shl 1, n))
      // BTR: (and X, (rotl -2, n))
      // BTC: (xor X, (shl 1, n))
      if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) {
        if (U->getOperand(0).getOpcode() == ISD::SHL &&
            isOneConstant(U->getOperand(0).getOperand(0)))
          return false;

        if (U->getOperand(1).getOpcode() == ISD::SHL &&
            isOneConstant(U->getOperand(1).getOperand(0)))
          return false;
      }
      if (U->getOpcode() == ISD::AND) {
        SDValue U0 = U->getOperand(0);
        SDValue U1 = U->getOperand(1);
        if (U0.getOpcode() == ISD::ROTL) {
          auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0));
          if (C && C->getSExtValue() == -2)
            return false;
        }

        if (U1.getOpcode() == ISD::ROTL) {
          auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0));
          if (C && C->getSExtValue() == -2)
            return false;
        }
      }

      break;
    }
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      // Don't fold a load into a shift by immediate. The BMI2 instructions
      // support folding a load, but not an immediate. The legacy instructions
      // support folding an immediate, but can't fold a load. Folding an
      // immediate is preferable to folding a load.
      if (isa<ConstantSDNode>(U->getOperand(1)))
        return false;

      break;
    }
  }

  // Prevent folding a load if this can be implemented with an insert_subreg or
  // a move that implicitly zeroes.
  if (Root->getOpcode() == ISD::INSERT_SUBVECTOR &&
      isNullConstant(Root->getOperand(2)) &&
      (Root->getOperand(0).isUndef() ||
       ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode())))
    return false;

  return true;
}

// Indicates it is profitable to form an AVX512 masked operation. Returning
// false will favor a register-register masked move or vblendm and the
// operation will be selected separately.
bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const {
  assert(
      (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) &&
      "Unexpected opcode!");

  // If the operation has additional users, the operation will be duplicated.
  // Check the use count to prevent that.
  // FIXME: Are there cheap opcodes we might want to duplicate?
  return N->getOperand(1).hasOneUse();
}

/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After MoveBelowOrigChain the load is moved between the call and
  // the chain, this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      !LD->isSimple() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}
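// Return true if the 64-bit immediate contains the ENDBR64 byte pattern:
// 0x0F1EFA in the low 24 bits, an F3 byte somewhere above it, and only
// optional legacy prefix bytes in between.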
static bool isEndbrImm64(uint64_t Imm) {
  // There may be some other prefix bytes between 0xF3 and 0x0F1EFA.
  // e.g. 0xF3660F1EFA, 0xF3670F1EFA
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;

  uint8_t OptionalPrefixBytes [] = {0x26, 0x2e, 0x36, 0x3e, 0x64,
                                    0x65, 0x66, 0x67, 0xf0, 0xf2};
  int i = 24; // The low 24 bits (0x0F1EFA) have already matched.
  while (i < 64) {
    uint8_t Byte = (Imm >> i) & 0xFF;
    if (Byte == 0xF3)
      return true;
    if (!llvm::is_contained(OptionalPrefixBytes, Byte))
      return false;
    i += 8;
  }

  return false;
}
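// Pre-selection DAG rewrites. Among other things, this hides constants that
// would embed an ENDBR opcode pattern, turns flag-unused X86ISD::AND back
// into ISD::AND, rewrites vector +/-1 add/sub to use an all-ones constant,
// and emulates broadcasts the target cannot do natively.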
void X86DAGToDAGISel::PreprocessISelDAG() {
  bool MadeChange = false;
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    // This is for CET enhancement.
    //
    // ENDBR32 and ENDBR64 have specific opcodes:
    // ENDBR32: F3 0F 1E FB
    // ENDBR64: F3 0F 1E FA
    // And we don't want attackers to find unintended ENDBR32/64
    // opcode matches in the binary.
    // Here's an example:
    // If the compiler had to generate asm for the following code:
    // a = 0xF30F1EFA
    // it could, for example, generate:
    // mov 0xF30F1EFA, dword ptr[a]
    // In such a case, the binary would include a gadget that starts
    // with a fake ENDBR64 opcode. Therefore, we split such generation
    // into multiple operations so that the pattern does not show up in
    // the binary.
    if (N->getOpcode() == ISD::Constant) {
      MVT VT = N->getSimpleValueType(0);
      int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
      int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB;
      if (Imm == EndbrImm || isEndbrImm64(Imm)) {
        // Check that the cf-protection-branch is enabled.
        Metadata *CFProtectionBranch =
          MF->getMMI().getModule()->getModuleFlag("cf-protection-branch");
        if (CFProtectionBranch || IndirectBranchTracking) {
          SDLoc dl(N);
          SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true);
          Complement = CurDAG->getNOT(dl, Complement, VT);
          --I;
          CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement);
          ++I;
          MadeChange = true;
          continue;
        }
      }
    }

    // If this is a target specific AND node with no flag usages, turn it back
    // into ISD::AND to enable test instruction matching.
    if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) {
      SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0),
                                    N->getOperand(0), N->getOperand(1));
      --I;
      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      ++I;
      MadeChange = true;
      continue;
    }

    // Convert vector increment or decrement to sub/add with an all-ones
    // constant:
    // add X, <1, 1...> --> sub X, <-1, -1...>
    // sub X, <1, 1...> --> add X, <-1, -1...>
    // The all-ones vector constant can be materialized using a pcmpeq
    // instruction that is commonly recognized as an idiom (has no register
    // dependency), so that's better/smaller than loading a splat 1 constant.
    //
    // But don't do this if it would inhibit a potentially profitable load
    // folding opportunity for the other operand. That only occurs with the
    // intersection of:
    // (1) The other operand (op0) is load foldable.
    // (2) The op is an add (otherwise, we are *creating* an add and can still
    //     load fold the other op).
    // (3) The target has AVX (otherwise, we have a destructive add and can't
    //     load fold the other op without killing the constant op).
    // (4) The constant 1 vector has multiple uses (so it is profitable to load
    //     into a register anyway).
    auto mayPreventLoadFold = [&]() {
      return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
             N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
             !N->getOperand(1).hasOneUse();
    };
    if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
        N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
      APInt SplatVal;
      if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
          SplatVal.isOne()) {
        SDLoc DL(N);

        MVT VT = N->getSimpleValueType(0);
        unsigned NumElts = VT.getSizeInBits() / 32;
        SDValue AllOnes =
            CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts));
        AllOnes = CurDAG->getBitcast(VT, AllOnes);

        unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
        SDValue Res =
            CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes);
        --I;
        CurDAG->ReplaceAllUsesWith(N, Res.getNode());
        ++I;
        MadeChange = true;
        continue;
      }
    }

    switch (N->getOpcode()) {
    case X86ISD::VBROADCAST: {
      MVT VT = N->getSimpleValueType(0);
      // Emulate v32i16/v64i8 broadcast without BWI.
      if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
        MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
        SDLoc dl(N);
        SDValue NarrowBCast =
            CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0));
        SDValue Res =
            CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
                            NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
        unsigned Index = VT == MVT::v32i16 ? 16 : 32;
        Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
                              CurDAG->getIntPtrConstant(Index, dl));
        --I;
        CurDAG->ReplaceAllUsesWith(N, Res.getNode());
        ++I;
        MadeChange = true;
        continue;
      }

      break;
    }
    case X86ISD::VBROADCAST_LOAD: {
      MVT VT = N->getSimpleValueType(0);
      // Emulate v32i16/v64i8 broadcast without BWI.
      if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) {
        MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8;
  851. auto *MemNode = cast<MemSDNode>(N);
  852. SDLoc dl(N);
  853. SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other);
  854. SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()};
  855. SDValue NarrowBCast = CurDAG->getMemIntrinsicNode(
  856. X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(),
  857. MemNode->getMemOperand());
  858. SDValue Res =
  859. CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT),
  860. NarrowBCast, CurDAG->getIntPtrConstant(0, dl));
  861. unsigned Index = VT == MVT::v32i16 ? 16 : 32;
  862. Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast,
  863. CurDAG->getIntPtrConstant(Index, dl));
  864. --I;
  865. SDValue To[] = {Res, NarrowBCast.getValue(1)};
  866. CurDAG->ReplaceAllUsesWith(N, To);
  867. ++I;
  868. MadeChange = true;
  869. continue;
  870. }
  871. break;
  872. }
  873. case ISD::VSELECT: {
874. // Replace VSELECT with non-mask conditions with BLENDV.
  875. if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
  876. break;
  877. assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
  878. SDValue Blendv =
  879. CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
  880. N->getOperand(0), N->getOperand(1), N->getOperand(2));
  881. --I;
  882. CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
  883. ++I;
  884. MadeChange = true;
  885. continue;
  886. }
  887. case ISD::FP_ROUND:
  888. case ISD::STRICT_FP_ROUND:
  889. case ISD::FP_TO_SINT:
  890. case ISD::FP_TO_UINT:
  891. case ISD::STRICT_FP_TO_SINT:
  892. case ISD::STRICT_FP_TO_UINT: {
  893. // Replace vector fp_to_s/uint with their X86 specific equivalent so we
  894. // don't need 2 sets of patterns.
  895. if (!N->getSimpleValueType(0).isVector())
  896. break;
  897. unsigned NewOpc;
  898. switch (N->getOpcode()) {
  899. default: llvm_unreachable("Unexpected opcode!");
  900. case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break;
  901. case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break;
  902. case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break;
  903. case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break;
  904. case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break;
  905. case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break;
  906. }
  907. SDValue Res;
  908. if (N->isStrictFPOpcode())
  909. Res =
  910. CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other},
  911. {N->getOperand(0), N->getOperand(1)});
  912. else
  913. Res =
  914. CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
  915. N->getOperand(0));
  916. --I;
  917. CurDAG->ReplaceAllUsesWith(N, Res.getNode());
  918. ++I;
  919. MadeChange = true;
  920. continue;
  921. }
  922. case ISD::SHL:
  923. case ISD::SRA:
  924. case ISD::SRL: {
  925. // Replace vector shifts with their X86 specific equivalent so we don't
  926. // need 2 sets of patterns.
  927. if (!N->getValueType(0).isVector())
  928. break;
  929. unsigned NewOpc;
  930. switch (N->getOpcode()) {
  931. default: llvm_unreachable("Unexpected opcode!");
  932. case ISD::SHL: NewOpc = X86ISD::VSHLV; break;
  933. case ISD::SRA: NewOpc = X86ISD::VSRAV; break;
  934. case ISD::SRL: NewOpc = X86ISD::VSRLV; break;
  935. }
  936. SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
  937. N->getOperand(0), N->getOperand(1));
  938. --I;
  939. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  940. ++I;
  941. MadeChange = true;
  942. continue;
  943. }
  944. case ISD::ANY_EXTEND:
  945. case ISD::ANY_EXTEND_VECTOR_INREG: {
  946. // Replace vector any extend with the zero extend equivalents so we don't
  947. // need 2 sets of patterns. Ignore vXi1 extensions.
  948. if (!N->getValueType(0).isVector())
  949. break;
  950. unsigned NewOpc;
  951. if (N->getOperand(0).getScalarValueSizeInBits() == 1) {
  952. assert(N->getOpcode() == ISD::ANY_EXTEND &&
  953. "Unexpected opcode for mask vector!");
  954. NewOpc = ISD::SIGN_EXTEND;
  955. } else {
  956. NewOpc = N->getOpcode() == ISD::ANY_EXTEND
  957. ? ISD::ZERO_EXTEND
  958. : ISD::ZERO_EXTEND_VECTOR_INREG;
  959. }
  960. SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0),
  961. N->getOperand(0));
  962. --I;
  963. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  964. ++I;
  965. MadeChange = true;
  966. continue;
  967. }
  968. case ISD::FCEIL:
  969. case ISD::STRICT_FCEIL:
  970. case ISD::FFLOOR:
  971. case ISD::STRICT_FFLOOR:
  972. case ISD::FTRUNC:
  973. case ISD::STRICT_FTRUNC:
  974. case ISD::FROUNDEVEN:
  975. case ISD::STRICT_FROUNDEVEN:
  976. case ISD::FNEARBYINT:
  977. case ISD::STRICT_FNEARBYINT:
  978. case ISD::FRINT:
  979. case ISD::STRICT_FRINT: {
  980. // Replace fp rounding with their X86 specific equivalent so we don't
  981. // need 2 sets of patterns.
  982. unsigned Imm;
  983. switch (N->getOpcode()) {
  984. default: llvm_unreachable("Unexpected opcode!");
  985. case ISD::STRICT_FCEIL:
  986. case ISD::FCEIL: Imm = 0xA; break;
  987. case ISD::STRICT_FFLOOR:
  988. case ISD::FFLOOR: Imm = 0x9; break;
  989. case ISD::STRICT_FTRUNC:
  990. case ISD::FTRUNC: Imm = 0xB; break;
  991. case ISD::STRICT_FROUNDEVEN:
  992. case ISD::FROUNDEVEN: Imm = 0x8; break;
  993. case ISD::STRICT_FNEARBYINT:
  994. case ISD::FNEARBYINT: Imm = 0xC; break;
  995. case ISD::STRICT_FRINT:
  996. case ISD::FRINT: Imm = 0x4; break;
  997. }
  998. SDLoc dl(N);
  999. bool IsStrict = N->isStrictFPOpcode();
  1000. SDValue Res;
  1001. if (IsStrict)
  1002. Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl,
  1003. {N->getValueType(0), MVT::Other},
  1004. {N->getOperand(0), N->getOperand(1),
  1005. CurDAG->getTargetConstant(Imm, dl, MVT::i32)});
  1006. else
  1007. Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0),
  1008. N->getOperand(0),
  1009. CurDAG->getTargetConstant(Imm, dl, MVT::i32));
  1010. --I;
  1011. CurDAG->ReplaceAllUsesWith(N, Res.getNode());
  1012. ++I;
  1013. MadeChange = true;
  1014. continue;
  1015. }
  1016. case X86ISD::FANDN:
  1017. case X86ISD::FAND:
  1018. case X86ISD::FOR:
  1019. case X86ISD::FXOR: {
  1020. // Widen scalar fp logic ops to vector to reduce isel patterns.
1021. // FIXME: Can we do this during lowering/combine?
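// Sketch of the effect, assuming SSE2: (f32 X86ISD::FAND a, b) becomes a
// v4f32 op - the scalars are inserted with SCALAR_TO_VECTOR, bitcast to
// v4i32 so a plain integer AND/OR/XOR/ANDNP can be used, and the scalar
// result is read back from element 0.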
  1022. MVT VT = N->getSimpleValueType(0);
  1023. if (VT.isVector() || VT == MVT::f128)
  1024. break;
  1025. MVT VecVT = VT == MVT::f64 ? MVT::v2f64
  1026. : VT == MVT::f32 ? MVT::v4f32
  1027. : MVT::v8f16;
  1028. SDLoc dl(N);
  1029. SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
  1030. N->getOperand(0));
  1031. SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
  1032. N->getOperand(1));
  1033. SDValue Res;
  1034. if (Subtarget->hasSSE2()) {
  1035. EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
  1036. Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
  1037. Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
  1038. unsigned Opc;
  1039. switch (N->getOpcode()) {
  1040. default: llvm_unreachable("Unexpected opcode!");
  1041. case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
  1042. case X86ISD::FAND: Opc = ISD::AND; break;
  1043. case X86ISD::FOR: Opc = ISD::OR; break;
  1044. case X86ISD::FXOR: Opc = ISD::XOR; break;
  1045. }
  1046. Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
  1047. Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
  1048. } else {
  1049. Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
  1050. }
  1051. Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
  1052. CurDAG->getIntPtrConstant(0, dl));
  1053. --I;
  1054. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
  1055. ++I;
  1056. MadeChange = true;
  1057. continue;
  1058. }
  1059. }
  1060. if (OptLevel != CodeGenOpt::None &&
  1061. // Only do this when the target can fold the load into the call or
  1062. // jmp.
  1063. !Subtarget->useIndirectThunkCalls() &&
  1064. ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
  1065. (N->getOpcode() == X86ISD::TC_RETURN &&
  1066. (Subtarget->is64Bit() ||
  1067. !getTargetMachine().isPositionIndependent())))) {
  1068. /// Also try moving call address load from outside callseq_start to just
  1069. /// before the call to allow it to be folded.
  1070. ///
  1071. /// [Load chain]
  1072. /// ^
  1073. /// |
  1074. /// [Load]
  1075. /// ^ ^
  1076. /// | |
  1077. /// / \--
  1078. /// / |
  1079. ///[CALLSEQ_START] |
  1080. /// ^ |
  1081. /// | |
  1082. /// [LOAD/C2Reg] |
  1083. /// | |
  1084. /// \ /
  1085. /// \ /
  1086. /// [CALL]
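/// Illustrative payoff: this is what lets the callee load be folded so we
/// emit e.g. `callq *(%rdi)` instead of first loading the pointer into a
/// register and then doing `callq *%rax`.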
  1087. bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
  1088. SDValue Chain = N->getOperand(0);
  1089. SDValue Load = N->getOperand(1);
  1090. if (!isCalleeLoad(Load, Chain, HasCallSeq))
  1091. continue;
  1092. moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
  1093. ++NumLoadMoved;
  1094. MadeChange = true;
  1095. continue;
  1096. }
1097. // Lower fpround and fpextend nodes that target the FP stack to be a store to
1098. // and a load from the stack. This is a gross hack. We would like to simply mark
  1099. // these as being illegal, but when we do that, legalize produces these when
  1100. // it expands calls, then expands these in the same legalize pass. We would
  1101. // like dag combine to be able to hack on these between the call expansion
  1102. // and the node legalization. As such this pass basically does "really
  1103. // late" legalization of these inline with the X86 isel pass.
  1104. // FIXME: This should only happen when not compiled with -O0.
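// Illustrative example: an f64 -> f32 FP_ROUND whose source lives on the x87
// stack but whose result is wanted in an SSE register becomes, roughly,
//   fstps (%rsp)        # truncating store of the x87 value to a stack slot
//   movss (%rsp), %xmm0
// via the truncstore + extload built below.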
  1105. switch (N->getOpcode()) {
  1106. default: continue;
  1107. case ISD::FP_ROUND:
  1108. case ISD::FP_EXTEND:
  1109. {
  1110. MVT SrcVT = N->getOperand(0).getSimpleValueType();
  1111. MVT DstVT = N->getSimpleValueType(0);
  1112. // If any of the sources are vectors, no fp stack involved.
  1113. if (SrcVT.isVector() || DstVT.isVector())
  1114. continue;
  1115. // If the source and destination are SSE registers, then this is a legal
  1116. // conversion that should not be lowered.
  1117. const X86TargetLowering *X86Lowering =
  1118. static_cast<const X86TargetLowering *>(TLI);
  1119. bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
  1120. bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
  1121. if (SrcIsSSE && DstIsSSE)
  1122. continue;
  1123. if (!SrcIsSSE && !DstIsSSE) {
  1124. // If this is an FPStack extension, it is a noop.
  1125. if (N->getOpcode() == ISD::FP_EXTEND)
  1126. continue;
  1127. // If this is a value-preserving FPStack truncation, it is a noop.
  1128. if (N->getConstantOperandVal(1))
  1129. continue;
  1130. }
  1131. // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
  1132. // FPStack has extload and truncstore. SSE can fold direct loads into other
  1133. // operations. Based on this, decide what we want to do.
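// The stack slot uses the narrower of the two types: the destination type for
// an FP_ROUND and the source type for an FP_EXTEND, so the store truncates
// and the load extends as needed.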
  1134. MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT;
  1135. SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
  1136. int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
  1137. MachinePointerInfo MPI =
  1138. MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
  1139. SDLoc dl(N);
  1140. // FIXME: optimize the case where the src/dest is a load or store?
  1141. SDValue Store = CurDAG->getTruncStore(
  1142. CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT);
  1143. SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store,
  1144. MemTmp, MPI, MemVT);
  1145. // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1146. // extload we created. This will cause general havoc on the DAG because
  1147. // anything below the conversion could be folded into other existing nodes.
  1148. // To avoid invalidating 'I', back it up to the convert node.
  1149. --I;
  1150. CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  1151. break;
  1152. }
1153. // The sequence of events for lowering STRICT_FP versions of these nodes requires
1154. // dealing with the chain differently, as there is already a preexisting chain.
  1155. case ISD::STRICT_FP_ROUND:
  1156. case ISD::STRICT_FP_EXTEND:
  1157. {
  1158. MVT SrcVT = N->getOperand(1).getSimpleValueType();
  1159. MVT DstVT = N->getSimpleValueType(0);
  1160. // If any of the sources are vectors, no fp stack involved.
  1161. if (SrcVT.isVector() || DstVT.isVector())
  1162. continue;
  1163. // If the source and destination are SSE registers, then this is a legal
  1164. // conversion that should not be lowered.
  1165. const X86TargetLowering *X86Lowering =
  1166. static_cast<const X86TargetLowering *>(TLI);
  1167. bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
  1168. bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
  1169. if (SrcIsSSE && DstIsSSE)
  1170. continue;
  1171. if (!SrcIsSSE && !DstIsSSE) {
  1172. // If this is an FPStack extension, it is a noop.
  1173. if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
  1174. continue;
  1175. // If this is a value-preserving FPStack truncation, it is a noop.
  1176. if (N->getConstantOperandVal(2))
  1177. continue;
  1178. }
  1179. // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
  1180. // FPStack has extload and truncstore. SSE can fold direct loads into other
  1181. // operations. Based on this, decide what we want to do.
  1182. MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT;
  1183. SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
  1184. int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex();
  1185. MachinePointerInfo MPI =
  1186. MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI);
  1187. SDLoc dl(N);
  1188. // FIXME: optimize the case where the src/dest is a load or store?
1189. // Since the operation is StrictFP, use the preexisting chain.
  1190. SDValue Store, Result;
  1191. if (!SrcIsSSE) {
  1192. SDVTList VTs = CurDAG->getVTList(MVT::Other);
  1193. SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp};
  1194. Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT,
  1195. MPI, /*Align*/ None,
  1196. MachineMemOperand::MOStore);
  1197. if (N->getFlags().hasNoFPExcept()) {
  1198. SDNodeFlags Flags = Store->getFlags();
  1199. Flags.setNoFPExcept(true);
  1200. Store->setFlags(Flags);
  1201. }
  1202. } else {
  1203. assert(SrcVT == MemVT && "Unexpected VT!");
  1204. Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp,
  1205. MPI);
  1206. }
  1207. if (!DstIsSSE) {
  1208. SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other);
  1209. SDValue Ops[] = {Store, MemTmp};
  1210. Result = CurDAG->getMemIntrinsicNode(
  1211. X86ISD::FLD, dl, VTs, Ops, MemVT, MPI,
  1212. /*Align*/ None, MachineMemOperand::MOLoad);
  1213. if (N->getFlags().hasNoFPExcept()) {
  1214. SDNodeFlags Flags = Result->getFlags();
  1215. Flags.setNoFPExcept(true);
  1216. Result->setFlags(Flags);
  1217. }
  1218. } else {
  1219. assert(DstVT == MemVT && "Unexpected VT!");
  1220. Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI);
  1221. }
  1222. // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
1223. // extload we created. This will cause general havoc on the DAG because
  1224. // anything below the conversion could be folded into other existing nodes.
  1225. // To avoid invalidating 'I', back it up to the convert node.
  1226. --I;
  1227. CurDAG->ReplaceAllUsesWith(N, Result.getNode());
  1228. break;
  1229. }
  1230. }
  1231. // Now that we did that, the node is dead. Increment the iterator to the
  1232. // next node to process, then delete N.
  1233. ++I;
  1234. MadeChange = true;
  1235. }
  1236. // Remove any dead nodes that may have been left behind.
  1237. if (MadeChange)
  1238. CurDAG->RemoveDeadNodes();
  1239. }
  1240. // Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
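// For illustration: after an i8 division the remainder comes out of AH via a
// MOVZX32rr8_NOREX; if that value is then truncated back to 8 bits and
// re-extended, the outer extend is redundant, e.g.
//   movzbl %ah, %ecx
//   movzbl %cl, %eax
// can collapse to a single movzbl of %ah (illustrative, zero-extend case).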
  1241. bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
  1242. unsigned Opc = N->getMachineOpcode();
  1243. if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
  1244. Opc != X86::MOVSX64rr8)
  1245. return false;
  1246. SDValue N0 = N->getOperand(0);
1247. // We need to be extracting the low byte of an extend.
  1248. if (!N0.isMachineOpcode() ||
  1249. N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
  1250. N0.getConstantOperandVal(1) != X86::sub_8bit)
  1251. return false;
  1252. // We're looking for either a movsx or movzx to match the original opcode.
  1253. unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
  1254. : X86::MOVSX32rr8_NOREX;
  1255. SDValue N00 = N0.getOperand(0);
  1256. if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
  1257. return false;
  1258. if (Opc == X86::MOVSX64rr8) {
  1259. // If we had a sign extend from 8 to 64 bits. We still need to go from 32
  1260. // to 64.
  1261. MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
  1262. MVT::i64, N00);
  1263. ReplaceUses(N, Extend);
  1264. } else {
  1265. // Ok we can drop this extend and just use the original extend.
  1266. ReplaceUses(N, N00.getNode());
  1267. }
  1268. return true;
  1269. }
  1270. void X86DAGToDAGISel::PostprocessISelDAG() {
  1271. // Skip peepholes at -O0.
  1272. if (TM.getOptLevel() == CodeGenOpt::None)
  1273. return;
  1274. SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
  1275. bool MadeChange = false;
  1276. while (Position != CurDAG->allnodes_begin()) {
  1277. SDNode *N = &*--Position;
  1278. // Skip dead nodes and any non-machine opcodes.
  1279. if (N->use_empty() || !N->isMachineOpcode())
  1280. continue;
  1281. if (tryOptimizeRem8Extend(N)) {
  1282. MadeChange = true;
  1283. continue;
  1284. }
  1285. // Look for a TESTrr+ANDrr pattern where both operands of the test are
  1286. // the same. Rewrite to remove the AND.
  1287. unsigned Opc = N->getMachineOpcode();
  1288. if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
  1289. Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
  1290. N->getOperand(0) == N->getOperand(1) &&
  1291. N->isOnlyUserOf(N->getOperand(0).getNode()) &&
  1292. N->getOperand(0).isMachineOpcode()) {
  1293. SDValue And = N->getOperand(0);
  1294. unsigned N0Opc = And.getMachineOpcode();
  1295. if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
  1296. N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
  1297. MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
  1298. MVT::i32,
  1299. And.getOperand(0),
  1300. And.getOperand(1));
  1301. ReplaceUses(N, Test);
  1302. MadeChange = true;
  1303. continue;
  1304. }
  1305. if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
  1306. N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
  1307. unsigned NewOpc;
  1308. switch (N0Opc) {
  1309. case X86::AND8rm: NewOpc = X86::TEST8mr; break;
  1310. case X86::AND16rm: NewOpc = X86::TEST16mr; break;
  1311. case X86::AND32rm: NewOpc = X86::TEST32mr; break;
  1312. case X86::AND64rm: NewOpc = X86::TEST64mr; break;
  1313. }
  1314. // Need to swap the memory and register operand.
  1315. SDValue Ops[] = { And.getOperand(1),
  1316. And.getOperand(2),
  1317. And.getOperand(3),
  1318. And.getOperand(4),
  1319. And.getOperand(5),
  1320. And.getOperand(0),
  1321. And.getOperand(6) /* Chain */ };
  1322. MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N),
  1323. MVT::i32, MVT::Other, Ops);
  1324. CurDAG->setNodeMemRefs(
  1325. Test, cast<MachineSDNode>(And.getNode())->memoperands());
  1326. ReplaceUses(N, Test);
  1327. MadeChange = true;
  1328. continue;
  1329. }
  1330. }
  1331. // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is
  1332. // used. We're doing this late so we can prefer to fold the AND into masked
  1333. // comparisons. Doing that can be better for the live range of the mask
  1334. // register.
  1335. if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr ||
  1336. Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) &&
  1337. N->getOperand(0) == N->getOperand(1) &&
  1338. N->isOnlyUserOf(N->getOperand(0).getNode()) &&
  1339. N->getOperand(0).isMachineOpcode() &&
  1340. onlyUsesZeroFlag(SDValue(N, 0))) {
  1341. SDValue And = N->getOperand(0);
  1342. unsigned N0Opc = And.getMachineOpcode();
  1343. // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other
  1344. // KAND instructions and KTEST use the same ISA feature.
  1345. if (N0Opc == X86::KANDBrr ||
  1346. (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) ||
  1347. N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) {
  1348. unsigned NewOpc;
  1349. switch (Opc) {
  1350. default: llvm_unreachable("Unexpected opcode!");
  1351. case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break;
  1352. case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break;
  1353. case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break;
  1354. case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break;
  1355. }
  1356. MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N),
  1357. MVT::i32,
  1358. And.getOperand(0),
  1359. And.getOperand(1));
  1360. ReplaceUses(N, KTest);
  1361. MadeChange = true;
  1362. continue;
  1363. }
  1364. }
1365. // Attempt to remove vector moves that were inserted to zero upper bits.
  1366. if (Opc != TargetOpcode::SUBREG_TO_REG)
  1367. continue;
  1368. unsigned SubRegIdx = N->getConstantOperandVal(2);
  1369. if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm)
  1370. continue;
  1371. SDValue Move = N->getOperand(1);
  1372. if (!Move.isMachineOpcode())
  1373. continue;
1374. // Make sure it's one of the move opcodes we recognize.
  1375. switch (Move.getMachineOpcode()) {
  1376. default:
  1377. continue;
  1378. case X86::VMOVAPDrr: case X86::VMOVUPDrr:
  1379. case X86::VMOVAPSrr: case X86::VMOVUPSrr:
  1380. case X86::VMOVDQArr: case X86::VMOVDQUrr:
  1381. case X86::VMOVAPDYrr: case X86::VMOVUPDYrr:
  1382. case X86::VMOVAPSYrr: case X86::VMOVUPSYrr:
  1383. case X86::VMOVDQAYrr: case X86::VMOVDQUYrr:
  1384. case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr:
  1385. case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr:
  1386. case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr:
  1387. case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr:
  1388. case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr:
  1389. case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr:
  1390. case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr:
  1391. case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr:
  1392. break;
  1393. }
  1394. SDValue In = Move.getOperand(0);
  1395. if (!In.isMachineOpcode() ||
  1396. In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END)
  1397. continue;
  1398. // Make sure the instruction has a VEX, XOP, or EVEX prefix. This covers
  1399. // the SHA instructions which use a legacy encoding.
  1400. uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags;
  1401. if ((TSFlags & X86II::EncodingMask) != X86II::VEX &&
  1402. (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
  1403. (TSFlags & X86II::EncodingMask) != X86II::XOP)
  1404. continue;
1405. // The producing instruction is another vector instruction. We can drop the
  1406. // move.
  1407. CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
  1408. MadeChange = true;
  1409. }
  1410. if (MadeChange)
  1411. CurDAG->RemoveDeadNodes();
  1412. }
  1413. /// Emit any code that needs to be executed only in the main function.
  1414. void X86DAGToDAGISel::emitSpecialCodeForMain() {
  1415. if (Subtarget->isTargetCygMing()) {
  1416. TargetLowering::ArgListTy Args;
  1417. auto &DL = CurDAG->getDataLayout();
  1418. TargetLowering::CallLoweringInfo CLI(*CurDAG);
  1419. CLI.setChain(CurDAG->getRoot())
  1420. .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
  1421. CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
  1422. std::move(Args));
  1423. const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
  1424. std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
  1425. CurDAG->setRoot(Result.second);
  1426. }
  1427. }
  1428. void X86DAGToDAGISel::emitFunctionEntryCode() {
  1429. // If this is main, emit special code for main.
  1430. const Function &F = MF->getFunction();
  1431. if (F.hasExternalLinkage() && F.getName() == "main")
  1432. emitSpecialCodeForMain();
  1433. }
  1434. static bool isDispSafeForFrameIndex(int64_t Val) {
  1435. // On 64-bit platforms, we can run into an issue where a frame index
  1436. // includes a displacement that, when added to the explicit displacement,
  1437. // will overflow the displacement field. Assuming that the frame index
  1438. // displacement fits into a 31-bit integer (which is only slightly more
  1439. // aggressive than the current fundamental assumption that it fits into
  1440. // a 32-bit integer), a 31-bit disp should always be safe.
  1441. return isInt<31>(Val);
  1442. }
  1443. bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
  1444. X86ISelAddressMode &AM) {
  1445. // We may have already matched a displacement and the caller just added the
  1446. // symbolic displacement. So we still need to do the checks even if Offset
  1447. // is zero.
  1448. int64_t Val = AM.Disp + Offset;
  1449. // Cannot combine ExternalSymbol displacements with integer offsets.
  1450. if (Val != 0 && (AM.ES || AM.MCSym))
  1451. return true;
  1452. CodeModel::Model M = TM.getCodeModel();
  1453. if (Subtarget->is64Bit()) {
  1454. if (Val != 0 &&
  1455. !X86::isOffsetSuitableForCodeModel(Val, M,
  1456. AM.hasSymbolicDisplacement()))
  1457. return true;
  1458. // In addition to the checks required for a register base, check that
  1459. // we do not try to use an unsafe Disp with a frame index.
  1460. if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
  1461. !isDispSafeForFrameIndex(Val))
  1462. return true;
  1463. }
  1464. AM.Disp = Val;
  1465. return false;
  1466. }
  1467. bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
  1468. bool AllowSegmentRegForX32) {
  1469. SDValue Address = N->getOperand(1);
  1470. // load gs:0 -> GS segment register.
  1471. // load fs:0 -> FS segment register.
  1472. //
  1473. // This optimization is generally valid because the GNU TLS model defines that
  1474. // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
  1475. // with 32-bit registers, as we get in ILP32 mode, those registers are first
1476. // zero-extended to 64 bits and then added to the base address, which gives
  1477. // unwanted results when the register holds a negative value.
  1478. // For more information see http://people.redhat.com/drepper/tls.pdf
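// Illustrative case: a load of constant address 0 whose pointer is in the GS
// (or FS) address space is folded by just selecting %gs (resp. %fs) as the
// segment of the addressing mode below.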
  1479. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) {
  1480. if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
  1481. !IndirectTlsSegRefs &&
  1482. (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
  1483. Subtarget->isTargetFuchsia())) {
  1484. if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
  1485. return true;
  1486. switch (N->getPointerInfo().getAddrSpace()) {
  1487. case X86AS::GS:
  1488. AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  1489. return false;
  1490. case X86AS::FS:
  1491. AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  1492. return false;
  1493. // Address space X86AS::SS is not handled here, because it is not used to
  1494. // address TLS areas.
  1495. }
  1496. }
  1497. }
  1498. return true;
  1499. }
  1500. /// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
  1501. /// mode. These wrap things that will resolve down into a symbol reference.
  1502. /// If no match is possible, this returns true, otherwise it returns false.
  1503. bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  1504. // If the addressing mode already has a symbol as the displacement, we can
  1505. // never match another symbol.
  1506. if (AM.hasSymbolicDisplacement())
  1507. return true;
  1508. bool IsRIPRelTLS = false;
  1509. bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
  1510. if (IsRIPRel) {
  1511. SDValue Val = N.getOperand(0);
  1512. if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
  1513. IsRIPRelTLS = true;
  1514. }
  1515. // We can't use an addressing mode in the 64-bit large code model.
  1516. // Global TLS addressing is an exception. In the medium code model,
1517. // we can use such an addressing mode when RIP wrappers are present.
  1518. // That signifies access to globals that are known to be "near",
  1519. // such as the GOT itself.
  1520. CodeModel::Model M = TM.getCodeModel();
  1521. if (Subtarget->is64Bit() &&
  1522. ((M == CodeModel::Large && !IsRIPRelTLS) ||
  1523. (M == CodeModel::Medium && !IsRIPRel)))
  1524. return true;
  1525. // Base and index reg must be 0 in order to use %rip as base.
  1526. if (IsRIPRel && AM.hasBaseOrIndexReg())
  1527. return true;
  1528. // Make a local copy in case we can't do this fold.
  1529. X86ISelAddressMode Backup = AM;
  1530. int64_t Offset = 0;
  1531. SDValue N0 = N.getOperand(0);
  1532. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
  1533. AM.GV = G->getGlobal();
  1534. AM.SymbolFlags = G->getTargetFlags();
  1535. Offset = G->getOffset();
  1536. } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
  1537. AM.CP = CP->getConstVal();
  1538. AM.Alignment = CP->getAlign();
  1539. AM.SymbolFlags = CP->getTargetFlags();
  1540. Offset = CP->getOffset();
  1541. } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
  1542. AM.ES = S->getSymbol();
  1543. AM.SymbolFlags = S->getTargetFlags();
  1544. } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
  1545. AM.MCSym = S->getMCSymbol();
  1546. } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
  1547. AM.JT = J->getIndex();
  1548. AM.SymbolFlags = J->getTargetFlags();
  1549. } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
  1550. AM.BlockAddr = BA->getBlockAddress();
  1551. AM.SymbolFlags = BA->getTargetFlags();
  1552. Offset = BA->getOffset();
  1553. } else
  1554. llvm_unreachable("Unhandled symbol reference node.");
  1555. if (foldOffsetIntoAddress(Offset, AM)) {
  1556. AM = Backup;
  1557. return true;
  1558. }
  1559. if (IsRIPRel)
  1560. AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
  1561. // Commit the changes now that we know this fold is safe.
  1562. return false;
  1563. }
  1564. /// Add the specified node to the specified addressing mode, returning true if
  1565. /// it cannot be done. This just pattern matches for the addressing mode.
  1566. bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  1567. if (matchAddressRecursively(N, AM, 0))
  1568. return true;
  1569. // Post-processing: Make a second attempt to fold a load, if we now know
  1570. // that there will not be any other register. This is only performed for
  1571. // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
  1572. // any foldable load the first time.
  1573. if (Subtarget->isTarget64BitILP32() &&
  1574. AM.BaseType == X86ISelAddressMode::RegBase &&
  1575. AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
  1576. SDValue Save_Base_Reg = AM.Base_Reg;
  1577. if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
  1578. AM.Base_Reg = SDValue();
  1579. if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
  1580. AM.Base_Reg = Save_Base_Reg;
  1581. }
  1582. }
  1583. // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
1584. // a smaller encoding and avoids a scaled index.
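// e.g. (illustrative) leal (,%eax,2), %ecx becomes leal (%eax,%eax), %ecx.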
  1585. if (AM.Scale == 2 &&
  1586. AM.BaseType == X86ISelAddressMode::RegBase &&
  1587. AM.Base_Reg.getNode() == nullptr) {
  1588. AM.Base_Reg = AM.IndexReg;
  1589. AM.Scale = 1;
  1590. }
  1591. // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  1592. // because it has a smaller encoding.
  1593. // TODO: Which other code models can use this?
  1594. switch (TM.getCodeModel()) {
  1595. default: break;
  1596. case CodeModel::Small:
  1597. case CodeModel::Kernel:
  1598. if (Subtarget->is64Bit() &&
  1599. AM.Scale == 1 &&
  1600. AM.BaseType == X86ISelAddressMode::RegBase &&
  1601. AM.Base_Reg.getNode() == nullptr &&
  1602. AM.IndexReg.getNode() == nullptr &&
  1603. AM.SymbolFlags == X86II::MO_NO_FLAG &&
  1604. AM.hasSymbolicDisplacement())
  1605. AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
  1606. break;
  1607. }
  1608. return false;
  1609. }
  1610. bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
  1611. unsigned Depth) {
  1612. // Add an artificial use to this node so that we can keep track of
  1613. // it if it gets CSE'd with a different node.
  1614. HandleSDNode Handle(N);
  1615. X86ISelAddressMode Backup = AM;
  1616. if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
  1617. !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
  1618. return false;
  1619. AM = Backup;
  1620. // Try again after commutating the operands.
  1621. if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM,
  1622. Depth + 1) &&
  1623. !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
  1624. return false;
  1625. AM = Backup;
  1626. // If we couldn't fold both operands into the address at the same time,
  1627. // see if we can just put each operand into a register and fold at least
  1628. // the add.
  1629. if (AM.BaseType == X86ISelAddressMode::RegBase &&
  1630. !AM.Base_Reg.getNode() &&
  1631. !AM.IndexReg.getNode()) {
  1632. N = Handle.getValue();
  1633. AM.Base_Reg = N.getOperand(0);
  1634. AM.IndexReg = N.getOperand(1);
  1635. AM.Scale = 1;
  1636. return false;
  1637. }
  1638. N = Handle.getValue();
  1639. return true;
  1640. }
  1641. // Insert a node into the DAG at least before the Pos node's position. This
  1642. // will reposition the node as needed, and will assign it a node ID that is <=
  1643. // the Pos node's ID. Note that this does *not* preserve the uniqueness of node
  1644. // IDs! The selection DAG must no longer depend on their uniqueness when this
  1645. // is used.
  1646. static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  1647. if (N->getNodeId() == -1 ||
  1648. (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
  1649. SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) {
  1650. DAG.RepositionNode(Pos->getIterator(), N.getNode());
1651. // Mark Node as invalid for pruning, since after this it may be a successor to a
1652. // selected node but otherwise be in the same position as Pos.
1653. // Conservatively mark it with the same -abs(Id) so that the node id
1654. // invariant is preserved.
  1655. N->setNodeId(Pos->getNodeId());
  1656. SelectionDAGISel::InvalidateNodeId(N.getNode());
  1657. }
  1658. }
  1659. // Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
1660. // safe. This allows us to convert the shift and AND into an h-register
  1661. // extract and a scaled index. Returns false if the simplification is
  1662. // performed.
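// Illustrative instance with C1 == 2: (X >> 6) & 0x3fc becomes
// ((X >> 8) & 0xff) << 2, so the (X >> 8) & 0xff piece can be an h-register
// extract used as an index with scale 4.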
  1663. static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
  1664. uint64_t Mask,
  1665. SDValue Shift, SDValue X,
  1666. X86ISelAddressMode &AM) {
  1667. if (Shift.getOpcode() != ISD::SRL ||
  1668. !isa<ConstantSDNode>(Shift.getOperand(1)) ||
  1669. !Shift.hasOneUse())
  1670. return true;
  1671. int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  1672. if (ScaleLog <= 0 || ScaleLog >= 4 ||
  1673. Mask != (0xffu << ScaleLog))
  1674. return true;
  1675. MVT VT = N.getSimpleValueType();
  1676. SDLoc DL(N);
  1677. SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  1678. SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  1679. SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  1680. SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  1681. SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  1682. SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
  1683. // Insert the new nodes into the topological ordering. We must do this in
  1684. // a valid topological ordering as nothing is going to go back and re-sort
  1685. // these nodes. We continually insert before 'N' in sequence as this is
  1686. // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  1687. // hierarchy left to express.
  1688. insertDAGNode(DAG, N, Eight);
  1689. insertDAGNode(DAG, N, Srl);
  1690. insertDAGNode(DAG, N, NewMask);
  1691. insertDAGNode(DAG, N, And);
  1692. insertDAGNode(DAG, N, ShlCount);
  1693. insertDAGNode(DAG, N, Shl);
  1694. DAG.ReplaceAllUsesWith(N, Shl);
  1695. DAG.RemoveDeadNode(N.getNode());
  1696. AM.IndexReg = And;
  1697. AM.Scale = (1 << ScaleLog);
  1698. return false;
  1699. }
  1700. // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
  1701. // allows us to fold the shift into this addressing mode. Returns false if the
  1702. // transform succeeded.
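// Illustrative instance with C1 == 2: (X << 2) & 0x3fc becomes
// (X & 0xff) << 2, and the << 2 is then absorbed as scale 4 in the
// addressing mode with (X & 0xff) as the index.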
  1703. static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
  1704. X86ISelAddressMode &AM) {
  1705. SDValue Shift = N.getOperand(0);
  1706. // Use a signed mask so that shifting right will insert sign bits. These
  1707. // bits will be removed when we shift the result left so it doesn't matter
  1708. // what we use. This might allow a smaller immediate encoding.
  1709. int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue();
  1710. // If we have an any_extend feeding the AND, look through it to see if there
  1711. // is a shift behind it. But only if the AND doesn't use the extended bits.
  1712. // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
  1713. bool FoundAnyExtend = false;
  1714. if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
  1715. Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
  1716. isUInt<32>(Mask)) {
  1717. FoundAnyExtend = true;
  1718. Shift = Shift.getOperand(0);
  1719. }
  1720. if (Shift.getOpcode() != ISD::SHL ||
  1721. !isa<ConstantSDNode>(Shift.getOperand(1)))
  1722. return true;
  1723. SDValue X = Shift.getOperand(0);
  1724. // Not likely to be profitable if either the AND or SHIFT node has more
  1725. // than one use (unless all uses are for address computation). Besides,
  1726. // isel mechanism requires their node ids to be reused.
  1727. if (!N.hasOneUse() || !Shift.hasOneUse())
  1728. return true;
  1729. // Verify that the shift amount is something we can fold.
  1730. unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  1731. if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
  1732. return true;
  1733. MVT VT = N.getSimpleValueType();
  1734. SDLoc DL(N);
  1735. if (FoundAnyExtend) {
  1736. SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
  1737. insertDAGNode(DAG, N, NewX);
  1738. X = NewX;
  1739. }
  1740. SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  1741. SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  1742. SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
  1743. // Insert the new nodes into the topological ordering. We must do this in
  1744. // a valid topological ordering as nothing is going to go back and re-sort
  1745. // these nodes. We continually insert before 'N' in sequence as this is
  1746. // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  1747. // hierarchy left to express.
  1748. insertDAGNode(DAG, N, NewMask);
  1749. insertDAGNode(DAG, N, NewAnd);
  1750. insertDAGNode(DAG, N, NewShift);
  1751. DAG.ReplaceAllUsesWith(N, NewShift);
  1752. DAG.RemoveDeadNode(N.getNode());
  1753. AM.Scale = 1 << ShiftAmt;
  1754. AM.IndexReg = NewAnd;
  1755. return false;
  1756. }
  1757. // Implement some heroics to detect shifts of masked values where the mask can
  1758. // be replaced by extending the shift and undoing that in the addressing mode
  1759. // scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
  1760. // (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
  1761. // the addressing mode. This results in code such as:
  1762. //
  1763. // int f(short *y, int *lookup_table) {
  1764. // ...
  1765. // return *y + lookup_table[*y >> 11];
  1766. // }
  1767. //
  1768. // Turning into:
  1769. // movzwl (%rdi), %eax
  1770. // movl %eax, %ecx
  1771. // shrl $11, %ecx
  1772. // addl (%rsi,%rcx,4), %eax
  1773. //
  1774. // Instead of:
  1775. // movzwl (%rdi), %eax
  1776. // movl %eax, %ecx
  1777. // shrl $9, %ecx
  1778. // andl $124, %rcx
  1779. // addl (%rsi,%rcx), %eax
  1780. //
  1781. // Note that this function assumes the mask is provided as a mask *after* the
  1782. // value is shifted. The input chain may or may not match that, but computing
  1783. // such a mask is trivial.
  1784. static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
  1785. uint64_t Mask,
  1786. SDValue Shift, SDValue X,
  1787. X86ISelAddressMode &AM) {
  1788. if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
  1789. !isa<ConstantSDNode>(Shift.getOperand(1)))
  1790. return true;
  1791. unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  1792. unsigned MaskLZ = countLeadingZeros(Mask);
  1793. unsigned MaskTZ = countTrailingZeros(Mask);
  1794. // The amount of shift we're trying to fit into the addressing mode is taken
  1795. // from the trailing zeros of the mask.
  1796. unsigned AMShiftAmt = MaskTZ;
  1797. // There is nothing we can do here unless the mask is removing some bits.
  1798. // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  1799. if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
1800. // We also need to ensure that the mask is a contiguous run of bits.
  1801. if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
  1802. // Scale the leading zero count down based on the actual size of the value.
  1803. // Also scale it down based on the size of the shift.
  1804. unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
  1805. if (MaskLZ < ScaleDown)
  1806. return true;
  1807. MaskLZ -= ScaleDown;
  1808. // The final check is to ensure that any masked out high bits of X are
  1809. // already known to be zero. Otherwise, the mask has a semantic impact
  1810. // other than masking out a couple of low bits. Unfortunately, because of
  1811. // the mask, zero extensions will be removed from operands in some cases.
  1812. // This code works extra hard to look through extensions because we can
  1813. // replace them with zero extensions cheaply if necessary.
  1814. bool ReplacingAnyExtend = false;
  1815. if (X.getOpcode() == ISD::ANY_EXTEND) {
  1816. unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
  1817. X.getOperand(0).getSimpleValueType().getSizeInBits();
  1818. // Assume that we'll replace the any-extend with a zero-extend, and
  1819. // narrow the search to the extended value.
  1820. X = X.getOperand(0);
  1821. MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
  1822. ReplacingAnyExtend = true;
  1823. }
  1824. APInt MaskedHighBits =
  1825. APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  1826. KnownBits Known = DAG.computeKnownBits(X);
  1827. if (MaskedHighBits != Known.Zero) return true;
  1828. // We've identified a pattern that can be transformed into a single shift
  1829. // and an addressing mode. Make it so.
  1830. MVT VT = N.getSimpleValueType();
  1831. if (ReplacingAnyExtend) {
  1832. assert(X.getValueType() != VT);
  1833. // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
  1834. SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
  1835. insertDAGNode(DAG, N, NewX);
  1836. X = NewX;
  1837. }
  1838. SDLoc DL(N);
  1839. SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  1840. SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  1841. SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  1842. SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
  1843. // Insert the new nodes into the topological ordering. We must do this in
  1844. // a valid topological ordering as nothing is going to go back and re-sort
  1845. // these nodes. We continually insert before 'N' in sequence as this is
  1846. // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  1847. // hierarchy left to express.
  1848. insertDAGNode(DAG, N, NewSRLAmt);
  1849. insertDAGNode(DAG, N, NewSRL);
  1850. insertDAGNode(DAG, N, NewSHLAmt);
  1851. insertDAGNode(DAG, N, NewSHL);
  1852. DAG.ReplaceAllUsesWith(N, NewSHL);
  1853. DAG.RemoveDeadNode(N.getNode());
  1854. AM.Scale = 1 << AMShiftAmt;
  1855. AM.IndexReg = NewSRL;
  1856. return false;
  1857. }
  1858. // Transform "(X >> SHIFT) & (MASK << C1)" to
  1859. // "((X >> (SHIFT + C1)) & (MASK)) << C1". Everything before the SHL will be
  1860. // matched to a BEXTR later. Returns false if the simplification is performed.
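// Illustrative instance with SHIFT == 4, MASK == 0xff, C1 == 2:
// (X >> 4) & 0x3fc becomes ((X >> 6) & 0xff) << 2; the srl+and half is what
// can later be matched to BEXTR, and the << 2 becomes scale 4.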
  1861. static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
  1862. uint64_t Mask,
  1863. SDValue Shift, SDValue X,
  1864. X86ISelAddressMode &AM,
  1865. const X86Subtarget &Subtarget) {
  1866. if (Shift.getOpcode() != ISD::SRL ||
  1867. !isa<ConstantSDNode>(Shift.getOperand(1)) ||
  1868. !Shift.hasOneUse() || !N.hasOneUse())
  1869. return true;
  1870. // Only do this if BEXTR will be matched by matchBEXTRFromAndImm.
  1871. if (!Subtarget.hasTBM() &&
  1872. !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
  1873. return true;
1874. // We need to ensure that the mask is a contiguous run of bits.
  1875. if (!isShiftedMask_64(Mask)) return true;
  1876. unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  1877. // The amount of shift we're trying to fit into the addressing mode is taken
  1878. // from the trailing zeros of the mask.
  1879. unsigned AMShiftAmt = countTrailingZeros(Mask);
  1880. // There is nothing we can do here unless the mask is removing some bits.
  1881. // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  1882. if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
  1883. MVT VT = N.getSimpleValueType();
  1884. SDLoc DL(N);
  1885. SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  1886. SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  1887. SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT);
  1888. SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask);
  1889. SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  1890. SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt);
  1891. // Insert the new nodes into the topological ordering. We must do this in
  1892. // a valid topological ordering as nothing is going to go back and re-sort
  1893. // these nodes. We continually insert before 'N' in sequence as this is
  1894. // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  1895. // hierarchy left to express.
  1896. insertDAGNode(DAG, N, NewSRLAmt);
  1897. insertDAGNode(DAG, N, NewSRL);
  1898. insertDAGNode(DAG, N, NewMask);
  1899. insertDAGNode(DAG, N, NewAnd);
  1900. insertDAGNode(DAG, N, NewSHLAmt);
  1901. insertDAGNode(DAG, N, NewSHL);
  1902. DAG.ReplaceAllUsesWith(N, NewSHL);
  1903. DAG.RemoveDeadNode(N.getNode());
  1904. AM.Scale = 1 << AMShiftAmt;
  1905. AM.IndexReg = NewAnd;
  1906. return false;
  1907. }
  1908. bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
  1909. unsigned Depth) {
  1910. SDLoc dl(N);
  1911. LLVM_DEBUG({
  1912. dbgs() << "MatchAddress: ";
  1913. AM.dump(CurDAG);
  1914. });
  1915. // Limit recursion.
  1916. if (Depth > 5)
  1917. return matchAddressBase(N, AM);
  1918. // If this is already a %rip relative address, we can only merge immediates
  1919. // into it. Instead of handling this in every case, we handle it here.
  1920. // RIP relative addressing: %rip + 32-bit displacement!
  1921. if (AM.isRIPRelative()) {
  1922. // FIXME: JumpTable and ExternalSymbol address currently don't like
  1923. // displacements. It isn't very important, but this should be fixed for
  1924. // consistency.
  1925. if (!(AM.ES || AM.MCSym) && AM.JT != -1)
  1926. return true;
  1927. if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
  1928. if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
  1929. return false;
  1930. return true;
  1931. }
  1932. switch (N.getOpcode()) {
  1933. default: break;
  1934. case ISD::LOCAL_RECOVER: {
  1935. if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
  1936. if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
  1937. // Use the symbol and don't prefix it.
  1938. AM.MCSym = ESNode->getMCSymbol();
  1939. return false;
  1940. }
  1941. break;
  1942. }
  1943. case ISD::Constant: {
  1944. uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
  1945. if (!foldOffsetIntoAddress(Val, AM))
  1946. return false;
  1947. break;
  1948. }
  1949. case X86ISD::Wrapper:
  1950. case X86ISD::WrapperRIP:
  1951. if (!matchWrapper(N, AM))
  1952. return false;
  1953. break;
  1954. case ISD::LOAD:
  1955. if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
  1956. return false;
  1957. break;
  1958. case ISD::FrameIndex:
  1959. if (AM.BaseType == X86ISelAddressMode::RegBase &&
  1960. AM.Base_Reg.getNode() == nullptr &&
  1961. (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
  1962. AM.BaseType = X86ISelAddressMode::FrameIndexBase;
  1963. AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
  1964. return false;
  1965. }
  1966. break;
  1967. case ISD::SHL:
  1968. if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
  1969. break;
  1970. if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
  1971. unsigned Val = CN->getZExtValue();
  1972. // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
  1973. // that the base operand remains free for further matching. If
  1974. // the base doesn't end up getting used, a post-processing step
  1975. // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
  1976. if (Val == 1 || Val == 2 || Val == 3) {
  1977. AM.Scale = 1 << Val;
  1978. SDValue ShVal = N.getOperand(0);
  1979. // Okay, we know that we have a scale by now. However, if the scaled
  1980. // value is an add of something and a constant, we can fold the
  1981. // constant into the disp field here.
  1982. if (CurDAG->isBaseWithConstantOffset(ShVal)) {
  1983. AM.IndexReg = ShVal.getOperand(0);
  1984. ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1));
  1985. uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
  1986. if (!foldOffsetIntoAddress(Disp, AM))
  1987. return false;
  1988. }
  1989. AM.IndexReg = ShVal;
  1990. return false;
  1991. }
  1992. }
  1993. break;
  1994. case ISD::SRL: {
  1995. // Scale must not be used already.
  1996. if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
  1997. // We only handle up to 64-bit values here as those are what matter for
  1998. // addressing mode optimizations.
  1999. assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
  2000. "Unexpected value size!");
  2001. SDValue And = N.getOperand(0);
  2002. if (And.getOpcode() != ISD::AND) break;
  2003. SDValue X = And.getOperand(0);
  2004. // The mask used for the transform is expected to be post-shift, but we
  2005. // found the shift first so just apply the shift to the mask before passing
  2006. // it down.
  2007. if (!isa<ConstantSDNode>(N.getOperand(1)) ||
  2008. !isa<ConstantSDNode>(And.getOperand(1)))
  2009. break;
  2010. uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
  2011. // Try to fold the mask and shift into the scale, and return false if we
  2012. // succeed.
  2013. if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
  2014. return false;
  2015. break;
  2016. }
  2017. case ISD::SMUL_LOHI:
  2018. case ISD::UMUL_LOHI:
  2019. // A mul_lohi where we need the low part can be folded as a plain multiply.
  2020. if (N.getResNo() != 0) break;
  2021. LLVM_FALLTHROUGH;
  2022. case ISD::MUL:
  2023. case X86ISD::MUL_IMM:
  2024. // X*[3,5,9] -> X+X*[2,4,8]
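// e.g. (illustrative) a multiply by 9 can become leaq (%rax,%rax,8), %rcx,
// i.e. base = X, index = X, scale = 8.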
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == nullptr &&
        AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getOperand(1))) {
            Reg = MulVal.getOperand(0);
            ConstantSDNode *AddVal =
                cast<ConstantSDNode>(MulVal.getOperand(1));
            uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
            if (foldOffsetIntoAddress(Disp, AM))
              Reg = N.getOperand(0);
          } else {
            Reg = N.getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;
  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address, leaving the
    // index field unused, use -B as the index. This is a win if A has multiple
    // parts that can be folded into the address. It also saves a mov if the
    // base register has other uses, since it avoids a two-address sub
    // instruction; however, it costs an additional mov if the index register
    // has other uses.
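    // e.g. for (GV + x) - y, GV and x can fold into the displacement and base
    // while -y becomes the index, giving roughly "negq %y; leaq GV(%x,%y), %dst"
    // (illustrative register names).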
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      N = Handle.getValue();
      AM = Backup;
      break;
    }
    N = Handle.getValue();
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = N.getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    // Negation will be emitted later to avoid creating dangling nodes if this
    // was an unprofitable LEA.
    AM.IndexReg = RHS;
    AM.NegateIndex = true;
    AM.Scale = 1;
    return false;
  }

  case ISD::ADD:
    if (!matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::OR:
    // We want to look through a transform in InstCombine and DAGCombiner that
    // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
    // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
    // An 'lea' can then be used to match the shift (multiply) and add:
    // and $1, %esi
    // lea (%rsi, %rdi, 8), %rax
    if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
        !matchAdd(N, AM, Depth))
      return false;
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;

    // We only handle up to 64-bit values here as those are what matter for
    // addressing mode optimizations.
    assert(N.getSimpleValueType().getSizeInBits() <= 64 &&
           "Unexpected value size!");

    if (!isa<ConstantSDNode>(N.getOperand(1)))
      break;

    if (N.getOperand(0).getOpcode() == ISD::SRL) {
      SDValue Shift = N.getOperand(0);
      SDValue X = Shift.getOperand(0);

      uint64_t Mask = N.getConstantOperandVal(1);

      // Try to fold the mask and shift into an extract and scale.
      if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
        return false;

      // Try to fold the mask and shift directly into the scale.
      if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
        return false;

      // Try to fold the mask and shift into BEXTR and scale.
      if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget))
        return false;
    }

    // Try to swap the mask and shift to place shifts which can be done as
    // a scale on the outside of the mask.
    if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM))
      return false;

    break;
  }
  case ISD::ZERO_EXTEND: {
    // Try to widen a zexted shift left to the same size as its use, so we can
    // match the shift as a scale factor.
    if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
      break;
    if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
      break;

    // Give up if the shift is not a valid scale factor [1,2,3].
    SDValue Shl = N.getOperand(0);
    auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
    if (!ShAmtC || ShAmtC->getZExtValue() > 3)
      break;

    // The narrow shift must only shift out zero bits (it must be 'nuw').
    // That makes it safe to widen to the destination type.
    APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
                                            ShAmtC->getZExtValue());
    if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
      break;

    // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
    MVT VT = N.getSimpleValueType();
    SDLoc DL(N);
    SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
    SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));

    // Convert the shift to scale factor.
    AM.Scale = 1 << ShAmtC->getZExtValue();
    AM.IndexReg = Zext;

    insertDAGNode(*CurDAG, N, Zext);
    insertDAGNode(*CurDAG, N, NewShl);
    CurDAG->ReplaceAllUsesWith(N, NewShl);
    CurDAG->RemoveDeadNode(N.getNode());
    return false;
  }
  }

  return matchAddressBase(N, AM);
}
/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (!AM.IndexReg.getNode()) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
                                                    X86ISelAddressMode &AM,
                                                    unsigned Depth) {
  SDLoc dl(N);
  LLVM_DEBUG({
    dbgs() << "MatchVectorAddress: ";
    AM.dump(CurDAG);
  });
  // Limit recursion.
  if (Depth > 5)
    return matchAddressBase(N, AM);

  // TODO: Support other operations.
  switch (N.getOpcode()) {
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!foldOffsetIntoAddress(Val, AM))
      return false;
    break;
  }
  case X86ISD::Wrapper:
    if (!matchWrapper(N, AM))
      return false;
    break;
  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    X86ISelAddressMode Backup = AM;
    if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
        !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
                                       Depth + 1))
      return false;
    AM = Backup;

    // Try again after commuting the operands.
    if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
                                       Depth + 1) &&
        !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
                                       Depth + 1))
      return false;
    AM = Backup;

    N = Handle.getValue();
    break;
  }
  }

  return matchAddressBase(N, AM);
}

/// Helper for selectVectorAddr. Handles things that can be folded into a
/// gather/scatter address. The index register and scale should have already
/// been handled.
bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
  return matchVectorAddressRecursively(N, AM, 0);
}

bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
                                       SDValue IndexOp, SDValue ScaleOp,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) {
  X86ISelAddressMode AM;
  AM.IndexReg = IndexOp;
  AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();

  unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace();
  if (AddrSpace == X86AS::GS)
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  if (AddrSpace == X86AS::FS)
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  if (AddrSpace == X86AS::SS)
    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);

  SDLoc DL(BasePtr);
  MVT VT = BasePtr.getSimpleValueType();

  // Try to match into the base and displacement fields.
  if (matchVectorAddress(BasePtr, AM))
    return false;

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  return true;
}
/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;
  if (Parent &&
      // This list of opcodes is all the nodes that have an "addr:$ptr" operand
      // but are not MemSDNodes, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
      Parent->getOpcode() != X86ISD::ENQCMD && // Fixme
      Parent->getOpcode() != X86ISD::ENQCMDS && // Fixme
      Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
      Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
    unsigned AddrSpace =
        cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    if (AddrSpace == X86AS::GS)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == X86AS::FS)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
    if (AddrSpace == X86AS::SS)
      AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
  }

  // Save the DL and VT before calling matchAddress, as it can invalidate N.
  SDLoc DL(N);
  MVT VT = N.getSimpleValueType();

  if (matchAddress(N, AM))
    return false;

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  return true;
}
bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
  // In static codegen with small code model, we can get the address of a label
  // into a register with 'movl'
  if (N->getOpcode() != X86ISD::Wrapper)
    return false;

  N = N.getOperand(0);

  // At least GNU as does not accept 'movl' for TPOFF relocations.
  // FIXME: We could use 'movl' when we know we are targeting MC.
  if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
    return false;

  Imm = N;
  if (N->getOpcode() != ISD::TargetGlobalAddress)
    return TM.getCodeModel() == CodeModel::Small;

  Optional<ConstantRange> CR =
      cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
  if (!CR)
    return TM.getCodeModel() == CodeModel::Small;

  return CR->getUnsignedMax().ult(1ull << 32);
}

bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
  // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
  SDLoc DL(N);

  if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
    return false;

  RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
  if (RN && RN->getReg() == 0)
    Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
    // Base could already be %rip, particularly in the x32 ABI.
    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
                                                     MVT::i64), 0);
    Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
                                         Base);
  }

  RN = dyn_cast<RegisterSDNode>(Index);
  if (RN && RN->getReg() == 0)
    Index = CurDAG->getRegister(0, MVT::i64);
  else {
    assert(Index.getValueType() == MVT::i32 &&
           "Expect to be extending 32-bit registers for use in LEA");
    SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL,
                                                     MVT::i64), 0);
    Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef,
                                          Index);
  }

  return true;
}
/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost-effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  // Save the DL and VT before calling matchAddress, as it can invalidate N.
  SDLoc DL(N);
  MVT VT = N.getSimpleValueType();

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (matchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;

  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode())
    Complexity = 1;
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg or a
  // simple shift.
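  // e.g. prefer "addl %reg, %reg" or "shll $1, %reg" over "leal (,%reg,2), %reg"
  // (illustrative).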
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, always use LEA to materialize RIP-relative addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  // Heuristic: try harder to form an LEA from ADD if the operands set flags.
  // Unlike ADD, LEA does not affect flags, so we will be less likely to require
  // duplicating flag-producing instructions later in the pipeline.
  if (N.getOpcode() == ISD::ADD) {
    auto isMathWithFlags = [](SDValue V) {
      switch (V.getOpcode()) {
      case X86ISD::ADD:
      case X86ISD::SUB:
      case X86ISD::ADC:
      case X86ISD::SBB:
      /* TODO: These opcodes can be added safely, but we may want to justify
               their inclusion for different reasons (better for reg-alloc).
      case X86ISD::SMUL:
      case X86ISD::UMUL:
      case X86ISD::OR:
      case X86ISD::XOR:
      case X86ISD::AND:
      */
        // Value 1 is the flag output of the node - verify it's not dead.
        return !SDValue(V.getNode(), 1).use_empty();
      default:
        return false;
      }
    };
    // TODO: This could be an 'or' rather than 'and' to make the transform more
    //       likely to happen. We might want to factor in whether there's a
    //       load folding opportunity for the math op that disappears with LEA.
    if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
      Complexity++;
  }

  if (AM.Disp)
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment);
  return true;
}
/// This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) {
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.SymbolFlags = GA->getTargetFlags();

  if (Subtarget->is32Bit()) {
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  }

  MVT VT = N.getSimpleValueType();
  getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment);
  return true;
}

bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
  // Keep track of the original value type and whether this value was
  // truncated. If we see a truncation from pointer type to VT that truncates
  // bits that are known to be zero, we can use a narrow reference.
  EVT VT = N.getValueType();
  bool WasTruncated = false;
  if (N.getOpcode() == ISD::TRUNCATE) {
    WasTruncated = true;
    N = N.getOperand(0);
  }

  if (N.getOpcode() != X86ISD::Wrapper)
    return false;

  // We can only use non-GlobalValues as immediates if they were not truncated,
  // as we do not have any range information. If we have a GlobalValue and the
  // address was not truncated, we can select it as an operand directly.
  unsigned Opc = N.getOperand(0)->getOpcode();
  if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
    Op = N.getOperand(0);
    // We can only select the operand directly if we didn't have to look past a
    // truncate.
    return !WasTruncated;
  }

  // Check that the global's range fits into VT.
  auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
  if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
    return false;

  // Okay, we can use a narrow reference.
  Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
                                      GA->getOffset(), GA->getTargetFlags());
  return true;
}

bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) {
  assert(Root && P && "Unknown root/parent nodes");
  if (!ISD::isNON_EXTLoad(N.getNode()) ||
      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))
    return false;

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
}

bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N,
                                       SDValue &Base, SDValue &Scale,
                                       SDValue &Index, SDValue &Disp,
                                       SDValue &Segment) {
  assert(Root && P && "Unknown root/parent nodes");
  if (N->getOpcode() != X86ISD::VBROADCAST_LOAD ||
      !IsProfitableToFold(N, P, Root) ||
      !IsLegalToFold(N, P, Root, OptLevel))
    return false;

  return selectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
}

/// Return an SDNode that returns the value of the global base register.
/// Output instructions required to initialize the global base register,
/// if necessary.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  auto &DL = MF->getDataLayout();
  return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}

bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
  if (N->getOpcode() == ISD::TRUNCATE)
    N = N->getOperand(0).getNode();
  if (N->getOpcode() != X86ISD::Wrapper)
    return false;

  auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
  if (!GA)
    return false;

  Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
  if (!CR)
    return Width == 32 && TM.getCodeModel() == CodeModel::Small;

  return CR->getSignedMin().sge(-1ull << Width) &&
         CR->getSignedMax().slt(1ull << Width);
}
static X86::CondCode getCondFromNode(SDNode *N) {
  assert(N->isMachineOpcode() && "Unexpected node");
  X86::CondCode CC = X86::COND_INVALID;
  unsigned Opc = N->getMachineOpcode();
  if (Opc == X86::JCC_1)
    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1));
  else if (Opc == X86::SETCCr)
    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0));
  else if (Opc == X86::SETCCm)
    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5));
  else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr ||
           Opc == X86::CMOV64rr)
    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2));
  else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm ||
           Opc == X86::CMOV64rm)
    CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6));

  return CC;
}

/// Test whether the given X86ISD::CMP node has any users that use a flag
/// other than ZF.
bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (UI->getOpcode() != ISD::CopyToReg ||
        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the condition code of the user.
      X86::CondCode CC = getCondFromNode(*FlagUI);

      switch (CC) {
      // Comparisons which only use the zero flag.
      case X86::COND_E: case X86::COND_NE:
        continue;
      // Anything else: assume conservatively.
      default:
        return false;
      }
    }
  }
  return true;
}

/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// flag to be accurate.
bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (UI->getOpcode() != ISD::CopyToReg ||
        cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the condition code of the user.
      X86::CondCode CC = getCondFromNode(*FlagUI);

      switch (CC) {
      // Comparisons which don't examine the SF flag.
      case X86::COND_A: case X86::COND_AE:
      case X86::COND_B: case X86::COND_BE:
      case X86::COND_E: case X86::COND_NE:
      case X86::COND_O: case X86::COND_NO:
      case X86::COND_P: case X86::COND_NP:
        continue;
      // Anything else: assume conservatively.
      default:
        return false;
      }
    }
  }
  return true;
}

static bool mayUseCarryFlag(X86::CondCode CC) {
  switch (CC) {
  // Comparisons which don't examine the CF flag.
  case X86::COND_O: case X86::COND_NO:
  case X86::COND_E: case X86::COND_NE:
  case X86::COND_S: case X86::COND_NS:
  case X86::COND_P: case X86::COND_NP:
  case X86::COND_L: case X86::COND_GE:
  case X86::COND_G: case X86::COND_LE:
    return false;
  // Anything else: assume conservatively.
  default:
    return true;
  }
}

/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    // Only check things that use the flags.
    if (UI.getUse().getResNo() != Flags.getResNo())
      continue;

    unsigned UIOpc = UI->getOpcode();

    if (UIOpc == ISD::CopyToReg) {
      // Only examine CopyToReg uses that copy to EFLAGS.
      if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
        return false;
      // Examine each user of the CopyToReg use.
      for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
           FlagUI != FlagUE; ++FlagUI) {
        // Only examine the Flag result.
        if (FlagUI.getUse().getResNo() != 1)
          continue;
        // Anything unusual: assume conservatively.
        if (!FlagUI->isMachineOpcode())
          return false;
        // Examine the condition code of the user.
        X86::CondCode CC = getCondFromNode(*FlagUI);

        if (mayUseCarryFlag(CC))
          return false;
      }

      // This CopyToReg is ok. Move on to the next user.
      continue;
    }

    // This might be an unselected node. So look for the pre-isel opcodes that
    // use flags.
    unsigned CCOpNo;
    switch (UIOpc) {
    default:
      // Something unusual. Be conservative.
      return false;
    case X86ISD::SETCC:       CCOpNo = 0; break;
    case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
    case X86ISD::CMOV:        CCOpNo = 2; break;
    case X86ISD::BRCOND:      CCOpNo = 2; break;
    }

    X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
    if (mayUseCarryFlag(CC))
      return false;
  }
  return true;
}
/// Check whether or not the chain ending in StoreNode is suitable for doing
/// the {load; op; store} to modify transformation.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal, SelectionDAG *CurDAG,
                                        unsigned LoadOpNo,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) {
  // Is the stored value result 0 of the operation?
  if (StoredVal.getResNo() != 0) return false;

  // Are there other uses of the operation other than the store?
  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
    return false;

  SDValue Load = StoredVal->getOperand(LoadOpNo);

  // Is the stored value a non-extending and non-indexed load?
  if (!ISD::isNormalLoad(Load.getNode())) return false;

  // Return LoadNode by reference.
  LoadNode = cast<LoadSDNode>(Load);

  // Is store the only read of the loaded value?
  if (!Load.hasOneUse())
    return false;

  // Is the address of the store the same as the load?
  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
      LoadNode->getOffset() != StoreNode->getOffset())
    return false;

  bool FoundLoad = false;
  SmallVector<SDValue, 4> ChainOps;
  SmallVector<const SDNode *, 4> LoopWorklist;
  SmallPtrSet<const SDNode *, 16> Visited;
  const unsigned int Max = 1024;
  // Visualization of Load-Op-Store fusion:
  // -------------------------
  // Legend:
  //    *-lines = Chain operand dependencies.
  //    |-lines = Normal operand dependencies.
  //    Dependencies flow down and right. n-suffix references multiple nodes.
  //
  //      C                        Xn  C
  //      *                         *  *
  //      *                         *  *
  //  Xn  A-LD  Yn                 TF  Yn
  //   *   *  \  |                  *   |
  //   *   *   \ |                  *   |
  //   *   *    \ |        =>       A--LD_OP_ST
  //   *   *     \|                      \
  //  TF   OP     \                       \
  //   *   |       \                       Zn
  //   *   |        \
  //  A-ST           Zn
  //
  // This merge induced dependences from: #1: Xn -> LD, OP, Zn
  //                                      #2: Yn -> LD
  //                                      #3: ST -> Zn
  // Ensure the transform is safe by checking for the dual
  // dependencies to make sure we do not induce a loop.
  // As LD is a predecessor to both OP and ST we can do this by checking:
  //  a). if LD is a predecessor to a member of Xn or Yn.
  //  b). if a Zn is a predecessor to ST.
  // However, (b) can only occur through being a chain predecessor to
  // ST, which is the same as Zn being a member or predecessor of Xn,
  // which is a subset of LD being a predecessor of Xn. So it's
  // subsumed by check (a).
  SDValue Chain = StoreNode->getChain();

  // Gather X elements in ChainOps.
  if (Chain == Load.getValue(1)) {
    FoundLoad = true;
    ChainOps.push_back(Load.getOperand(0));
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(1)) {
        FoundLoad = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Load.getOperand(0));
        continue;
      }
      LoopWorklist.push_back(Op.getNode());
      ChainOps.push_back(Op);
    }
  }

  if (!FoundLoad)
    return false;

  // Worklist is currently Xn. Add Yn to worklist.
  for (SDValue Op : StoredVal->ops())
    if (Op.getNode() != LoadNode)
      LoopWorklist.push_back(Op.getNode());

  // Check (a) if Load is a predecessor to Xn + Yn
  if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max,
                                   true))
    return false;

  InputChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps);
  return true;
}
// Change a chain of {load; op; store} of the same value into a simple op
// through memory of that value, if the uses of the modified value and its
// address are suitable.
//
// The tablegen memory-operand pattern is currently not able to match the case
// where the EFLAGS of the original operation are used.
//
// To move this to tablegen, we'll need to improve tablegen to allow flags to
// be transferred from a node in the pattern to the result node, probably with
// a new keyword. For example, we have this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (implicit EFLAGS)]>;
// but maybe need something like this
//   def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
//                   [(store (add (loadi64 addr:$dst), -1), addr:$dst),
//                    (transferrable EFLAGS)]>;
//
// Until then, we manually fold these and instruction select the operation
// here.
bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
  SDValue StoredVal = StoreNode->getOperand(1);
  unsigned Opc = StoredVal->getOpcode();
  // Before we try to select anything, make sure this is a memory operand size
  // and opcode we can handle. Note that this must match the code below that
  // actually lowers the opcodes.
  EVT MemVT = StoreNode->getMemoryVT();
  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
      MemVT != MVT::i8)
    return false;

  bool IsCommutable = false;
  bool IsNegate = false;
  switch (Opc) {
  default:
    return false;
  case X86ISD::SUB:
    IsNegate = isNullConstant(StoredVal.getOperand(0));
    break;
  case X86ISD::SBB:
    break;
  case X86ISD::ADD:
  case X86ISD::ADC:
  case X86ISD::AND:
  case X86ISD::OR:
  case X86ISD::XOR:
    IsCommutable = true;
    break;
  }

  unsigned LoadOpNo = IsNegate ? 1 : 0;
  LoadSDNode *LoadNode = nullptr;
  SDValue InputChain;
  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
                                   LoadNode, InputChain)) {
    if (!IsCommutable)
      return false;

    // This operation is commutable, try the other operand.
    LoadOpNo = 1;
    if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo,
                                     LoadNode, InputChain))
      return false;
  }

  SDValue Base, Scale, Index, Disp, Segment;
  if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp,
                  Segment))
    return false;

  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
                          unsigned Opc8) {
    switch (MemVT.getSimpleVT().SimpleTy) {
    case MVT::i64:
      return Opc64;
    case MVT::i32:
      return Opc32;
    case MVT::i16:
      return Opc16;
    case MVT::i8:
      return Opc8;
    default:
      llvm_unreachable("Invalid size!");
    }
  };

  MachineSDNode *Result;
  switch (Opc) {
  case X86ISD::SUB:
    // Handle negate.
    if (IsNegate) {
      unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m,
                                     X86::NEG8m);
      const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
                                      MVT::Other, Ops);
      break;
    }
    LLVM_FALLTHROUGH;
  case X86ISD::ADD:
    // Try to match inc/dec.
    if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) {
      bool IsOne = isOneConstant(StoredVal.getOperand(1));
      bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
      // ADD/SUB with 1/-1 can use inc/dec when the carry flag isn't used.
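      // e.g. (store (add (load [mem]), 1), [mem]) can become "incl [mem]" /
      // "incq [mem]" when no user of EFLAGS needs CF (illustrative).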
      if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
        unsigned NewOpc =
            ((Opc == X86ISD::ADD) == IsOne)
                ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
                : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
        const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
        Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
                                        MVT::Other, Ops);
        break;
      }
    }
    LLVM_FALLTHROUGH;
  case X86ISD::ADC:
  case X86ISD::SBB:
  case X86ISD::AND:
  case X86ISD::OR:
  case X86ISD::XOR: {
    auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
      switch (Opc) {
      case X86ISD::ADD:
        return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
                            X86::ADD8mr);
      case X86ISD::ADC:
        return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr,
                            X86::ADC8mr);
      case X86ISD::SUB:
        return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
                            X86::SUB8mr);
      case X86ISD::SBB:
        return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr,
                            X86::SBB8mr);
      case X86ISD::AND:
        return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr,
                            X86::AND8mr);
      case X86ISD::OR:
        return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr);
      case X86ISD::XOR:
        return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr,
                            X86::XOR8mr);
      default:
        llvm_unreachable("Invalid opcode!");
      }
    };
    auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
      switch (Opc) {
      case X86ISD::ADD:
        return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
      case X86ISD::ADC:
        return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
      case X86ISD::SUB:
        return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
      case X86ISD::SBB:
        return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
      case X86ISD::AND:
        return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
      case X86ISD::OR:
        return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
      case X86ISD::XOR:
        return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
      default:
        llvm_unreachable("Invalid opcode!");
      }
    };
    auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
      switch (Opc) {
      case X86ISD::ADD:
        return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
                            X86::ADD8mi);
      case X86ISD::ADC:
        return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi,
                            X86::ADC8mi);
      case X86ISD::SUB:
        return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
                            X86::SUB8mi);
      case X86ISD::SBB:
        return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi,
                            X86::SBB8mi);
      case X86ISD::AND:
        return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi,
                            X86::AND8mi);
      case X86ISD::OR:
        return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi,
                            X86::OR8mi);
      case X86ISD::XOR:
        return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi,
                            X86::XOR8mi);
      default:
        llvm_unreachable("Invalid opcode!");
      }
    };

    unsigned NewOpc = SelectRegOpcode(Opc);
    SDValue Operand = StoredVal->getOperand(1-LoadOpNo);

    // See if the operand is a constant that we can fold into an immediate
    // operand.
    if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
      int64_t OperandV = OperandC->getSExtValue();

      // Check if we can shrink the operand enough to fit in an immediate (or
      // fit into a smaller immediate) by negating it and switching the
      // operation.
      if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) &&
          ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) ||
           (MemVT == MVT::i64 && !isInt<32>(OperandV) &&
            isInt<32>(-OperandV))) &&
          hasNoCarryFlagUses(StoredVal.getValue(1))) {
        OperandV = -OperandV;
        Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
      }

      // First try to fit this into an Imm8 operand. If it doesn't fit, then try
      // the larger immediate operand.
      if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
        NewOpc = SelectImm8Opcode(Opc);
      } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
        NewOpc = SelectImmOpcode(Opc);
      }
    }

    if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) {
      SDValue CopyTo =
          CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS,
                               StoredVal.getOperand(2), SDValue());

      const SDValue Ops[] = {Base, Scale, Index, Disp,
                             Segment, Operand, CopyTo, CopyTo.getValue(1)};
      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                      Ops);
    } else {
      const SDValue Ops[] = {Base, Scale, Index, Disp,
                             Segment, Operand, InputChain};
      Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                      Ops);
    }
    break;
  }
  default:
    llvm_unreachable("Invalid opcode!");
  }

  MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(),
                                 LoadNode->getMemOperand()};
  CurDAG->setNodeMemRefs(Result, MemOps);

  // Update Load Chain uses as well.
  ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1));
  ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
  ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
  CurDAG->RemoveDeadNode(Node);
  return true;
}
// See if this is an X & Mask that we can match to BEXTR/BZHI.
// Where Mask is one of the following patterns:
//   a) x &  (1 << nbits) - 1
//   b) x & ~(-1 << nbits)
//   c) x &  (-1 >> (32 - y))
//   d) x << (32 - y) >> (32 - y)
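// e.g. with BMI2, pattern (a) on i32 can select to something like
// "bzhil %ecx, %edi, %eax" with nbits in %ecx (illustrative).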
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
  assert(
      (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
      "Should be either an and-mask, or right-shift after clearing high bits.");

  // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
  if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
    return false;

  MVT NVT = Node->getSimpleValueType(0);

  // Only supported for 32 and 64 bits.
  if (NVT != MVT::i32 && NVT != MVT::i64)
    return false;

  SDValue NBits;
  bool NegateNBits;
  // If we have BMI2's BZHI, we are ok with multi-use patterns.
  // Else, if we only have BMI1's BEXTR, we require one-use.
  const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
  auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses,
                                             Optional<bool> AllowExtraUses) {
    return AllowExtraUses.getValueOr(AllowExtraUsesByDefault) ||
           Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
  };
  auto checkOneUse = [checkUses](SDValue Op,
                                 Optional<bool> AllowExtraUses = None) {
    return checkUses(Op, 1, AllowExtraUses);
  };
  auto checkTwoUse = [checkUses](SDValue Op,
                                 Optional<bool> AllowExtraUses = None) {
    return checkUses(Op, 2, AllowExtraUses);
  };

  auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
    if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
      assert(V.getSimpleValueType() == MVT::i32 &&
             V.getOperand(0).getSimpleValueType() == MVT::i64 &&
             "Expected i64 -> i32 truncation");
      V = V.getOperand(0);
    }
    return V;
  };

  // a) x & ((1 << nbits) + (-1))
  auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
                        &NegateNBits](SDValue Mask) -> bool {
    // Match `add`. Must only have one use!
    if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
      return false;
    // We should be adding all-ones constant (i.e. subtracting one.)
    if (!isAllOnesConstant(Mask->getOperand(1)))
      return false;
    // Match `1 << nbits`. Might be truncated. Must only have one use!
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
      return false;
    if (!isOneConstant(M0->getOperand(0)))
      return false;
    NBits = M0->getOperand(1);
    NegateNBits = false;
    return true;
  };

  auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) {
    V = peekThroughOneUseTruncation(V);
    return CurDAG->MaskedValueIsAllOnes(
        V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(),
                                NVT.getSizeInBits()));
  };

  // b) x & ~(-1 << nbits)
  auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
                        &NBits, &NegateNBits](SDValue Mask) -> bool {
    // Match `~()`. Must only have one use!
    if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
      return false;
    // The -1 only has to be all-ones for the final Node's NVT.
    if (!isAllOnes(Mask->getOperand(1)))
      return false;
    // Match `-1 << nbits`. Might be truncated. Must only have one use!
    SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0));
    if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0))
      return false;
    // The -1 only has to be all-ones for the final Node's NVT.
    if (!isAllOnes(M0->getOperand(0)))
      return false;
    NBits = M0->getOperand(1);
    NegateNBits = false;
    return true;
  };

  // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
  // or leave the shift amount as-is, but then we'll have to negate it.
  auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
                                                     unsigned Bitwidth) {
    NBits = ShiftAmt;
    NegateNBits = true;

    // Skip over a truncate of the shift amount, if any.
    if (NBits.getOpcode() == ISD::TRUNCATE)
      NBits = NBits.getOperand(0);

    // Try to match the shift amount as (bitwidth - y). It should go away, too.
    // If it doesn't match, that's fine, we'll just negate it ourselves.
    if (NBits.getOpcode() != ISD::SUB)
      return;
    auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
    if (!V0 || V0->getZExtValue() != Bitwidth)
      return;
    NBits = NBits.getOperand(1);
    NegateNBits = false;
  };

  // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
  //    or
  // c) x & (-1 >> (32 - y))
  auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
                        canonicalizeShiftAmt](SDValue Mask) -> bool {
    // The mask itself may be truncated.
    Mask = peekThroughOneUseTruncation(Mask);
    unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
    // Match `l>>`. Must only have one use!
    if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask))
      return false;
    // We should be shifting truly all-ones constant.
    if (!isAllOnesConstant(Mask.getOperand(0)))
      return false;
    SDValue M1 = Mask.getOperand(1);
    // The shift amount should not be used externally.
    if (!checkOneUse(M1))
      return false;
    canonicalizeShiftAmt(M1, Bitwidth);
    // Pattern c. is non-canonical, and is expanded into pattern d. iff there
    // is no extra use of the mask. Clearly, there was one since we are here.
    // But at the same time, if we need to negate the shift amount,
    // then we don't want the mask to stick around, else it's unprofitable.
    return !NegateNBits;
  };

  SDValue X;

  // d) x << z >> z but then we'll have to subtract z from bitwidth
  //    or
  // d) x << (32 - y) >> (32 - y)
  auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
                        AllowExtraUsesByDefault, &NegateNBits,
                        &X](SDNode *Node) -> bool {
    if (Node->getOpcode() != ISD::SRL)
      return false;
    SDValue N0 = Node->getOperand(0);
    if (N0->getOpcode() != ISD::SHL)
      return false;
    unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
    SDValue N1 = Node->getOperand(1);
    SDValue N01 = N0->getOperand(1);
    // Both of the shifts must be by the exact same value.
    if (N1 != N01)
      return false;
    canonicalizeShiftAmt(N1, Bitwidth);
    // There should not be any external uses of the inner shift / shift amount.
    // Note that while we are generally okay with external uses given BMI2,
    // iff we need to negate the shift amount, we are not okay with extra uses.
    const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
    if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
      return false;
    X = N0->getOperand(0);
    return true;
  };

  auto matchLowBitMask = [matchPatternA, matchPatternB,
                          matchPatternC](SDValue Mask) -> bool {
    return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask);
  };

  if (Node->getOpcode() == ISD::AND) {
    X = Node->getOperand(0);
    SDValue Mask = Node->getOperand(1);

    if (matchLowBitMask(Mask)) {
      // Great.
    } else {
      std::swap(X, Mask);
      if (!matchLowBitMask(Mask))
        return false;
    }
  } else if (!matchPatternD(Node))
    return false;

  // If we need to negate the shift amount, require BMI2 BZHI support.
  // It's just too unprofitable for BMI1 BEXTR.
  if (NegateNBits && !Subtarget->hasBMI2())
    return false;

  SDLoc DL(Node);

  // Truncate the shift amount.
  NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);

  // Insert 8-bit NBits into lowest 8 bits of 32-bit register.
  // All the other bits are undefined, we do not care about them.
  SDValue ImplDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
  insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);

  SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
  insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
  NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                         MVT::i32, ImplDef, NBits, SRIdxVal),
                  0);
  insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);

  // We might have matched the amount of high bits to be cleared,
  // but we want the amount of low bits to be kept, so negate it then.
  if (NegateNBits) {
    SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
    insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);

    NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
    insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
  }

  if (Subtarget->hasBMI2()) {
    // Great, just emit the BZHI.
    if (NVT != MVT::i32) {
      // But have to place the bit count into the wide-enough register first.
      NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits);
      insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
    }

    SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits);
    ReplaceNode(Node, Extract.getNode());
    SelectCode(Extract.getNode());
    return true;
  }
  // Else, if we do *NOT* have BMI2, let's find out if 'X' is *logically*
  // shifted (potentially with a one-use trunc in between), and if the
  // truncation was the only use of the shift, look past that one-use
  // truncation.
  {
    SDValue RealX = peekThroughOneUseTruncation(X);
    // FIXME: only if the shift is one-use?
    if (RealX != X && RealX.getOpcode() == ISD::SRL)
      X = RealX;
  }

  MVT XVT = X.getSimpleValueType();

  // Else, emitting BEXTR requires one more step.
  // The 'control' of BEXTR has the pattern of:
  // [15...8 bit][ 7...0 bit] location
  // [ bit count][     shift] name
  // I.e. 0b000000011'00000001 means  (x >> 0b1) & 0b11

  // Shift NBits left by 8 bits, thus producing 'control'.
  // This leaves the low 8 bits zeroed.
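  // e.g. for nbits = 5 this gives Control = 5 << 8 = 0x0500 before any shift
  // amount is OR'd into the low byte (illustrative).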
  SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8);
  insertDAGNode(*CurDAG, SDValue(Node, 0), C8);
  SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8);
  insertDAGNode(*CurDAG, SDValue(Node, 0), Control);

  // If the 'X' is *logically* shifted, we can fold that shift into 'control'.
  // FIXME: only if the shift is one-use?
  if (X.getOpcode() == ISD::SRL) {
    SDValue ShiftAmt = X.getOperand(1);
    X = X.getOperand(0);

    assert(ShiftAmt.getValueType() == MVT::i8 &&
           "Expected shift amount to be i8");

    // Now, *zero*-extend the shift amount. The bits 8...15 *must* be zero!
    // We could zext to i16 in some form, but we intentionally don't do that.
    SDValue OrigShiftAmt = ShiftAmt;
    ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt);
    insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt);

    // And now 'or' these low 8 bits of shift amount into the 'control'.
    Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt);
    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
  }

  // But have to place the 'control' into the wide-enough register first.
  if (XVT != MVT::i32) {
    Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control);
    insertDAGNode(*CurDAG, SDValue(Node, 0), Control);
  }

  // And finally, form the BEXTR itself.
  SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control);

  // The 'X' was originally truncated. Do that now.
  if (XVT != NVT) {
    insertDAGNode(*CurDAG, SDValue(Node, 0), Extract);
    Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract);
  }

  ReplaceNode(Node, Extract.getNode());
  SelectCode(Extract.getNode());
  return true;
}
  3316. // See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
  3317. MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
  3318. MVT NVT = Node->getSimpleValueType(0);
  3319. SDLoc dl(Node);
  3320. SDValue N0 = Node->getOperand(0);
  3321. SDValue N1 = Node->getOperand(1);
  3322. // If we have TBM we can use an immediate for the control. If we have BMI
  3323. // we should only do this if the BEXTR instruction is implemented well.
  3324. // Otherwise moving the control into a register makes this more costly.
  3325. // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
  3326. // hoisting the move immediate would make it worthwhile with a less optimal
  3327. // BEXTR?
  3328. bool PreferBEXTR =
  3329. Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR());
  3330. if (!PreferBEXTR && !Subtarget->hasBMI2())
  3331. return nullptr;
  3332. // Must have a shift right.
  3333. if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
  3334. return nullptr;
  3335. // Shift can't have additional users.
  3336. if (!N0->hasOneUse())
  3337. return nullptr;
  3338. // Only supported for 32 and 64 bits.
  3339. if (NVT != MVT::i32 && NVT != MVT::i64)
  3340. return nullptr;
  3341. // Shift amount and RHS of and must be constant.
  3342. ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
  3343. ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
  3344. if (!MaskCst || !ShiftCst)
  3345. return nullptr;
  3346. // And RHS must be a mask.
  3347. uint64_t Mask = MaskCst->getZExtValue();
  3348. if (!isMask_64(Mask))
  3349. return nullptr;
  3350. uint64_t Shift = ShiftCst->getZExtValue();
  3351. uint64_t MaskSize = countPopulation(Mask);
  3352. // Don't interfere with something that can be handled by extracting AH.
  3353. // TODO: If we are able to fold a load, BEXTR might still be better than AH.
  3354. if (Shift == 8 && MaskSize == 8)
  3355. return nullptr;
  3356. // Make sure we are only using bits that were in the original value, not
  3357. // shifted in.
  3358. if (Shift + MaskSize > NVT.getSizeInBits())
  3359. return nullptr;
  3360. // BZHI, if available, is always fast, unlike BEXTR. But even if we decide
  3361. // that we can't use BEXTR, it is only worthwhile using BZHI if the mask
  3362. // does not fit into 32 bits. Load folding is not a sufficient reason.
  3363. if (!PreferBEXTR && MaskSize <= 32)
  3364. return nullptr;
  3365. SDValue Control;
  3366. unsigned ROpc, MOpc;
  3367. if (!PreferBEXTR) {
  3368. assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
  3369. // If we can't make use of BEXTR then we can't fuse shift+mask stages.
3370. // Let's perform the mask first and apply the shift later. Note that we need
3371. // to widen the mask to account for the fact that the shift is applied afterwards!
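// For instance, (x >> 5) & ((1ULL << 40) - 1) reaches here with Shift = 5 and
// MaskSize = 40: BZHI with a control of 45 keeps the low 45 bits of x, and the
// SHR by 5 emitted further down produces the final value.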
  3372. Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
  3373. ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
  3374. MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
  3375. unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
  3376. Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
  3377. } else {
  3378. // The 'control' of BEXTR has the pattern of:
  3379. // [15...8 bit][ 7...0 bit] location
  3380. // [ bit count][ shift] name
3381. // I.e. 0b00000011'00000001 means (x >> 0b1) & 0b111
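// E.g. (x >> 4) & 0xFFF gives Shift = 4, MaskSize = 12 and a control of
// 4 | (12 << 8) = 0x0C04.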
  3382. Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT);
  3383. if (Subtarget->hasTBM()) {
  3384. ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri;
  3385. MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi;
  3386. } else {
  3387. assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
3388. // BMI requires the immediate to be placed in a register.
  3389. ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
  3390. MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
  3391. unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
  3392. Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
  3393. }
  3394. }
  3395. MachineSDNode *NewNode;
  3396. SDValue Input = N0->getOperand(0);
  3397. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  3398. if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  3399. SDValue Ops[] = {
  3400. Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)};
  3401. SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
  3402. NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  3403. // Update the chain.
  3404. ReplaceUses(Input.getValue(1), SDValue(NewNode, 2));
  3405. // Record the mem-refs
  3406. CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()});
  3407. } else {
  3408. NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control);
  3409. }
  3410. if (!PreferBEXTR) {
  3411. // We still need to apply the shift.
  3412. SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
  3413. unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
  3414. NewNode =
  3415. CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
  3416. }
  3417. return NewNode;
  3418. }
3419. // Emit a PCMPISTR(I/M) instruction.
  3420. MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
  3421. bool MayFoldLoad, const SDLoc &dl,
  3422. MVT VT, SDNode *Node) {
  3423. SDValue N0 = Node->getOperand(0);
  3424. SDValue N1 = Node->getOperand(1);
  3425. SDValue Imm = Node->getOperand(2);
  3426. const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  3427. Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
  3428. // Try to fold a load. No need to check alignment.
  3429. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  3430. if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  3431. SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
  3432. N1.getOperand(0) };
  3433. SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other);
  3434. MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  3435. // Update the chain.
  3436. ReplaceUses(N1.getValue(1), SDValue(CNode, 2));
  3437. // Record the mem-refs
  3438. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
  3439. return CNode;
  3440. }
  3441. SDValue Ops[] = { N0, N1, Imm };
  3442. SDVTList VTs = CurDAG->getVTList(VT, MVT::i32);
  3443. MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
  3444. return CNode;
  3445. }
3446. // Emit a PCMPESTR(I/M) instruction. Also return the Glue result in case we need
  3447. // to emit a second instruction after this one. This is needed since we have two
  3448. // copyToReg nodes glued before this and we need to continue that glue through.
  3449. MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
  3450. bool MayFoldLoad, const SDLoc &dl,
  3451. MVT VT, SDNode *Node,
  3452. SDValue &InFlag) {
  3453. SDValue N0 = Node->getOperand(0);
  3454. SDValue N2 = Node->getOperand(2);
  3455. SDValue Imm = Node->getOperand(4);
  3456. const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue();
  3457. Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType());
  3458. // Try to fold a load. No need to check alignment.
  3459. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  3460. if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  3461. SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
  3462. N2.getOperand(0), InFlag };
  3463. SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
  3464. MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  3465. InFlag = SDValue(CNode, 3);
  3466. // Update the chain.
  3467. ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
  3468. // Record the mem-refs
  3469. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()});
  3470. return CNode;
  3471. }
  3472. SDValue Ops[] = { N0, N2, Imm, InFlag };
  3473. SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
  3474. MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
  3475. InFlag = SDValue(CNode, 2);
  3476. return CNode;
  3477. }
  3478. bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  3479. EVT VT = N->getValueType(0);
  3480. // Only handle scalar shifts.
  3481. if (VT.isVector())
  3482. return false;
  3483. // Narrower shifts only mask to 5 bits in hardware.
  3484. unsigned Size = VT == MVT::i64 ? 64 : 32;
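// (Hardware shifts compute the effective count as CL & 31 for 8/16/32-bit
// operations and CL & 63 for 64-bit ones, so any rewrite of the shift amount
// below only has to be congruent modulo Size.)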
  3485. SDValue OrigShiftAmt = N->getOperand(1);
  3486. SDValue ShiftAmt = OrigShiftAmt;
  3487. SDLoc DL(N);
  3488. // Skip over a truncate of the shift amount.
  3489. if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
  3490. ShiftAmt = ShiftAmt->getOperand(0);
  3491. // This function is called after X86DAGToDAGISel::matchBitExtract(),
3492. // so we are not afraid of messing up a BZHI/BEXTR pattern.
  3493. SDValue NewShiftAmt;
  3494. if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
  3495. SDValue Add0 = ShiftAmt->getOperand(0);
  3496. SDValue Add1 = ShiftAmt->getOperand(1);
  3497. auto *Add0C = dyn_cast<ConstantSDNode>(Add0);
  3498. auto *Add1C = dyn_cast<ConstantSDNode>(Add1);
  3499. // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
  3500. // to avoid the ADD/SUB.
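// E.g. a 32-bit shift by (x + 32) is the same as a shift by x, because the
// hardware only looks at the low 5 bits of the count.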
  3501. if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
  3502. NewShiftAmt = Add0;
  3503. // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
  3504. // to generate a NEG instead of a SUB of a constant.
  3505. } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C &&
  3506. Add0C->getZExtValue() != 0) {
  3507. EVT SubVT = ShiftAmt.getValueType();
  3508. SDValue X;
  3509. if (Add0C->getZExtValue() % Size == 0)
  3510. X = Add1;
  3511. else if (ShiftAmt.hasOneUse() && Size == 64 &&
  3512. Add0C->getZExtValue() % 32 == 0) {
  3513. // We have a 64-bit shift by (n*32-x), turn it into -(x+n*32).
  3514. // This is mainly beneficial if we already compute (x+n*32).
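// E.g. a 64-bit shift by (32 - x) can become a shift by -(x + 32), since
// (32 - x) and -(x + 32) agree modulo 64 and (x + 32) may already be computed.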
  3515. if (Add1.getOpcode() == ISD::TRUNCATE) {
  3516. Add1 = Add1.getOperand(0);
  3517. SubVT = Add1.getValueType();
  3518. }
  3519. if (Add0.getValueType() != SubVT) {
  3520. Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT);
  3521. insertDAGNode(*CurDAG, OrigShiftAmt, Add0);
  3522. }
  3523. X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0);
  3524. insertDAGNode(*CurDAG, OrigShiftAmt, X);
  3525. } else
  3526. return false;
  3527. // Insert a negate op.
3528. // TODO: This isn't guaranteed to replace the sub if the sub also feeds a
3529. // logic cone that is not a shift.
  3530. SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
  3531. SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X);
  3532. NewShiftAmt = Neg;
  3533. // Insert these operands into a valid topological order so they can
  3534. // get selected independently.
  3535. insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
  3536. insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
  3537. } else
  3538. return false;
  3539. } else
  3540. return false;
  3541. if (NewShiftAmt.getValueType() != MVT::i8) {
  3542. // Need to truncate the shift amount.
  3543. NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
  3544. // Add to a correct topological ordering.
  3545. insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
  3546. }
  3547. // Insert a new mask to keep the shift amount legal. This should be removed
  3548. // by isel patterns.
  3549. NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
  3550. CurDAG->getConstant(Size - 1, DL, MVT::i8));
  3551. // Place in a correct topological ordering.
  3552. insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
  3553. SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
  3554. NewShiftAmt);
  3555. if (UpdatedNode != N) {
  3556. // If we found an existing node, we should replace ourselves with that node
  3557. // and wait for it to be selected after its other users.
  3558. ReplaceNode(N, UpdatedNode);
  3559. return true;
  3560. }
  3561. // If the original shift amount is now dead, delete it so that we don't run
  3562. // it through isel.
  3563. if (OrigShiftAmt.getNode()->use_empty())
  3564. CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
  3565. // Now that we've optimized the shift amount, defer to normal isel to get
  3566. // load folding and legacy vs BMI2 selection without repeating it here.
  3567. SelectCode(N);
  3568. return true;
  3569. }
  3570. bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
  3571. MVT NVT = N->getSimpleValueType(0);
  3572. unsigned Opcode = N->getOpcode();
  3573. SDLoc dl(N);
  3574. // For operations of the form (x << C1) op C2, check if we can use a smaller
  3575. // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
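// E.g. (x << 8) | 0x1F00 needs a 4-byte immediate, whereas the equivalent
// ((x | 0x1F) << 8) only needs a sign-extended 8-bit immediate.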
  3576. SDValue Shift = N->getOperand(0);
  3577. SDValue N1 = N->getOperand(1);
  3578. ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  3579. if (!Cst)
  3580. return false;
  3581. int64_t Val = Cst->getSExtValue();
  3582. // If we have an any_extend feeding the AND, look through it to see if there
  3583. // is a shift behind it. But only if the AND doesn't use the extended bits.
  3584. // FIXME: Generalize this to other ANY_EXTEND than i32 to i64?
  3585. bool FoundAnyExtend = false;
  3586. if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() &&
  3587. Shift.getOperand(0).getSimpleValueType() == MVT::i32 &&
  3588. isUInt<32>(Val)) {
  3589. FoundAnyExtend = true;
  3590. Shift = Shift.getOperand(0);
  3591. }
  3592. if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
  3593. return false;
  3594. // i8 is unshrinkable, i16 should be promoted to i32.
  3595. if (NVT != MVT::i32 && NVT != MVT::i64)
  3596. return false;
  3597. ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  3598. if (!ShlCst)
  3599. return false;
  3600. uint64_t ShAmt = ShlCst->getZExtValue();
  3601. // Make sure that we don't change the operation by removing bits.
  3602. // This only matters for OR and XOR, AND is unaffected.
  3603. uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
  3604. if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
  3605. return false;
  3606. // Check the minimum bitwidth for the new constant.
  3607. // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
  3608. auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
  3609. if (Opcode == ISD::AND) {
  3610. // AND32ri is the same as AND64ri32 with zext imm.
  3611. // Try this before sign extended immediates below.
  3612. ShiftedVal = (uint64_t)Val >> ShAmt;
  3613. if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
  3614. return true;
  3615. // Also swap order when the AND can become MOVZX.
  3616. if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX)
  3617. return true;
  3618. }
  3619. ShiftedVal = Val >> ShAmt;
  3620. if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
  3621. (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
  3622. return true;
  3623. if (Opcode != ISD::AND) {
  3624. // MOV32ri+OR64r/XOR64r is cheaper than MOV64ri64+OR64rr/XOR64rr
  3625. ShiftedVal = (uint64_t)Val >> ShAmt;
  3626. if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
  3627. return true;
  3628. }
  3629. return false;
  3630. };
  3631. int64_t ShiftedVal;
  3632. if (!CanShrinkImmediate(ShiftedVal))
  3633. return false;
  3634. // Ok, we can reorder to get a smaller immediate.
3635. // But, it's possible the original immediate allowed an AND to become MOVZX.
3636. // Do this check late so that the MaskedValueIsZero call happens as late as
3637. // possible.
  3638. if (Opcode == ISD::AND) {
  3639. // Find the smallest zext this could possibly be.
  3640. unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
  3641. ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
  3642. // Figure out which bits need to be zero to achieve that mask.
  3643. APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
  3644. ZExtWidth);
  3645. NeededMask &= ~Cst->getAPIntValue();
  3646. if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask))
  3647. return false;
  3648. }
  3649. SDValue X = Shift.getOperand(0);
  3650. if (FoundAnyExtend) {
  3651. SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X);
  3652. insertDAGNode(*CurDAG, SDValue(N, 0), NewX);
  3653. X = NewX;
  3654. }
  3655. SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
  3656. insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
  3657. SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst);
  3658. insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
  3659. SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
  3660. Shift.getOperand(1));
  3661. ReplaceNode(N, NewSHL.getNode());
  3662. SelectCode(NewSHL.getNode());
  3663. return true;
  3664. }
  3665. bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
  3666. SDNode *ParentB, SDNode *ParentC,
  3667. SDValue A, SDValue B, SDValue C,
  3668. uint8_t Imm) {
  3669. assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
  3670. C.isOperandOf(ParentC) && "Incorrect parent node");
  3671. auto tryFoldLoadOrBCast =
  3672. [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
  3673. SDValue &Index, SDValue &Disp, SDValue &Segment) {
  3674. if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
  3675. return true;
  3676. // Not a load, check for broadcast which may be behind a bitcast.
  3677. if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
  3678. P = L.getNode();
  3679. L = L.getOperand(0);
  3680. }
  3681. if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
  3682. return false;
  3683. // Only 32 and 64 bit broadcasts are supported.
  3684. auto *MemIntr = cast<MemIntrinsicSDNode>(L);
  3685. unsigned Size = MemIntr->getMemoryVT().getSizeInBits();
  3686. if (Size != 32 && Size != 64)
  3687. return false;
  3688. return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
  3689. };
  3690. bool FoldedLoad = false;
  3691. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  3692. if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  3693. FoldedLoad = true;
  3694. } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
  3695. Tmp4)) {
  3696. FoldedLoad = true;
  3697. std::swap(A, C);
  3698. // Swap bits 1/4 and 3/6.
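// (Bit i of the immediate is the result for the input combination where A, B
// and C are bits 2, 1 and 0 of i, so swapping the A and C operands exchanges
// the entries whose indices differ only in those bits: 0b001<->0b100 (imm bits
// 1/4) and 0b011<->0b110 (imm bits 3/6).)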
  3699. uint8_t OldImm = Imm;
  3700. Imm = OldImm & 0xa5;
  3701. if (OldImm & 0x02) Imm |= 0x10;
  3702. if (OldImm & 0x10) Imm |= 0x02;
  3703. if (OldImm & 0x08) Imm |= 0x40;
  3704. if (OldImm & 0x40) Imm |= 0x08;
  3705. } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
  3706. Tmp4)) {
  3707. FoldedLoad = true;
  3708. std::swap(B, C);
  3709. // Swap bits 1/2 and 5/6.
  3710. uint8_t OldImm = Imm;
  3711. Imm = OldImm & 0x99;
  3712. if (OldImm & 0x02) Imm |= 0x04;
  3713. if (OldImm & 0x04) Imm |= 0x02;
  3714. if (OldImm & 0x20) Imm |= 0x40;
  3715. if (OldImm & 0x40) Imm |= 0x20;
  3716. }
  3717. SDLoc DL(Root);
  3718. SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8);
  3719. MVT NVT = Root->getSimpleValueType(0);
  3720. MachineSDNode *MNode;
  3721. if (FoldedLoad) {
  3722. SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
  3723. unsigned Opc;
  3724. if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) {
  3725. auto *MemIntr = cast<MemIntrinsicSDNode>(C);
  3726. unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits();
  3727. assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!");
  3728. bool UseD = EltSize == 32;
  3729. if (NVT.is128BitVector())
  3730. Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi;
  3731. else if (NVT.is256BitVector())
  3732. Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi;
  3733. else if (NVT.is512BitVector())
  3734. Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi;
  3735. else
  3736. llvm_unreachable("Unexpected vector size!");
  3737. } else {
  3738. bool UseD = NVT.getVectorElementType() == MVT::i32;
  3739. if (NVT.is128BitVector())
  3740. Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi;
  3741. else if (NVT.is256BitVector())
  3742. Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi;
  3743. else if (NVT.is512BitVector())
  3744. Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi;
  3745. else
  3746. llvm_unreachable("Unexpected vector size!");
  3747. }
  3748. SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)};
  3749. MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
  3750. // Update the chain.
  3751. ReplaceUses(C.getValue(1), SDValue(MNode, 1));
  3752. // Record the mem-refs
  3753. CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()});
  3754. } else {
  3755. bool UseD = NVT.getVectorElementType() == MVT::i32;
  3756. unsigned Opc;
  3757. if (NVT.is128BitVector())
  3758. Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri;
  3759. else if (NVT.is256BitVector())
  3760. Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri;
  3761. else if (NVT.is512BitVector())
  3762. Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri;
  3763. else
  3764. llvm_unreachable("Unexpected vector size!");
  3765. MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm});
  3766. }
  3767. ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0));
  3768. CurDAG->RemoveDeadNode(Root);
  3769. return true;
  3770. }
  3771. // Try to match two logic ops to a VPTERNLOG.
  3772. // FIXME: Handle more complex patterns that use an operand more than once?
  3773. bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
  3774. MVT NVT = N->getSimpleValueType(0);
  3775. // Make sure we support VPTERNLOG.
  3776. if (!NVT.isVector() || !Subtarget->hasAVX512() ||
  3777. NVT.getVectorElementType() == MVT::i1)
  3778. return false;
  3779. // We need VLX for 128/256-bit.
  3780. if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
  3781. return false;
  3782. SDValue N0 = N->getOperand(0);
  3783. SDValue N1 = N->getOperand(1);
  3784. auto getFoldableLogicOp = [](SDValue Op) {
  3785. // Peek through single use bitcast.
  3786. if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse())
  3787. Op = Op.getOperand(0);
  3788. if (!Op.hasOneUse())
  3789. return SDValue();
  3790. unsigned Opc = Op.getOpcode();
  3791. if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR ||
  3792. Opc == X86ISD::ANDNP)
  3793. return Op;
  3794. return SDValue();
  3795. };
  3796. SDValue A, FoldableOp;
  3797. if ((FoldableOp = getFoldableLogicOp(N1))) {
  3798. A = N0;
  3799. } else if ((FoldableOp = getFoldableLogicOp(N0))) {
  3800. A = N1;
  3801. } else
  3802. return false;
  3803. SDValue B = FoldableOp.getOperand(0);
  3804. SDValue C = FoldableOp.getOperand(1);
  3805. SDNode *ParentA = N;
  3806. SDNode *ParentB = FoldableOp.getNode();
  3807. SDNode *ParentC = FoldableOp.getNode();
  3808. // We can build the appropriate control immediate by performing the logic
  3809. // operation we're matching using these constants for A, B, and C.
  3810. uint8_t TernlogMagicA = 0xf0;
  3811. uint8_t TernlogMagicB = 0xcc;
  3812. uint8_t TernlogMagicC = 0xaa;
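// These bytes are the 8-entry truth tables of A, B and C themselves, indexed
// by (A << 2) | (B << 1) | C. E.g. for (xor A, (and B, C)) the code below
// computes 0xf0 ^ (0xcc & 0xaa) = 0xf0 ^ 0x88 = 0x78 as the immediate.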
  3813. // Some of the inputs may be inverted, peek through them and invert the
  3814. // magic values accordingly.
  3815. // TODO: There may be a bitcast before the xor that we should peek through.
  3816. auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
  3817. if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
  3818. ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
  3819. Magic = ~Magic;
  3820. Parent = Op.getNode();
  3821. Op = Op.getOperand(0);
  3822. }
  3823. };
  3824. PeekThroughNot(A, ParentA, TernlogMagicA);
  3825. PeekThroughNot(B, ParentB, TernlogMagicB);
  3826. PeekThroughNot(C, ParentC, TernlogMagicC);
  3827. uint8_t Imm;
  3828. switch (FoldableOp.getOpcode()) {
  3829. default: llvm_unreachable("Unexpected opcode!");
  3830. case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break;
  3831. case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break;
  3832. case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break;
  3833. case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break;
  3834. }
  3835. switch (N->getOpcode()) {
  3836. default: llvm_unreachable("Unexpected opcode!");
  3837. case X86ISD::ANDNP:
  3838. if (A == N0)
  3839. Imm &= ~TernlogMagicA;
  3840. else
  3841. Imm = ~(Imm) & TernlogMagicA;
  3842. break;
  3843. case ISD::AND: Imm &= TernlogMagicA; break;
  3844. case ISD::OR: Imm |= TernlogMagicA; break;
  3845. case ISD::XOR: Imm ^= TernlogMagicA; break;
  3846. }
  3847. return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
  3848. }
  3849. /// If the high bits of an 'and' operand are known zero, try setting the
  3850. /// high bits of an 'and' constant operand to produce a smaller encoding by
  3851. /// creating a small, sign-extended negative immediate rather than a large
  3852. /// positive one. This reverses a transform in SimplifyDemandedBits that
  3853. /// shrinks mask constants by clearing bits. There is also a possibility that
  3854. /// the 'and' mask can be made -1, so the 'and' itself is unnecessary. In that
  3855. /// case, just replace the 'and'. Return 'true' if the node is replaced.
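///
/// For example, a 64-bit 'and' with the mask 0x00000FFFFFFFFFF0 would need a
/// separate 64-bit immediate move; if the top 20 bits of the other operand are
/// known zero, the mask can instead become -16, which encodes as a
/// sign-extended 8-bit immediate.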
  3856. bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
  3857. // i8 is unshrinkable, i16 should be promoted to i32, and vector ops don't
  3858. // have immediate operands.
  3859. MVT VT = And->getSimpleValueType(0);
  3860. if (VT != MVT::i32 && VT != MVT::i64)
  3861. return false;
  3862. auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1));
  3863. if (!And1C)
  3864. return false;
3865. // Bail out if the mask constant is already negative. It can't shrink any more.
  3866. // If the upper 32 bits of a 64 bit mask are all zeros, we have special isel
  3867. // patterns to use a 32-bit and instead of a 64-bit and by relying on the
  3868. // implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
  3869. // are negative too.
  3870. APInt MaskVal = And1C->getAPIntValue();
  3871. unsigned MaskLZ = MaskVal.countLeadingZeros();
  3872. if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
  3873. return false;
  3874. // Don't extend into the upper 32 bits of a 64 bit mask.
  3875. if (VT == MVT::i64 && MaskLZ >= 32) {
  3876. MaskLZ -= 32;
  3877. MaskVal = MaskVal.trunc(32);
  3878. }
  3879. SDValue And0 = And->getOperand(0);
  3880. APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ);
  3881. APInt NegMaskVal = MaskVal | HighZeros;
  3882. // If a negative constant would not allow a smaller encoding, there's no need
  3883. // to continue. Only change the constant when we know it's a win.
  3884. unsigned MinWidth = NegMaskVal.getMinSignedBits();
  3885. if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
  3886. return false;
  3887. // Extend masks if we truncated above.
  3888. if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) {
  3889. NegMaskVal = NegMaskVal.zext(64);
  3890. HighZeros = HighZeros.zext(64);
  3891. }
  3892. // The variable operand must be all zeros in the top bits to allow using the
  3893. // new, negative constant as the mask.
  3894. if (!CurDAG->MaskedValueIsZero(And0, HighZeros))
  3895. return false;
  3896. // Check if the mask is -1. In that case, this is an unnecessary instruction
  3897. // that escaped earlier analysis.
  3898. if (NegMaskVal.isAllOnes()) {
  3899. ReplaceNode(And, And0.getNode());
  3900. return true;
  3901. }
  3902. // A negative mask allows a smaller encoding. Create a new 'and' node.
  3903. SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT);
  3904. insertDAGNode(*CurDAG, SDValue(And, 0), NewMask);
  3905. SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask);
  3906. ReplaceNode(And, NewAnd.getNode());
  3907. SelectCode(NewAnd.getNode());
  3908. return true;
  3909. }
  3910. static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
  3911. bool FoldedBCast, bool Masked) {
  3912. #define VPTESTM_CASE(VT, SUFFIX) \
  3913. case MVT::VT: \
  3914. if (Masked) \
  3915. return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \
  3916. return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX;
  3917. #define VPTESTM_BROADCAST_CASES(SUFFIX) \
  3918. default: llvm_unreachable("Unexpected VT!"); \
  3919. VPTESTM_CASE(v4i32, DZ128##SUFFIX) \
  3920. VPTESTM_CASE(v2i64, QZ128##SUFFIX) \
  3921. VPTESTM_CASE(v8i32, DZ256##SUFFIX) \
  3922. VPTESTM_CASE(v4i64, QZ256##SUFFIX) \
  3923. VPTESTM_CASE(v16i32, DZ##SUFFIX) \
  3924. VPTESTM_CASE(v8i64, QZ##SUFFIX)
  3925. #define VPTESTM_FULL_CASES(SUFFIX) \
  3926. VPTESTM_BROADCAST_CASES(SUFFIX) \
  3927. VPTESTM_CASE(v16i8, BZ128##SUFFIX) \
  3928. VPTESTM_CASE(v8i16, WZ128##SUFFIX) \
  3929. VPTESTM_CASE(v32i8, BZ256##SUFFIX) \
  3930. VPTESTM_CASE(v16i16, WZ256##SUFFIX) \
  3931. VPTESTM_CASE(v64i8, BZ##SUFFIX) \
  3932. VPTESTM_CASE(v32i16, WZ##SUFFIX)
  3933. if (FoldedBCast) {
  3934. switch (TestVT.SimpleTy) {
  3935. VPTESTM_BROADCAST_CASES(rmb)
  3936. }
  3937. }
  3938. if (FoldedLoad) {
  3939. switch (TestVT.SimpleTy) {
  3940. VPTESTM_FULL_CASES(rm)
  3941. }
  3942. }
  3943. switch (TestVT.SimpleTy) {
  3944. VPTESTM_FULL_CASES(rr)
  3945. }
  3946. #undef VPTESTM_FULL_CASES
  3947. #undef VPTESTM_BROADCAST_CASES
  3948. #undef VPTESTM_CASE
  3949. }
  3950. // Try to create VPTESTM instruction. If InMask is not null, it will be used
  3951. // to form a masked operation.
  3952. bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
  3953. SDValue InMask) {
  3954. assert(Subtarget->hasAVX512() && "Expected AVX512!");
  3955. assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
  3956. "Unexpected VT!");
  3957. // Look for equal and not equal compares.
  3958. ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
  3959. if (CC != ISD::SETEQ && CC != ISD::SETNE)
  3960. return false;
  3961. SDValue SetccOp0 = Setcc.getOperand(0);
  3962. SDValue SetccOp1 = Setcc.getOperand(1);
  3963. // Canonicalize the all zero vector to the RHS.
  3964. if (ISD::isBuildVectorAllZeros(SetccOp0.getNode()))
  3965. std::swap(SetccOp0, SetccOp1);
  3966. // See if we're comparing against zero.
  3967. if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode()))
  3968. return false;
  3969. SDValue N0 = SetccOp0;
  3970. MVT CmpVT = N0.getSimpleValueType();
  3971. MVT CmpSVT = CmpVT.getVectorElementType();
  3972. // Start with both operands the same. We'll try to refine this.
  3973. SDValue Src0 = N0;
  3974. SDValue Src1 = N0;
  3975. {
  3976. // Look through single use bitcasts.
  3977. SDValue N0Temp = N0;
  3978. if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse())
  3979. N0Temp = N0.getOperand(0);
  3980. // Look for single use AND.
  3981. if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) {
  3982. Src0 = N0Temp.getOperand(0);
  3983. Src1 = N0Temp.getOperand(1);
  3984. }
  3985. }
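// If we found an AND, VPTESTM can perform it for free: vptestm sets mask bit
// i when (Src0[i] & Src1[i]) != 0, and vptestnm sets it when that AND is zero.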
  3986. // Without VLX we need to widen the operation.
  3987. bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector();
  3988. auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L,
  3989. SDValue &Base, SDValue &Scale, SDValue &Index,
  3990. SDValue &Disp, SDValue &Segment) {
  3991. // If we need to widen, we can't fold the load.
  3992. if (!Widen)
  3993. if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment))
  3994. return true;
3995. // If we didn't fold a load, try to match a broadcast. There is no widening
3996. // limitation for this, but only 32- and 64-bit types are supported.
  3997. if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64)
  3998. return false;
  3999. // Look through single use bitcasts.
  4000. if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) {
  4001. P = L.getNode();
  4002. L = L.getOperand(0);
  4003. }
  4004. if (L.getOpcode() != X86ISD::VBROADCAST_LOAD)
  4005. return false;
  4006. auto *MemIntr = cast<MemIntrinsicSDNode>(L);
  4007. if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits())
  4008. return false;
  4009. return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment);
  4010. };
  4011. // We can only fold loads if the sources are unique.
  4012. bool CanFoldLoads = Src0 != Src1;
  4013. bool FoldedLoad = false;
  4014. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  4015. if (CanFoldLoads) {
  4016. FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2,
  4017. Tmp3, Tmp4);
  4018. if (!FoldedLoad) {
  4019. // And is commutative.
  4020. FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1,
  4021. Tmp2, Tmp3, Tmp4);
  4022. if (FoldedLoad)
  4023. std::swap(Src0, Src1);
  4024. }
  4025. }
  4026. bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD;
  4027. bool IsMasked = InMask.getNode() != nullptr;
  4028. SDLoc dl(Root);
  4029. MVT ResVT = Setcc.getSimpleValueType();
  4030. MVT MaskVT = ResVT;
  4031. if (Widen) {
  4032. // Widen the inputs using insert_subreg or copy_to_regclass.
  4033. unsigned Scale = CmpVT.is128BitVector() ? 4 : 2;
  4034. unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm;
  4035. unsigned NumElts = CmpVT.getVectorNumElements() * Scale;
  4036. CmpVT = MVT::getVectorVT(CmpSVT, NumElts);
  4037. MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
  4038. SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl,
  4039. CmpVT), 0);
  4040. Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0);
  4041. if (!FoldedBCast)
  4042. Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1);
  4043. if (IsMasked) {
  4044. // Widen the mask.
  4045. unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID();
  4046. SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
  4047. InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
  4048. dl, MaskVT, InMask, RC), 0);
  4049. }
  4050. }
  4051. bool IsTestN = CC == ISD::SETEQ;
  4052. unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
  4053. IsMasked);
  4054. MachineSDNode *CNode;
  4055. if (FoldedLoad) {
  4056. SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other);
  4057. if (IsMasked) {
  4058. SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
  4059. Src1.getOperand(0) };
  4060. CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
  4061. } else {
  4062. SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4,
  4063. Src1.getOperand(0) };
  4064. CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
  4065. }
  4066. // Update the chain.
  4067. ReplaceUses(Src1.getValue(1), SDValue(CNode, 1));
  4068. // Record the mem-refs
  4069. CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()});
  4070. } else {
  4071. if (IsMasked)
  4072. CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1);
  4073. else
  4074. CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1);
  4075. }
  4076. // If we widened, we need to shrink the mask VT.
  4077. if (Widen) {
  4078. unsigned RegClass = TLI->getRegClassFor(ResVT)->getID();
  4079. SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32);
  4080. CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
  4081. dl, ResVT, SDValue(CNode, 0), RC);
  4082. }
  4083. ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0));
  4084. CurDAG->RemoveDeadNode(Root);
  4085. return true;
  4086. }
  4087. // Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
  4088. // into vpternlog.
  4089. bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
  4090. assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
  4091. MVT NVT = N->getSimpleValueType(0);
  4092. // Make sure we support VPTERNLOG.
  4093. if (!NVT.isVector() || !Subtarget->hasAVX512())
  4094. return false;
  4095. // We need VLX for 128/256-bit.
  4096. if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
  4097. return false;
  4098. SDValue N0 = N->getOperand(0);
  4099. SDValue N1 = N->getOperand(1);
  4100. // Canonicalize AND to LHS.
  4101. if (N1.getOpcode() == ISD::AND)
  4102. std::swap(N0, N1);
  4103. if (N0.getOpcode() != ISD::AND ||
  4104. N1.getOpcode() != X86ISD::ANDNP ||
  4105. !N0.hasOneUse() || !N1.hasOneUse())
  4106. return false;
4107. // ANDN is not commutable, so use it to pin down A and C.
  4108. SDValue A = N1.getOperand(0);
  4109. SDValue C = N1.getOperand(1);
  4110. // AND is commutable, if one operand matches A, the other operand is B.
  4111. // Otherwise this isn't a match.
  4112. SDValue B;
  4113. if (N0.getOperand(0) == A)
  4114. B = N0.getOperand(1);
  4115. else if (N0.getOperand(1) == A)
  4116. B = N0.getOperand(0);
  4117. else
  4118. return false;
  4119. SDLoc dl(N);
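// (or (and A, B), (andn A, C)) selects B where A is set and C where A is
// clear, which is VPTERNLOG with immediate
// (0xcc & 0xf0) | (0xaa & ~0xf0) = 0xc0 | 0x0a = 0xCA.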
  4120. SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
  4121. SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
  4122. ReplaceNode(N, Ternlog.getNode());
  4123. return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
  4124. Ternlog.getNode(), A, B, C, 0xCA);
  4125. }
  4126. void X86DAGToDAGISel::Select(SDNode *Node) {
  4127. MVT NVT = Node->getSimpleValueType(0);
  4128. unsigned Opcode = Node->getOpcode();
  4129. SDLoc dl(Node);
  4130. if (Node->isMachineOpcode()) {
  4131. LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
  4132. Node->setNodeId(-1);
  4133. return; // Already selected.
  4134. }
  4135. switch (Opcode) {
  4136. default: break;
  4137. case ISD::INTRINSIC_W_CHAIN: {
  4138. unsigned IntNo = Node->getConstantOperandVal(1);
  4139. switch (IntNo) {
  4140. default: break;
  4141. case Intrinsic::x86_encodekey128:
  4142. case Intrinsic::x86_encodekey256: {
  4143. if (!Subtarget->hasKL())
  4144. break;
  4145. unsigned Opcode;
  4146. switch (IntNo) {
  4147. default: llvm_unreachable("Impossible intrinsic");
  4148. case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break;
  4149. case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break;
  4150. }
  4151. SDValue Chain = Node->getOperand(0);
  4152. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3),
  4153. SDValue());
  4154. if (Opcode == X86::ENCODEKEY256)
  4155. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4),
  4156. Chain.getValue(1));
  4157. MachineSDNode *Res = CurDAG->getMachineNode(
  4158. Opcode, dl, Node->getVTList(),
  4159. {Node->getOperand(2), Chain, Chain.getValue(1)});
  4160. ReplaceNode(Node, Res);
  4161. return;
  4162. }
  4163. case Intrinsic::x86_tileloadd64_internal:
  4164. case Intrinsic::x86_tileloaddt164_internal: {
  4165. if (!Subtarget->hasAMXTILE())
  4166. break;
  4167. unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
  4168. ? X86::PTILELOADDV
  4169. : X86::PTILELOADDT1V;
  4170. // _tile_loadd_internal(row, col, buf, STRIDE)
  4171. SDValue Base = Node->getOperand(4);
  4172. SDValue Scale = getI8Imm(1, dl);
  4173. SDValue Index = Node->getOperand(5);
  4174. SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
  4175. SDValue Segment = CurDAG->getRegister(0, MVT::i16);
  4176. SDValue Chain = Node->getOperand(0);
  4177. MachineSDNode *CNode;
  4178. SDValue Ops[] = {Node->getOperand(2),
  4179. Node->getOperand(3),
  4180. Base,
  4181. Scale,
  4182. Index,
  4183. Disp,
  4184. Segment,
  4185. Chain};
  4186. CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops);
  4187. ReplaceNode(Node, CNode);
  4188. return;
  4189. }
  4190. }
  4191. break;
  4192. }
  4193. case ISD::INTRINSIC_VOID: {
  4194. unsigned IntNo = Node->getConstantOperandVal(1);
  4195. switch (IntNo) {
  4196. default: break;
  4197. case Intrinsic::x86_sse3_monitor:
  4198. case Intrinsic::x86_monitorx:
  4199. case Intrinsic::x86_clzero: {
  4200. bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64;
  4201. unsigned Opc = 0;
  4202. switch (IntNo) {
  4203. default: llvm_unreachable("Unexpected intrinsic!");
  4204. case Intrinsic::x86_sse3_monitor:
  4205. if (!Subtarget->hasSSE3())
  4206. break;
  4207. Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr;
  4208. break;
  4209. case Intrinsic::x86_monitorx:
  4210. if (!Subtarget->hasMWAITX())
  4211. break;
  4212. Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr;
  4213. break;
  4214. case Intrinsic::x86_clzero:
  4215. if (!Subtarget->hasCLZERO())
  4216. break;
  4217. Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r;
  4218. break;
  4219. }
  4220. if (Opc) {
  4221. unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
  4222. SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
  4223. Node->getOperand(2), SDValue());
  4224. SDValue InFlag = Chain.getValue(1);
  4225. if (IntNo == Intrinsic::x86_sse3_monitor ||
  4226. IntNo == Intrinsic::x86_monitorx) {
  4227. // Copy the other two operands to ECX and EDX.
  4228. Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
  4229. InFlag);
  4230. InFlag = Chain.getValue(1);
  4231. Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
  4232. InFlag);
  4233. InFlag = Chain.getValue(1);
  4234. }
  4235. MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
  4236. { Chain, InFlag});
  4237. ReplaceNode(Node, CNode);
  4238. return;
  4239. }
  4240. break;
  4241. }
  4242. case Intrinsic::x86_tilestored64_internal: {
  4243. unsigned Opc = X86::PTILESTOREDV;
  4244. // _tile_stored_internal(row, col, buf, STRIDE, c)
  4245. SDValue Base = Node->getOperand(4);
  4246. SDValue Scale = getI8Imm(1, dl);
  4247. SDValue Index = Node->getOperand(5);
  4248. SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
  4249. SDValue Segment = CurDAG->getRegister(0, MVT::i16);
  4250. SDValue Chain = Node->getOperand(0);
  4251. MachineSDNode *CNode;
  4252. SDValue Ops[] = {Node->getOperand(2),
  4253. Node->getOperand(3),
  4254. Base,
  4255. Scale,
  4256. Index,
  4257. Disp,
  4258. Segment,
  4259. Node->getOperand(6),
  4260. Chain};
  4261. CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
  4262. ReplaceNode(Node, CNode);
  4263. return;
  4264. }
  4265. case Intrinsic::x86_tileloadd64:
  4266. case Intrinsic::x86_tileloaddt164:
  4267. case Intrinsic::x86_tilestored64: {
  4268. if (!Subtarget->hasAMXTILE())
  4269. break;
  4270. unsigned Opc;
  4271. switch (IntNo) {
  4272. default: llvm_unreachable("Unexpected intrinsic!");
  4273. case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break;
  4274. case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break;
  4275. case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break;
  4276. }
  4277. // FIXME: Match displacement and scale.
  4278. unsigned TIndex = Node->getConstantOperandVal(2);
  4279. SDValue TReg = getI8Imm(TIndex, dl);
  4280. SDValue Base = Node->getOperand(3);
  4281. SDValue Scale = getI8Imm(1, dl);
  4282. SDValue Index = Node->getOperand(4);
  4283. SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32);
  4284. SDValue Segment = CurDAG->getRegister(0, MVT::i16);
  4285. SDValue Chain = Node->getOperand(0);
  4286. MachineSDNode *CNode;
  4287. if (Opc == X86::PTILESTORED) {
  4288. SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain };
  4289. CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
  4290. } else {
  4291. SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain };
  4292. CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
  4293. }
  4294. ReplaceNode(Node, CNode);
  4295. return;
  4296. }
  4297. }
  4298. break;
  4299. }
  4300. case ISD::BRIND:
  4301. case X86ISD::NT_BRIND: {
  4302. if (Subtarget->isTargetNaCl())
4303. // NaCl has its own pass where jmp %r32 instructions are converted to jmp %r64. We
  4304. // leave the instruction alone.
  4305. break;
  4306. if (Subtarget->isTarget64BitILP32()) {
  4307. // Converts a 32-bit register to a 64-bit, zero-extended version of
  4308. // it. This is needed because x86-64 can do many things, but jmp %r32
  4309. // ain't one of them.
  4310. SDValue Target = Node->getOperand(1);
  4311. assert(Target.getValueType() == MVT::i32 && "Unexpected VT!");
  4312. SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64);
  4313. SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other,
  4314. Node->getOperand(0), ZextTarget);
  4315. ReplaceNode(Node, Brind.getNode());
  4316. SelectCode(ZextTarget.getNode());
  4317. SelectCode(Brind.getNode());
  4318. return;
  4319. }
  4320. break;
  4321. }
  4322. case X86ISD::GlobalBaseReg:
  4323. ReplaceNode(Node, getGlobalBaseReg());
  4324. return;
  4325. case ISD::BITCAST:
  4326. // Just drop all 128/256/512-bit bitcasts.
  4327. if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() ||
  4328. NVT == MVT::f128) {
  4329. ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
  4330. CurDAG->RemoveDeadNode(Node);
  4331. return;
  4332. }
  4333. break;
  4334. case ISD::SRL:
  4335. if (matchBitExtract(Node))
  4336. return;
  4337. LLVM_FALLTHROUGH;
  4338. case ISD::SRA:
  4339. case ISD::SHL:
  4340. if (tryShiftAmountMod(Node))
  4341. return;
  4342. break;
  4343. case X86ISD::VPTERNLOG: {
  4344. uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
  4345. if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
  4346. Node->getOperand(1), Node->getOperand(2), Imm))
  4347. return;
  4348. break;
  4349. }
  4350. case X86ISD::ANDNP:
  4351. if (tryVPTERNLOG(Node))
  4352. return;
  4353. break;
  4354. case ISD::AND:
  4355. if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
  4356. // Try to form a masked VPTESTM. Operands can be in either order.
  4357. SDValue N0 = Node->getOperand(0);
  4358. SDValue N1 = Node->getOperand(1);
  4359. if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
  4360. tryVPTESTM(Node, N0, N1))
  4361. return;
  4362. if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
  4363. tryVPTESTM(Node, N1, N0))
  4364. return;
  4365. }
  4366. if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) {
  4367. ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
  4368. CurDAG->RemoveDeadNode(Node);
  4369. return;
  4370. }
  4371. if (matchBitExtract(Node))
  4372. return;
  4373. if (AndImmShrink && shrinkAndImmediate(Node))
  4374. return;
  4375. LLVM_FALLTHROUGH;
  4376. case ISD::OR:
  4377. case ISD::XOR:
  4378. if (tryShrinkShlLogicImm(Node))
  4379. return;
  4380. if (Opcode == ISD::OR && tryMatchBitSelect(Node))
  4381. return;
  4382. if (tryVPTERNLOG(Node))
  4383. return;
  4384. LLVM_FALLTHROUGH;
  4385. case ISD::ADD:
  4386. case ISD::SUB: {
  4387. // Try to avoid folding immediates with multiple uses for optsize.
4388. // This code tries to select the register form directly to avoid going
4389. // through the isel table which might fold the immediate. We can't change
4390. // the add/sub/and/or/xor-with-immediate patterns in the
  4391. // tablegen files to check immediate use count without making the patterns
  4392. // unavailable to the fast-isel table.
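// E.g. at minsize, if the same non-imm8 constant feeds several adds, it can be
// smaller to materialize it once in a register and emit ADD32rr for each use
// than to repeat the 4-byte immediate in every ADD32ri.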
  4393. if (!CurDAG->shouldOptForSize())
  4394. break;
  4395. // Only handle i8/i16/i32/i64.
  4396. if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
  4397. break;
  4398. SDValue N0 = Node->getOperand(0);
  4399. SDValue N1 = Node->getOperand(1);
  4400. ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  4401. if (!Cst)
  4402. break;
  4403. int64_t Val = Cst->getSExtValue();
4404. // Make sure it's an immediate that is considered foldable.
  4405. // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
  4406. if (!isInt<8>(Val) && !isInt<32>(Val))
  4407. break;
  4408. // If this can match to INC/DEC, let it go.
  4409. if (Opcode == ISD::ADD && (Val == 1 || Val == -1))
  4410. break;
  4411. // Check if we should avoid folding this immediate.
  4412. if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
  4413. break;
  4414. // We should not fold the immediate. So we need a register form instead.
  4415. unsigned ROpc, MOpc;
  4416. switch (NVT.SimpleTy) {
  4417. default: llvm_unreachable("Unexpected VT!");
  4418. case MVT::i8:
  4419. switch (Opcode) {
  4420. default: llvm_unreachable("Unexpected opcode!");
  4421. case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
  4422. case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
  4423. case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
  4424. case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
  4425. case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
  4426. }
  4427. break;
  4428. case MVT::i16:
  4429. switch (Opcode) {
  4430. default: llvm_unreachable("Unexpected opcode!");
  4431. case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
  4432. case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
  4433. case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
  4434. case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
  4435. case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
  4436. }
  4437. break;
  4438. case MVT::i32:
  4439. switch (Opcode) {
  4440. default: llvm_unreachable("Unexpected opcode!");
  4441. case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
  4442. case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
  4443. case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
  4444. case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
  4445. case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
  4446. }
  4447. break;
  4448. case MVT::i64:
  4449. switch (Opcode) {
  4450. default: llvm_unreachable("Unexpected opcode!");
  4451. case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
  4452. case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
  4453. case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
  4454. case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
  4455. case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
  4456. }
  4457. break;
  4458. }
4459. // Ok, this is an AND/OR/XOR/ADD/SUB with a constant.
4460. // If this is not a subtract, we can still try to fold a load.
  4461. if (Opcode != ISD::SUB) {
  4462. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  4463. if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  4464. SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
  4465. SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
  4466. MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  4467. // Update the chain.
  4468. ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
  4469. // Record the mem-refs
  4470. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
  4471. ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
  4472. CurDAG->RemoveDeadNode(Node);
  4473. return;
  4474. }
  4475. }
  4476. CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
  4477. return;
  4478. }
  4479. case X86ISD::SMUL:
  4480. // i16/i32/i64 are handled with isel patterns.
  4481. if (NVT != MVT::i8)
  4482. break;
  4483. LLVM_FALLTHROUGH;
  4484. case X86ISD::UMUL: {
  4485. SDValue N0 = Node->getOperand(0);
  4486. SDValue N1 = Node->getOperand(1);
  4487. unsigned LoReg, ROpc, MOpc;
  4488. switch (NVT.SimpleTy) {
  4489. default: llvm_unreachable("Unsupported VT!");
  4490. case MVT::i8:
  4491. LoReg = X86::AL;
  4492. ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
  4493. MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
  4494. break;
  4495. case MVT::i16:
  4496. LoReg = X86::AX;
  4497. ROpc = X86::MUL16r;
  4498. MOpc = X86::MUL16m;
  4499. break;
  4500. case MVT::i32:
  4501. LoReg = X86::EAX;
  4502. ROpc = X86::MUL32r;
  4503. MOpc = X86::MUL32m;
  4504. break;
  4505. case MVT::i64:
  4506. LoReg = X86::RAX;
  4507. ROpc = X86::MUL64r;
  4508. MOpc = X86::MUL64m;
  4509. break;
  4510. }
  4511. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  4512. bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
  4513. // Multiply is commutative.
  4514. if (!FoldedLoad) {
  4515. FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
  4516. if (FoldedLoad)
  4517. std::swap(N0, N1);
  4518. }
  4519. SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
  4520. N0, SDValue()).getValue(1);
  4521. MachineSDNode *CNode;
  4522. if (FoldedLoad) {
  4523. // i16/i32/i64 use an instruction that produces a low and high result even
  4524. // though only the low result is used.
  4525. SDVTList VTs;
  4526. if (NVT == MVT::i8)
  4527. VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
  4528. else
  4529. VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
  4530. SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
  4531. InFlag };
  4532. CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  4533. // Update the chain.
  4534. ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
  4535. // Record the mem-refs
  4536. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
  4537. } else {
  4538. // i16/i32/i64 use an instruction that produces a low and high result even
  4539. // though only the low result is used.
  4540. SDVTList VTs;
  4541. if (NVT == MVT::i8)
  4542. VTs = CurDAG->getVTList(NVT, MVT::i32);
  4543. else
  4544. VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
  4545. CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
  4546. }
  4547. ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
  4548. ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
  4549. CurDAG->RemoveDeadNode(Node);
  4550. return;
  4551. }
  4552. case ISD::SMUL_LOHI:
  4553. case ISD::UMUL_LOHI: {
  4554. SDValue N0 = Node->getOperand(0);
  4555. SDValue N1 = Node->getOperand(1);
  4556. unsigned Opc, MOpc;
  4557. unsigned LoReg, HiReg;
  4558. bool IsSigned = Opcode == ISD::SMUL_LOHI;
  4559. bool UseMULX = !IsSigned && Subtarget->hasBMI2();
  4560. bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
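// MULX (BMI2) takes one factor implicitly in EDX/RDX, writes both product
// halves to explicit destinations and leaves EFLAGS alone; the *Hrr/*Hrm
// forms chosen when the low half is dead model only the high result.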
  4561. switch (NVT.SimpleTy) {
  4562. default: llvm_unreachable("Unsupported VT!");
  4563. case MVT::i32:
  4564. Opc = UseMULXHi ? X86::MULX32Hrr :
  4565. UseMULX ? X86::MULX32rr :
  4566. IsSigned ? X86::IMUL32r : X86::MUL32r;
  4567. MOpc = UseMULXHi ? X86::MULX32Hrm :
  4568. UseMULX ? X86::MULX32rm :
  4569. IsSigned ? X86::IMUL32m : X86::MUL32m;
  4570. LoReg = UseMULX ? X86::EDX : X86::EAX;
  4571. HiReg = X86::EDX;
  4572. break;
  4573. case MVT::i64:
  4574. Opc = UseMULXHi ? X86::MULX64Hrr :
  4575. UseMULX ? X86::MULX64rr :
  4576. IsSigned ? X86::IMUL64r : X86::MUL64r;
  4577. MOpc = UseMULXHi ? X86::MULX64Hrm :
  4578. UseMULX ? X86::MULX64rm :
  4579. IsSigned ? X86::IMUL64m : X86::MUL64m;
  4580. LoReg = UseMULX ? X86::RDX : X86::RAX;
  4581. HiReg = X86::RDX;
  4582. break;
  4583. }
  4584. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  4585. bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
4586. // Multiply is commutative.
  4587. if (!foldedLoad) {
  4588. foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
  4589. if (foldedLoad)
  4590. std::swap(N0, N1);
  4591. }
  4592. SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
  4593. N0, SDValue()).getValue(1);
  4594. SDValue ResHi, ResLo;
  4595. if (foldedLoad) {
  4596. SDValue Chain;
  4597. MachineSDNode *CNode = nullptr;
  4598. SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
  4599. InFlag };
  4600. if (UseMULXHi) {
  4601. SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
  4602. CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  4603. ResHi = SDValue(CNode, 0);
  4604. Chain = SDValue(CNode, 1);
  4605. } else if (UseMULX) {
  4606. SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other);
  4607. CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  4608. ResHi = SDValue(CNode, 0);
  4609. ResLo = SDValue(CNode, 1);
  4610. Chain = SDValue(CNode, 2);
  4611. } else {
  4612. SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  4613. CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
  4614. Chain = SDValue(CNode, 0);
  4615. InFlag = SDValue(CNode, 1);
  4616. }
  4617. // Update the chain.
  4618. ReplaceUses(N1.getValue(1), Chain);
  4619. // Record the mem-refs
  4620. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
  4621. } else {
  4622. SDValue Ops[] = { N1, InFlag };
  4623. if (UseMULXHi) {
  4624. SDVTList VTs = CurDAG->getVTList(NVT);
  4625. SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
  4626. ResHi = SDValue(CNode, 0);
  4627. } else if (UseMULX) {
  4628. SDVTList VTs = CurDAG->getVTList(NVT, NVT);
  4629. SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
  4630. ResHi = SDValue(CNode, 0);
  4631. ResLo = SDValue(CNode, 1);
  4632. } else {
  4633. SDVTList VTs = CurDAG->getVTList(MVT::Glue);
  4634. SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
  4635. InFlag = SDValue(CNode, 0);
  4636. }
  4637. }
  4638. // Copy the low half of the result, if it is needed.
  4639. if (!SDValue(Node, 0).use_empty()) {
  4640. if (!ResLo) {
  4641. assert(LoReg && "Register for low half is not defined!");
  4642. ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
  4643. NVT, InFlag);
  4644. InFlag = ResLo.getValue(2);
  4645. }
  4646. ReplaceUses(SDValue(Node, 0), ResLo);
  4647. LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
  4648. dbgs() << '\n');
  4649. }
  4650. // Copy the high half of the result, if it is needed.
  4651. if (!SDValue(Node, 1).use_empty()) {
  4652. if (!ResHi) {
  4653. assert(HiReg && "Register for high half is not defined!");
  4654. ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
  4655. NVT, InFlag);
  4656. InFlag = ResHi.getValue(2);
  4657. }
  4658. ReplaceUses(SDValue(Node, 1), ResHi);
  4659. LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
  4660. dbgs() << '\n');
  4661. }
  4662. CurDAG->RemoveDeadNode(Node);
  4663. return;
  4664. }
  4665. case ISD::SDIVREM:
  4666. case ISD::UDIVREM: {
  4667. SDValue N0 = Node->getOperand(0);
  4668. SDValue N1 = Node->getOperand(1);
  4669. unsigned ROpc, MOpc;
  4670. bool isSigned = Opcode == ISD::SDIVREM;
  4671. if (!isSigned) {
  4672. switch (NVT.SimpleTy) {
  4673. default: llvm_unreachable("Unsupported VT!");
  4674. case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break;
  4675. case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break;
  4676. case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break;
  4677. case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break;
  4678. }
  4679. } else {
  4680. switch (NVT.SimpleTy) {
  4681. default: llvm_unreachable("Unsupported VT!");
  4682. case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
  4683. case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
  4684. case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
  4685. case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
  4686. }
  4687. }
  4688. unsigned LoReg, HiReg, ClrReg;
  4689. unsigned SExtOpcode;
  4690. switch (NVT.SimpleTy) {
  4691. default: llvm_unreachable("Unsupported VT!");
  4692. case MVT::i8:
  4693. LoReg = X86::AL; ClrReg = HiReg = X86::AH;
  4694. SExtOpcode = 0; // Not used.
  4695. break;
  4696. case MVT::i16:
  4697. LoReg = X86::AX; HiReg = X86::DX;
  4698. ClrReg = X86::DX;
  4699. SExtOpcode = X86::CWD;
  4700. break;
  4701. case MVT::i32:
  4702. LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
  4703. SExtOpcode = X86::CDQ;
  4704. break;
  4705. case MVT::i64:
  4706. LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
  4707. SExtOpcode = X86::CQO;
  4708. break;
  4709. }
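// Background: DIV/IDIV divide the register pair HiReg:LoReg (AH:AL packed in
// AX for the 8-bit form) by the operand, leaving the quotient in LoReg and
// the remainder in HiReg, so the dividend must be staged into those registers.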
  4710. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  4711. bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
  4712. bool signBitIsZero = CurDAG->SignBitIsZero(N0);
  4713. SDValue InFlag;
  4714. if (NVT == MVT::i8) {
4715. // Special case for div8: widen the dividend into AX with a zero- or
4716. // sign-extending move so the upper 8 bits (AH) hold the correct value.
  4717. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain;
  4718. MachineSDNode *Move;
  4719. if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  4720. SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
  4721. unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8
  4722. : X86::MOVZX16rm8;
  4723. Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops);
  4724. Chain = SDValue(Move, 1);
  4725. ReplaceUses(N0.getValue(1), Chain);
  4726. // Record the mem-refs
  4727. CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()});
  4728. } else {
  4729. unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8
  4730. : X86::MOVZX16rr8;
  4731. Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0);
  4732. Chain = CurDAG->getEntryNode();
  4733. }
  4734. Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
  4735. SDValue());
  4736. InFlag = Chain.getValue(1);
  4737. } else {
  4738. InFlag =
  4739. CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
  4740. LoReg, N0, SDValue()).getValue(1);
  4741. if (isSigned && !signBitIsZero) {
  4742. // Sign extend the low part into the high part.
  4743. InFlag =
  4744. SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
  4745. } else {
  4746. // Zero out the high part, effectively zero extending the input.
  4747. SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
  4748. SDValue ClrNode =
  4749. SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
  4750. switch (NVT.SimpleTy) {
  4751. case MVT::i16:
  4752. ClrNode =
  4753. SDValue(CurDAG->getMachineNode(
  4754. TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
  4755. CurDAG->getTargetConstant(X86::sub_16bit, dl,
  4756. MVT::i32)),
  4757. 0);
  4758. break;
  4759. case MVT::i32:
  4760. break;
  4761. case MVT::i64:
  4762. ClrNode =
  4763. SDValue(CurDAG->getMachineNode(
  4764. TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
  4765. CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
  4766. CurDAG->getTargetConstant(X86::sub_32bit, dl,
  4767. MVT::i32)),
  4768. 0);
  4769. break;
  4770. default:
  4771. llvm_unreachable("Unexpected division source");
  4772. }
  4773. InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
  4774. ClrNode, InFlag).getValue(1);
  4775. }
  4776. }
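// For illustration, an unsigned 32-bit divide lowers to roughly:
//   movl %src, %eax
//   xorl %edx, %edx      ; zero the high half of the dividend
//   divl %divisor        ; EAX = quotient, EDX = remainder
// with the signed form using cltd/cqto instead of the xor.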
  4777. if (foldedLoad) {
  4778. SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
  4779. InFlag };
  4780. MachineSDNode *CNode =
  4781. CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
  4782. InFlag = SDValue(CNode, 1);
  4783. // Update the chain.
  4784. ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
  4785. // Record the mem-refs
  4786. CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
  4787. } else {
  4788. InFlag =
  4789. SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0);
  4790. }
  4791. // Prevent use of AH in a REX instruction by explicitly copying it to
  4792. // an ABCD_L register.
  4793. //
  4794. // The current assumption of the register allocator is that isel
  4795. // won't generate explicit references to the GR8_ABCD_H registers. If
  4796. // the allocator and/or the backend get enhanced to be more robust in
  4797. // that regard, this can be, and should be, removed.
  4798. if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
  4799. SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
  4800. unsigned AHExtOpcode =
  4801. isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
  4802. SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
  4803. MVT::Glue, AHCopy, InFlag);
  4804. SDValue Result(RNode, 0);
  4805. InFlag = SDValue(RNode, 1);
  4806. Result =
  4807. CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
  4808. ReplaceUses(SDValue(Node, 1), Result);
  4809. LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
  4810. dbgs() << '\n');
  4811. }
  4812. // Copy the division (low) result, if it is needed.
  4813. if (!SDValue(Node, 0).use_empty()) {
  4814. SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
  4815. LoReg, NVT, InFlag);
  4816. InFlag = Result.getValue(2);
  4817. ReplaceUses(SDValue(Node, 0), Result);
  4818. LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
  4819. dbgs() << '\n');
  4820. }
  4821. // Copy the remainder (high) result, if it is needed.
  4822. if (!SDValue(Node, 1).use_empty()) {
  4823. SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
  4824. HiReg, NVT, InFlag);
  4825. InFlag = Result.getValue(2);
  4826. ReplaceUses(SDValue(Node, 1), Result);
  4827. LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
  4828. dbgs() << '\n');
  4829. }
  4830. CurDAG->RemoveDeadNode(Node);
  4831. return;
  4832. }
  4833. case X86ISD::FCMP:
  4834. case X86ISD::STRICT_FCMP:
  4835. case X86ISD::STRICT_FCMPS: {
  4836. bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP ||
  4837. Node->getOpcode() == X86ISD::STRICT_FCMPS;
  4838. SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0);
  4839. SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1);
  4840. // Save the original VT of the compare.
  4841. MVT CmpVT = N0.getSimpleValueType();
  4842. // Floating point needs special handling if we don't have FCOMI.
  4843. if (Subtarget->hasCMov())
  4844. break;
  4845. bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS;
  4846. unsigned Opc;
  4847. switch (CmpVT.SimpleTy) {
  4848. default: llvm_unreachable("Unexpected type!");
  4849. case MVT::f32:
  4850. Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32;
  4851. break;
  4852. case MVT::f64:
  4853. Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64;
  4854. break;
  4855. case MVT::f80:
  4856. Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80;
  4857. break;
  4858. }
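// Without FCOMI the compare result lives in the FPU status word (C0/C2/C3).
// The sequence below copies FPSW into AX with FNSTSW, extracts AH, and uses
// SAHF to load it into EFLAGS, where C0/C2/C3 land in CF/PF/ZF.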
  4859. SDValue Chain =
  4860. IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode();
  4861. SDValue Glue;
  4862. if (IsStrictCmp) {
  4863. SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
  4864. Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0);
  4865. Glue = Chain.getValue(1);
  4866. } else {
  4867. Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0);
  4868. }
  4869. // Move FPSW to AX.
  4870. SDValue FNSTSW =
  4871. SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0);
4872. // Extract the upper 8 bits of AX.
  4873. SDValue Extract =
  4874. CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW);
  4875. // Move AH into flags.
  4876. // Some 64-bit targets lack SAHF support, but they do support FCOMI.
  4877. assert(Subtarget->hasLAHFSAHF() &&
  4878. "Target doesn't support SAHF or FCOMI?");
  4879. SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue());
  4880. Chain = AH;
  4881. SDValue SAHF = SDValue(
  4882. CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0);
  4883. if (IsStrictCmp)
  4884. ReplaceUses(SDValue(Node, 1), Chain);
  4885. ReplaceUses(SDValue(Node, 0), SAHF);
  4886. CurDAG->RemoveDeadNode(Node);
  4887. return;
  4888. }
  4889. case X86ISD::CMP: {
  4890. SDValue N0 = Node->getOperand(0);
  4891. SDValue N1 = Node->getOperand(1);
  4892. // Optimizations for TEST compares.
  4893. if (!isNullConstant(N1))
  4894. break;
  4895. // Save the original VT of the compare.
  4896. MVT CmpVT = N0.getSimpleValueType();
4897. // If we are comparing (and (shr X, C), Mask) with 0, emit a BEXTR followed
  4898. // by a test instruction. The test should be removed later by
  4899. // analyzeCompare if we are using only the zero flag.
  4900. // TODO: Should we check the users and use the BEXTR flags directly?
  4901. if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
  4902. if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) {
  4903. unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr
  4904. : X86::TEST32rr;
  4905. SDValue BEXTR = SDValue(NewNode, 0);
  4906. NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR);
  4907. ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
  4908. CurDAG->RemoveDeadNode(Node);
  4909. return;
  4910. }
  4911. }
  4912. // We can peek through truncates, but we need to be careful below.
  4913. if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
  4914. N0 = N0.getOperand(0);
  4915. // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
  4916. // use a smaller encoding.
4917. // (The single-use truncate, if any, was already looked past above.)
  4918. if (N0.getOpcode() == ISD::AND &&
  4919. N0.getNode()->hasOneUse() &&
  4920. N0.getValueType() != MVT::i8) {
  4921. ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  4922. if (!C) break;
  4923. uint64_t Mask = C->getZExtValue();
  4924. // We may have looked through a truncate so mask off any bits that
  4925. // shouldn't be part of the compare.
  4926. Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits());
  4927. // Check if we can replace AND+IMM64 with a shift. This is possible for
4928. // masks like 0xFF000000 or 0x00FFFFFF and if we care only about the zero
  4929. // flag.
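// For instance, with Mask == 0xFFFFFFFF00000000, (X & Mask) == 0 is the same
// as (X >> 32) == 0, so a single shift feeding TEST replaces a movabs+and.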
  4930. if (CmpVT == MVT::i64 && !isInt<32>(Mask) &&
  4931. onlyUsesZeroFlag(SDValue(Node, 0))) {
  4932. if (isMask_64(~Mask)) {
  4933. unsigned TrailingZeros = countTrailingZeros(Mask);
  4934. SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64);
  4935. SDValue Shift =
  4936. SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32,
  4937. N0.getOperand(0), Imm), 0);
  4938. MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
  4939. MVT::i32, Shift, Shift);
  4940. ReplaceNode(Node, Test);
  4941. return;
  4942. }
  4943. if (isMask_64(Mask)) {
  4944. unsigned LeadingZeros = countLeadingZeros(Mask);
  4945. SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64);
  4946. SDValue Shift =
  4947. SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32,
  4948. N0.getOperand(0), Imm), 0);
  4949. MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl,
  4950. MVT::i32, Shift, Shift);
  4951. ReplaceNode(Node, Test);
  4952. return;
  4953. }
  4954. }
  4955. MVT VT;
  4956. int SubRegOp;
  4957. unsigned ROpc, MOpc;
  4958. // For each of these checks we need to be careful if the sign flag is
4959. // being used. It is only safe to use the sign flag in two cases: either
4960. // the sign bit in the shrunken mask is zero, or the final test
  4961. // size is equal to the original compare size.
  4962. if (isUInt<8>(Mask) &&
  4963. (!(Mask & 0x80) || CmpVT == MVT::i8 ||
  4964. hasNoSignFlagUses(SDValue(Node, 0)))) {
  4965. // For example, convert "testl %eax, $8" to "testb %al, $8"
  4966. VT = MVT::i8;
  4967. SubRegOp = X86::sub_8bit;
  4968. ROpc = X86::TEST8ri;
  4969. MOpc = X86::TEST8mi;
  4970. } else if (OptForMinSize && isUInt<16>(Mask) &&
  4971. (!(Mask & 0x8000) || CmpVT == MVT::i16 ||
  4972. hasNoSignFlagUses(SDValue(Node, 0)))) {
4973. // For example, convert "testl %eax, $32776" to "testw %ax, $32776".
  4974. // NOTE: We only want to form TESTW instructions if optimizing for
  4975. // min size. Otherwise we only save one byte and possibly get a length
  4976. // changing prefix penalty in the decoders.
  4977. VT = MVT::i16;
  4978. SubRegOp = X86::sub_16bit;
  4979. ROpc = X86::TEST16ri;
  4980. MOpc = X86::TEST16mi;
  4981. } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 &&
  4982. ((!(Mask & 0x80000000) &&
  4983. // Without minsize 16-bit Cmps can get here so we need to
  4984. // be sure we calculate the correct sign flag if needed.
  4985. (CmpVT != MVT::i16 || !(Mask & 0x8000))) ||
  4986. CmpVT == MVT::i32 ||
  4987. hasNoSignFlagUses(SDValue(Node, 0)))) {
  4988. // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
4989. // NOTE: We only want to run this transform if N0 is 32 or 64 bits.
4990. // Otherwise, we find ourselves in a position where we have to do
  4991. // promotion. If previous passes did not promote the and, we assume
  4992. // they had a good reason not to and do not promote here.
  4993. VT = MVT::i32;
  4994. SubRegOp = X86::sub_32bit;
  4995. ROpc = X86::TEST32ri;
  4996. MOpc = X86::TEST32mi;
  4997. } else {
  4998. // No eligible transformation was found.
  4999. break;
  5000. }
  5001. SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT);
  5002. SDValue Reg = N0.getOperand(0);
  5003. // Emit a testl or testw.
  5004. MachineSDNode *NewNode;
  5005. SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  5006. if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
  5007. if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) {
  5008. if (!LoadN->isSimple()) {
  5009. unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits();
  5010. if ((MOpc == X86::TEST8mi && NumVolBits != 8) ||
  5011. (MOpc == X86::TEST16mi && NumVolBits != 16) ||
  5012. (MOpc == X86::TEST32mi && NumVolBits != 32))
  5013. break;
  5014. }
  5015. }
  5016. SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
  5017. Reg.getOperand(0) };
  5018. NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops);
  5019. // Update the chain.
  5020. ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1));
  5021. // Record the mem-refs
  5022. CurDAG->setNodeMemRefs(NewNode,
  5023. {cast<LoadSDNode>(Reg)->getMemOperand()});
  5024. } else {
  5025. // Extract the subregister if necessary.
  5026. if (N0.getValueType() != VT)
  5027. Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg);
  5028. NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm);
  5029. }
  5030. // Replace CMP with TEST.
  5031. ReplaceNode(Node, NewNode);
  5032. return;
  5033. }
  5034. break;
  5035. }
  5036. case X86ISD::PCMPISTR: {
  5037. if (!Subtarget->hasSSE42())
  5038. break;
  5039. bool NeedIndex = !SDValue(Node, 0).use_empty();
  5040. bool NeedMask = !SDValue(Node, 1).use_empty();
  5041. // We can't fold a load if we are going to make two instructions.
  5042. bool MayFoldLoad = !NeedIndex || !NeedMask;
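// PCMPISTRI returns its index implicitly in ECX and PCMPISTRM returns its
// mask implicitly in XMM0, so producing both results requires emitting the
// compare twice from this single ISD node.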
  5043. MachineSDNode *CNode;
  5044. if (NeedMask) {
  5045. unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr;
  5046. unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm;
  5047. CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node);
  5048. ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
  5049. }
  5050. if (NeedIndex || !NeedMask) {
  5051. unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr;
  5052. unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm;
  5053. CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node);
  5054. ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
  5055. }
  5056. // Connect the flag usage to the last instruction created.
  5057. ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
  5058. CurDAG->RemoveDeadNode(Node);
  5059. return;
  5060. }
  5061. case X86ISD::PCMPESTR: {
  5062. if (!Subtarget->hasSSE42())
  5063. break;
  5064. // Copy the two implicit register inputs.
  5065. SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
  5066. Node->getOperand(1),
  5067. SDValue()).getValue(1);
  5068. InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
  5069. Node->getOperand(3), InFlag).getValue(1);
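// PCMPESTRI/PCMPESTRM take the explicit string lengths implicitly in EAX and
// EDX, which is what the two CopyToReg nodes above set up.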
  5070. bool NeedIndex = !SDValue(Node, 0).use_empty();
  5071. bool NeedMask = !SDValue(Node, 1).use_empty();
  5072. // We can't fold a load if we are going to make two instructions.
  5073. bool MayFoldLoad = !NeedIndex || !NeedMask;
  5074. MachineSDNode *CNode;
  5075. if (NeedMask) {
  5076. unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
  5077. unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
  5078. CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
  5079. InFlag);
  5080. ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
  5081. }
  5082. if (NeedIndex || !NeedMask) {
  5083. unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
  5084. unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
  5085. CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
  5086. ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
  5087. }
  5088. // Connect the flag usage to the last instruction created.
  5089. ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1));
  5090. CurDAG->RemoveDeadNode(Node);
  5091. return;
  5092. }
  5093. case ISD::SETCC: {
  5094. if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
  5095. return;
  5096. break;
  5097. }
  5098. case ISD::STORE:
  5099. if (foldLoadStoreIntoMemOperand(Node))
  5100. return;
  5101. break;
  5102. case X86ISD::SETCC_CARRY: {
  5103. // We have to do this manually because tblgen will put the eflags copy in
  5104. // the wrong place if we use an extract_subreg in the pattern.
  5105. MVT VT = Node->getSimpleValueType(0);
  5106. // Copy flags to the EFLAGS register and glue it to next node.
  5107. SDValue EFLAGS =
  5108. CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
  5109. Node->getOperand(1), SDValue());
5110. // Create a 64-bit instruction if the result is 64 bits, otherwise use the
  5111. // 32-bit version.
  5112. unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r;
  5113. MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
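// SETB_C32r/SETB_C64r are pseudos that expand to sbb of a register with
// itself, yielding all-ones when the carry flag is set and zero otherwise.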
  5114. SDValue Result = SDValue(
  5115. CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0);
5116. // For less than 32 bits we need to extract from the 32-bit node.
  5117. if (VT == MVT::i8 || VT == MVT::i16) {
  5118. int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
  5119. Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
  5120. }
  5121. ReplaceUses(SDValue(Node, 0), Result);
  5122. CurDAG->RemoveDeadNode(Node);
  5123. return;
  5124. }
  5125. case X86ISD::SBB: {
  5126. if (isNullConstant(Node->getOperand(0)) &&
  5127. isNullConstant(Node->getOperand(1))) {
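// (X86ISD::SBB 0, 0, EFLAGS) materializes 0 or -1 from the incoming carry,
// so emit sbb reg,reg against a freshly zeroed register instead of a real
// subtraction.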
  5128. MVT VT = Node->getSimpleValueType(0);
  5129. // Create zero.
  5130. SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
  5131. SDValue Zero =
  5132. SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0);
  5133. if (VT == MVT::i64) {
  5134. Zero = SDValue(
  5135. CurDAG->getMachineNode(
  5136. TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
  5137. CurDAG->getTargetConstant(0, dl, MVT::i64), Zero,
  5138. CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)),
  5139. 0);
  5140. }
  5141. // Copy flags to the EFLAGS register and glue it to next node.
  5142. SDValue EFLAGS =
  5143. CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS,
  5144. Node->getOperand(2), SDValue());
5145. // Create a 64-bit instruction if the result is 64 bits, otherwise use the
  5146. // 32-bit version.
  5147. unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr;
  5148. MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32;
  5149. VTs = CurDAG->getVTList(SBBVT, MVT::i32);
  5150. SDValue Result =
  5151. SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS,
  5152. EFLAGS.getValue(1)}),
  5153. 0);
  5154. // Replace the flag use.
  5155. ReplaceUses(SDValue(Node, 1), Result.getValue(1));
  5156. // Replace the result use.
  5157. if (!SDValue(Node, 0).use_empty()) {
5158. // For less than 32 bits we need to extract from the 32-bit node.
  5159. if (VT == MVT::i8 || VT == MVT::i16) {
  5160. int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit;
  5161. Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result);
  5162. }
  5163. ReplaceUses(SDValue(Node, 0), Result);
  5164. }
  5165. CurDAG->RemoveDeadNode(Node);
  5166. return;
  5167. }
  5168. break;
  5169. }
  5170. case X86ISD::MGATHER: {
  5171. auto *Mgt = cast<X86MaskedGatherSDNode>(Node);
  5172. SDValue IndexOp = Mgt->getIndex();
  5173. SDValue Mask = Mgt->getMask();
  5174. MVT IndexVT = IndexOp.getSimpleValueType();
  5175. MVT ValueVT = Node->getSimpleValueType(0);
  5176. MVT MaskVT = Mask.getSimpleValueType();
5177. // This is just to prevent crashes if the nodes are malformed somehow. We're
5178. // otherwise only doing loose type checking in here, based on what a type
5179. // constraint would say, just like table-based isel.
  5180. if (!ValueVT.isVector() || !MaskVT.isVector())
  5181. break;
  5182. unsigned NumElts = ValueVT.getVectorNumElements();
  5183. MVT ValueSVT = ValueVT.getVectorElementType();
  5184. bool IsFP = ValueSVT.isFloatingPoint();
  5185. unsigned EltSize = ValueSVT.getSizeInBits();
  5186. unsigned Opc = 0;
  5187. bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1;
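// AVX-512 gathers take their mask in a k-register (i1 elements); the AVX2
// forms instead take a vector mask in an XMM/YMM register, which is why the
// operand order differs between the two paths below.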
  5188. if (AVX512Gather) {
  5189. if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
  5190. Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm;
  5191. else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
  5192. Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm;
  5193. else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
  5194. Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm;
  5195. else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
  5196. Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm;
  5197. else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
  5198. Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm;
  5199. else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
  5200. Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm;
  5201. else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
  5202. Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm;
  5203. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
  5204. Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm;
  5205. else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
  5206. Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm;
  5207. else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
  5208. Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm;
  5209. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
  5210. Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm;
  5211. else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
  5212. Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm;
  5213. } else {
  5214. assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() &&
  5215. "Unexpected mask VT!");
  5216. if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
  5217. Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm;
  5218. else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
  5219. Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm;
  5220. else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
  5221. Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm;
  5222. else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
  5223. Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm;
  5224. else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
  5225. Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm;
  5226. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
  5227. Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm;
  5228. else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
  5229. Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm;
  5230. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
  5231. Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm;
  5232. }
  5233. if (!Opc)
  5234. break;
  5235. SDValue Base, Scale, Index, Disp, Segment;
  5236. if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(),
  5237. Base, Scale, Index, Disp, Segment))
  5238. break;
  5239. SDValue PassThru = Mgt->getPassThru();
  5240. SDValue Chain = Mgt->getChain();
5241. // Gather instructions produce an extra mask result that the ISD node lacks.
  5242. SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other);
  5243. MachineSDNode *NewNode;
  5244. if (AVX512Gather) {
  5245. SDValue Ops[] = {PassThru, Mask, Base, Scale,
  5246. Index, Disp, Segment, Chain};
  5247. NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
  5248. } else {
  5249. SDValue Ops[] = {PassThru, Base, Scale, Index,
  5250. Disp, Segment, Mask, Chain};
  5251. NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
  5252. }
  5253. CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()});
  5254. ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
  5255. ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2));
  5256. CurDAG->RemoveDeadNode(Node);
  5257. return;
  5258. }
  5259. case X86ISD::MSCATTER: {
  5260. auto *Sc = cast<X86MaskedScatterSDNode>(Node);
  5261. SDValue Value = Sc->getValue();
  5262. SDValue IndexOp = Sc->getIndex();
  5263. MVT IndexVT = IndexOp.getSimpleValueType();
  5264. MVT ValueVT = Value.getSimpleValueType();
5266. // This is just to prevent crashes if the nodes are malformed somehow. We're
5267. // otherwise only doing loose type checking in here, based on what a type
5268. // constraint would say, just like table-based isel.
  5268. if (!ValueVT.isVector())
  5269. break;
  5270. unsigned NumElts = ValueVT.getVectorNumElements();
  5271. MVT ValueSVT = ValueVT.getVectorElementType();
  5272. bool IsFP = ValueSVT.isFloatingPoint();
  5273. unsigned EltSize = ValueSVT.getSizeInBits();
  5274. unsigned Opc;
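// Scatters exist only in AVX-512, so unlike the gather case above there is
// no VEX-encoded fallback and the mask is always a k-register.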
  5275. if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32)
  5276. Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr;
  5277. else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32)
  5278. Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr;
  5279. else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32)
  5280. Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr;
  5281. else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64)
  5282. Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr;
  5283. else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64)
  5284. Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr;
  5285. else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64)
  5286. Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr;
  5287. else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32)
  5288. Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr;
  5289. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32)
  5290. Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr;
  5291. else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32)
  5292. Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr;
  5293. else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64)
  5294. Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr;
  5295. else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64)
  5296. Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr;
  5297. else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64)
  5298. Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr;
  5299. else
  5300. break;
  5301. SDValue Base, Scale, Index, Disp, Segment;
  5302. if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(),
  5303. Base, Scale, Index, Disp, Segment))
  5304. break;
  5305. SDValue Mask = Sc->getMask();
  5306. SDValue Chain = Sc->getChain();
5307. // Scatter instructions produce an extra mask result that the ISD node lacks.
  5308. SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other);
  5309. SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain};
  5310. MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops);
  5311. CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()});
  5312. ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1));
  5313. CurDAG->RemoveDeadNode(Node);
  5314. return;
  5315. }
  5316. case ISD::PREALLOCATED_SETUP: {
  5317. auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
  5318. auto CallId = MFI->getPreallocatedIdForCallSite(
  5319. cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
  5320. SDValue Chain = Node->getOperand(0);
  5321. SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
  5322. MachineSDNode *New = CurDAG->getMachineNode(
  5323. TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
  5324. ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
  5325. CurDAG->RemoveDeadNode(Node);
  5326. return;
  5327. }
  5328. case ISD::PREALLOCATED_ARG: {
  5329. auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
  5330. auto CallId = MFI->getPreallocatedIdForCallSite(
  5331. cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
  5332. SDValue Chain = Node->getOperand(0);
  5333. SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
  5334. SDValue ArgIndex = Node->getOperand(2);
  5335. SDValue Ops[3];
  5336. Ops[0] = CallIdValue;
  5337. Ops[1] = ArgIndex;
  5338. Ops[2] = Chain;
  5339. MachineSDNode *New = CurDAG->getMachineNode(
  5340. TargetOpcode::PREALLOCATED_ARG, dl,
  5341. CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
  5342. MVT::Other),
  5343. Ops);
  5344. ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
  5345. ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
  5346. CurDAG->RemoveDeadNode(Node);
  5347. return;
  5348. }
  5349. case X86ISD::AESENCWIDE128KL:
  5350. case X86ISD::AESDECWIDE128KL:
  5351. case X86ISD::AESENCWIDE256KL:
  5352. case X86ISD::AESDECWIDE256KL: {
  5353. if (!Subtarget->hasWIDEKL())
  5354. break;
  5355. unsigned Opcode;
  5356. switch (Node->getOpcode()) {
  5357. default:
  5358. llvm_unreachable("Unexpected opcode!");
  5359. case X86ISD::AESENCWIDE128KL:
  5360. Opcode = X86::AESENCWIDE128KL;
  5361. break;
  5362. case X86ISD::AESDECWIDE128KL:
  5363. Opcode = X86::AESDECWIDE128KL;
  5364. break;
  5365. case X86ISD::AESENCWIDE256KL:
  5366. Opcode = X86::AESENCWIDE256KL;
  5367. break;
  5368. case X86ISD::AESDECWIDE256KL:
  5369. Opcode = X86::AESDECWIDE256KL;
  5370. break;
  5371. }
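// The WIDE Key Locker forms process eight 128-bit blocks passed implicitly
// in XMM0..XMM7; the chain of CopyToReg nodes below stages the eight inputs
// into those registers before the memory-operand instruction is emitted.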
  5372. SDValue Chain = Node->getOperand(0);
  5373. SDValue Addr = Node->getOperand(1);
  5374. SDValue Base, Scale, Index, Disp, Segment;
  5375. if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment))
  5376. break;
  5377. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2),
  5378. SDValue());
  5379. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3),
  5380. Chain.getValue(1));
  5381. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4),
  5382. Chain.getValue(1));
  5383. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5),
  5384. Chain.getValue(1));
  5385. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6),
  5386. Chain.getValue(1));
  5387. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7),
  5388. Chain.getValue(1));
  5389. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8),
  5390. Chain.getValue(1));
  5391. Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9),
  5392. Chain.getValue(1));
  5393. MachineSDNode *Res = CurDAG->getMachineNode(
  5394. Opcode, dl, Node->getVTList(),
  5395. {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)});
  5396. CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand());
  5397. ReplaceNode(Node, Res);
  5398. return;
  5399. }
  5400. }
  5401. SelectCode(Node);
  5402. }
  5403. bool X86DAGToDAGISel::
  5404. SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
  5405. std::vector<SDValue> &OutOps) {
  5406. SDValue Op0, Op1, Op2, Op3, Op4;
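// Every X86 memory constraint is lowered to the usual five-operand memory
// reference: base, scale, index, displacement and segment.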
  5407. switch (ConstraintID) {
  5408. default:
  5409. llvm_unreachable("Unexpected asm memory constraint");
  5410. case InlineAsm::Constraint_o: // offsetable ??
  5411. case InlineAsm::Constraint_v: // not offsetable ??
  5412. case InlineAsm::Constraint_m: // memory
  5413. case InlineAsm::Constraint_X:
  5414. if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
  5415. return true;
  5416. break;
  5417. }
  5418. OutOps.push_back(Op0);
  5419. OutOps.push_back(Op1);
  5420. OutOps.push_back(Op2);
  5421. OutOps.push_back(Op3);
  5422. OutOps.push_back(Op4);
  5423. return false;
  5424. }
5425. /// This pass converts a legalized DAG into an X86-specific DAG,
  5426. /// ready for instruction scheduling.
  5427. FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
  5428. CodeGenOpt::Level OptLevel) {
  5429. return new X86DAGToDAGISel(TM, OptLevel);
  5430. }