//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
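
// For illustration (a sketch, not taken from the upstream sources): an old
// declaration such as
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// is renamed to "...ptestc.old" and a fresh declaration taking <2 x i64>
// operands is emitted; the existing call sites are then rewritten separately
// by the call-upgrade path (UpgradeIntrinsicCall).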

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
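
// For illustration (sketch; exact IR may vary): an old declaration such as
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is renamed and re-declared with an i8 immediate as its last operand.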

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
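
// For illustration (sketch): the old avx512.mask.cmp.* declarations returned a
// scalar integer mask, while the upgraded intrinsics return a vXi1 vector mask
// (e.g. <4 x i1> for the 128-bit ps form); call sites are adapted accordingly
// during call upgrade.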

static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
  334. Name.startswith("sse4a.movnt.") || // Added in 3.9
  335. Name.startswith("avx.movnt.") || // Added in 3.2
  336. Name.startswith("avx512.storent.") || // Added in 3.9
  337. Name == "sse41.movntdqa" || // Added in 5.0
  338. Name == "avx2.movntdqa" || // Added in 5.0
  339. Name == "avx512.movntdqa" || // Added in 5.0
  340. Name == "sse2.storel.dq" || // Added in 3.9
  341. Name.startswith("sse.storeu.") || // Added in 3.9
  342. Name.startswith("sse2.storeu.") || // Added in 3.9
  343. Name.startswith("avx.storeu.") || // Added in 3.9
  344. Name.startswith("avx512.mask.storeu.") || // Added in 3.9
  345. Name.startswith("avx512.mask.store.p") || // Added in 3.9
  346. Name.startswith("avx512.mask.store.b.") || // Added in 3.9
  347. Name.startswith("avx512.mask.store.w.") || // Added in 3.9
  348. Name.startswith("avx512.mask.store.d.") || // Added in 3.9
  349. Name.startswith("avx512.mask.store.q.") || // Added in 3.9
  350. Name == "avx512.mask.store.ss" || // Added in 7.0
  351. Name.startswith("avx512.mask.loadu.") || // Added in 3.9
  352. Name.startswith("avx512.mask.load.") || // Added in 3.9
  353. Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
  354. Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
  355. Name.startswith("avx512.mask.expand.b") || // Added in 9.0
  356. Name.startswith("avx512.mask.expand.w") || // Added in 9.0
  357. Name.startswith("avx512.mask.expand.d") || // Added in 9.0
  358. Name.startswith("avx512.mask.expand.q") || // Added in 9.0
  359. Name.startswith("avx512.mask.expand.p") || // Added in 9.0
  360. Name.startswith("avx512.mask.compress.b") || // Added in 9.0
  361. Name.startswith("avx512.mask.compress.w") || // Added in 9.0
  362. Name.startswith("avx512.mask.compress.d") || // Added in 9.0
  363. Name.startswith("avx512.mask.compress.q") || // Added in 9.0
  364. Name.startswith("avx512.mask.compress.p") || // Added in 9.0
  365. Name == "sse42.crc32.64.8" || // Added in 3.4
  366. Name.startswith("avx.vbroadcast.s") || // Added in 3.5
  367. Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
  368. Name.startswith("avx512.mask.palignr.") || // Added in 3.9
  369. Name.startswith("avx512.mask.valign.") || // Added in 4.0
  370. Name.startswith("sse2.psll.dq") || // Added in 3.7
  371. Name.startswith("sse2.psrl.dq") || // Added in 3.7
  372. Name.startswith("avx2.psll.dq") || // Added in 3.7
  373. Name.startswith("avx2.psrl.dq") || // Added in 3.7
  374. Name.startswith("avx512.psll.dq") || // Added in 3.9
  375. Name.startswith("avx512.psrl.dq") || // Added in 3.9
  376. Name == "sse41.pblendw" || // Added in 3.7
  377. Name.startswith("sse41.blendp") || // Added in 3.7
  378. Name.startswith("avx.blend.p") || // Added in 3.7
  379. Name == "avx2.pblendw" || // Added in 3.7
  380. Name.startswith("avx2.pblendd.") || // Added in 3.7
  381. Name.startswith("avx.vbroadcastf128") || // Added in 4.0
  382. Name == "avx2.vbroadcasti128" || // Added in 3.7
  383. Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
  384. Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
  385. Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
  386. Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
  387. Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
  388. Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
  389. Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
  390. Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
  391. Name == "xop.vpcmov" || // Added in 3.8
  392. Name == "xop.vpcmov.256" || // Added in 5.0
  393. Name.startswith("avx512.mask.move.s") || // Added in 4.0
  394. Name.startswith("avx512.cvtmask2") || // Added in 5.0
  395. Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
  396. Name.startswith("xop.vprot") || // Added in 8.0
  397. Name.startswith("avx512.prol") || // Added in 8.0
  398. Name.startswith("avx512.pror") || // Added in 8.0
  399. Name.startswith("avx512.mask.prorv.") || // Added in 8.0
  400. Name.startswith("avx512.mask.pror.") || // Added in 8.0
  401. Name.startswith("avx512.mask.prolv.") || // Added in 8.0
  402. Name.startswith("avx512.mask.prol.") || // Added in 8.0
  403. Name.startswith("avx512.ptestm") || //Added in 6.0
  404. Name.startswith("avx512.ptestnm") || //Added in 6.0
  405. Name.startswith("avx512.mask.pavg")) // Added in 6.0
  406. return true;
  407. return false;
  408. }

static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);
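
  // Returning true while leaving NewFn null signals that there is no direct
  // replacement declaration and that the call itself must be rewritten later
  // (the same convention the nvvm cases below describe for
  // UpgradeIntrinsicCall).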
  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }

  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + ".p0i8", F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};
      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (Name.find("lane") == StringRef::npos)
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
    // respectively
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys {{
        F->getReturnType(),
        FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
    // and accept v8bf16 instead of v16i8
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }
    break;
  }
  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
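
    // For example (illustrative): a declaration whose stripped name matches
    // "experimental.vector.reduce.add.<types>" is renamed and re-declared as
    // the non-experimental llvm.vector.reduce.add, overloaded on its vector
    // operand. The ordered fadd/fmul variants handled below are treated the
    // same way, except that the vector is the second parameter (after the
    // scalar start value).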
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  }
  case 'm': {
    if (Name.startswith("masked.load.")) {
      Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_load,
                                          Tys);
        return true;
      }
    }
    if (Name.startswith("masked.store.")) {
      auto Args = F->getFunctionType()->params();
      Type *Tys[] = { Args[0], Args[1] };
      if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::masked_store,
                                          Tys);
        return true;
      }
    }
    // Renaming gather/scatter intrinsics with no address space overloading
    // to the new overload which includes an address space
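    // (Illustratively: an old declaration mangled only with the data type is
    // re-declared so that the pointer-vector operand type, and with it the
    // address space, also appears in the mangled name.)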
  796. if (Name.startswith("masked.gather.")) {
  797. Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
  798. if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
  799. rename(F);
  800. NewFn = Intrinsic::getDeclaration(F->getParent(),
  801. Intrinsic::masked_gather, Tys);
  802. return true;
  803. }
  804. }
  805. if (Name.startswith("masked.scatter.")) {
  806. auto Args = F->getFunctionType()->params();
  807. Type *Tys[] = {Args[0], Args[1]};
  808. if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
  809. rename(F);
  810. NewFn = Intrinsic::getDeclaration(F->getParent(),
  811. Intrinsic::masked_scatter, Tys);
  812. return true;
  813. }
  814. }
// Upgrade the memory intrinsics (memcpy/memmove/memset) that take an explicit
// alignment parameter to the overloads that instead encode the alignment as an
// attribute on the pointer arguments.
  818. if (Name.startswith("memcpy.") && F->arg_size() == 5) {
  819. rename(F);
  820. // Get the types of dest, src, and len
  821. ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
  822. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
  823. ParamTypes);
  824. return true;
  825. }
  826. if (Name.startswith("memmove.") && F->arg_size() == 5) {
  827. rename(F);
  828. // Get the types of dest, src, and len
  829. ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
  830. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
  831. ParamTypes);
  832. return true;
  833. }
  834. if (Name.startswith("memset.") && F->arg_size() == 5) {
  835. rename(F);
// Get the types of dest and len
  837. const auto *FT = F->getFunctionType();
  838. Type *ParamTypes[2] = {
  839. FT->getParamType(0), // Dest
  840. FT->getParamType(2) // len
  841. };
  842. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
  843. ParamTypes);
  844. return true;
  845. }
  846. break;
  847. }
  848. case 'n': {
  849. if (Name.startswith("nvvm.")) {
  850. Name = Name.substr(5);
  851. // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
  852. Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
  853. .Cases("brev32", "brev64", Intrinsic::bitreverse)
  854. .Case("clz.i", Intrinsic::ctlz)
  855. .Case("popc.i", Intrinsic::ctpop)
  856. .Default(Intrinsic::not_intrinsic);
  857. if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
  858. NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
  859. {F->getReturnType()});
  860. return true;
  861. }
  862. // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
  863. // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
  864. //
  865. // TODO: We could add lohi.i2d.
  866. bool Expand = StringSwitch<bool>(Name)
  867. .Cases("abs.i", "abs.ll", true)
  868. .Cases("clz.ll", "popc.ll", "h2f", true)
  869. .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
  870. .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
  871. .StartsWith("atomic.load.add.f32.p", true)
  872. .StartsWith("atomic.load.add.f64.p", true)
  873. .Default(false);
  874. if (Expand) {
  875. NewFn = nullptr;
  876. return true;
  877. }
  878. }
  879. break;
  880. }
  881. case 'o':
  882. // We only need to change the name to match the mangling including the
  883. // address space.
  884. if (Name.startswith("objectsize.")) {
  885. Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
  886. if (F->arg_size() == 2 || F->arg_size() == 3 ||
  887. F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
  888. rename(F);
  889. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
  890. Tys);
  891. return true;
  892. }
  893. }
  894. break;
  895. case 'p':
  896. if (Name == "prefetch") {
  897. // Handle address space overloading.
  898. Type *Tys[] = {F->arg_begin()->getType()};
  899. if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
  900. rename(F);
  901. NewFn =
  902. Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
  903. return true;
  904. }
  905. } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
  906. rename(F);
  907. NewFn = Intrinsic::getDeclaration(F->getParent(),
  908. Intrinsic::ptr_annotation,
  909. F->arg_begin()->getType());
  910. return true;
  911. }
  912. break;
  913. case 's':
  914. if (Name == "stackprotectorcheck") {
  915. NewFn = nullptr;
  916. return true;
  917. }
  918. break;
  919. case 'v': {
  920. if (Name == "var.annotation" && F->arg_size() == 4) {
  921. rename(F);
  922. NewFn = Intrinsic::getDeclaration(F->getParent(),
  923. Intrinsic::var_annotation);
  924. return true;
  925. }
  926. break;
  927. }
  928. case 'x':
  929. if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
  930. return true;
  931. }
  932. // Remangle our intrinsic since we upgrade the mangling
  933. auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  934. if (Result != None) {
  935. NewFn = Result.getValue();
  936. return true;
  937. }
  938. // This may not belong here. This function is effectively being overloaded
  939. // to both detect an intrinsic which needs upgrading, and to provide the
  940. // upgraded form of the intrinsic. We should perhaps have two separate
  941. // functions for this.
  942. return false;
  943. }
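// Wrapper around UpgradeIntrinsicFunction1 that also re-applies the intrinsic's
// attributes after a successful upgrade.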
  944. bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  945. NewFn = nullptr;
  946. bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  947. assert(F != NewFn && "Intrinsic function upgraded to the same function");
  948. // Upgrade intrinsic attributes. This does not change the function.
  949. if (NewFn)
  950. F = NewFn;
  951. if (Intrinsic::ID id = F->getIntrinsicID())
  952. F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  953. return Upgraded;
  954. }
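// Upgrade two-field llvm.global_ctors / llvm.global_dtors initializer entries to
// the three-field form by appending a null i8* as the third member.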
  955. GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  956. if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
  957. GV->getName() == "llvm.global_dtors")) ||
  958. !GV->hasInitializer())
  959. return nullptr;
  960. ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  961. if (!ATy)
  962. return nullptr;
  963. StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  964. if (!STy || STy->getNumElements() != 2)
  965. return nullptr;
  966. LLVMContext &C = GV->getContext();
  967. IRBuilder<> IRB(C);
  968. auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
  969. IRB.getInt8PtrTy());
  970. Constant *Init = GV->getInitializer();
  971. unsigned N = Init->getNumOperands();
  972. std::vector<Constant *> NewCtors(N);
  973. for (unsigned i = 0; i != N; ++i) {
  974. auto Ctor = cast<Constant>(Init->getOperand(i));
  975. NewCtors[i] = ConstantStruct::get(
  976. EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
  977. Constant::getNullValue(IRB.getInt8PtrTy()));
  978. }
  979. Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
  980. return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
  981. NewInit, GV->getName());
  982. }
  983. // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
  984. // to byte shuffles.
  985. static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
  986. Value *Op, unsigned Shift) {
  987. auto *ResultTy = cast<FixedVectorType>(Op->getType());
  988. unsigned NumElts = ResultTy->getNumElements() * 8;
  989. // Bitcast from a 64-bit element type to a byte element type.
  990. Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  991. Op = Builder.CreateBitCast(Op, VecTy, "cast");
  992. // We'll be shuffling in zeroes.
  993. Value *Res = Constant::getNullValue(VecTy);
  994. // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  995. // we'll just return the zero vector.
  996. if (Shift < 16) {
  997. int Idxs[64];
  998. // 256/512-bit version is split into 2/4 16-byte lanes.
  999. for (unsigned l = 0; l != NumElts; l += 16)
  1000. for (unsigned i = 0; i != 16; ++i) {
  1001. unsigned Idx = NumElts + i - Shift;
  1002. if (Idx < NumElts)
  1003. Idx -= NumElts - 16; // end of lane, switch operand.
  1004. Idxs[l + i] = Idx + l;
  1005. }
  1006. Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  1007. }
  1008. // Bitcast back to a 64-bit element type.
  1009. return Builder.CreateBitCast(Res, ResultTy, "cast");
  1010. }
  1011. // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
  1012. // to byte shuffles.
  1013. static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
  1014. unsigned Shift) {
  1015. auto *ResultTy = cast<FixedVectorType>(Op->getType());
  1016. unsigned NumElts = ResultTy->getNumElements() * 8;
  1017. // Bitcast from a 64-bit element type to a byte element type.
  1018. Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  1019. Op = Builder.CreateBitCast(Op, VecTy, "cast");
  1020. // We'll be shuffling in zeroes.
  1021. Value *Res = Constant::getNullValue(VecTy);
  1022. // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  1023. // we'll just return the zero vector.
  1024. if (Shift < 16) {
  1025. int Idxs[64];
  1026. // 256/512-bit version is split into 2/4 16-byte lanes.
  1027. for (unsigned l = 0; l != NumElts; l += 16)
  1028. for (unsigned i = 0; i != 16; ++i) {
  1029. unsigned Idx = i + Shift;
  1030. if (Idx >= 16)
  1031. Idx += NumElts - 16; // end of lane, switch operand.
  1032. Idxs[l + i] = Idx + l;
  1033. }
  1034. Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  1035. }
  1036. // Bitcast back to a 64-bit element type.
  1037. return Builder.CreateBitCast(Res, ResultTy, "cast");
  1038. }
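// Convert an X86 scalar mask value to a vector of i1, extracting only the low
// NumElts bits when fewer than 8 elements are needed.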
  1039. static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
  1040. unsigned NumElts) {
  1041. assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  1042. llvm::VectorType *MaskTy = FixedVectorType::get(
  1043. Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  1044. Mask = Builder.CreateBitCast(Mask, MaskTy);
  1045. // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  1046. // i8 and we need to extract down to the right number of elements.
  1047. if (NumElts <= 4) {
  1048. int Indices[4];
  1049. for (unsigned i = 0; i != NumElts; ++i)
  1050. Indices[i] = i;
  1051. Mask = Builder.CreateShuffleVector(
  1052. Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
  1053. }
  1054. return Mask;
  1055. }
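// Select lane-wise between Op0 and Op1 using an X86 integer mask; an all-ones
// constant mask folds to Op0.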
  1056. static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
  1057. Value *Op0, Value *Op1) {
  1058. // If the mask is all ones just emit the first operation.
  1059. if (const auto *C = dyn_cast<Constant>(Mask))
  1060. if (C->isAllOnesValue())
  1061. return Op0;
  1062. Mask = getX86MaskVec(Builder, Mask,
  1063. cast<FixedVectorType>(Op0->getType())->getNumElements());
  1064. return Builder.CreateSelect(Mask, Op0, Op1);
  1065. }
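// Like EmitX86Select, but only bit 0 of the mask is used to select between the
// scalar operands.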
  1066. static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
  1067. Value *Op0, Value *Op1) {
  1068. // If the mask is all ones just emit the first operation.
  1069. if (const auto *C = dyn_cast<Constant>(Mask))
  1070. if (C->isAllOnesValue())
  1071. return Op0;
  1072. auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
  1073. Mask->getType()->getIntegerBitWidth());
  1074. Mask = Builder.CreateBitCast(Mask, MaskTy);
  1075. Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1076. return Builder.CreateSelect(Mask, Op0, Op1);
  1077. }
  1078. // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
  1079. // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
  1080. // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
  1081. static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
  1082. Value *Op1, Value *Shift,
  1083. Value *Passthru, Value *Mask,
  1084. bool IsVALIGN) {
  1085. unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
  1086. unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  1087. assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  1088. assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  1089. assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
  1090. // Mask the immediate for VALIGN.
  1091. if (IsVALIGN)
  1092. ShiftVal &= (NumElts - 1);
  1093. // If palignr is shifting the pair of vectors more than the size of two
  1094. // lanes, emit zero.
  1095. if (ShiftVal >= 32)
  1096. return llvm::Constant::getNullValue(Op0->getType());
  1097. // If palignr is shifting the pair of input vectors more than one lane,
  1098. // but less than two lanes, convert to shifting in zeroes.
  1099. if (ShiftVal > 16) {
  1100. ShiftVal -= 16;
  1101. Op1 = Op0;
  1102. Op0 = llvm::Constant::getNullValue(Op0->getType());
  1103. }
  1104. int Indices[64];
  1105. // 256-bit palignr operates on 128-bit lanes so we need to handle that
  1106. for (unsigned l = 0; l < NumElts; l += 16) {
  1107. for (unsigned i = 0; i != 16; ++i) {
  1108. unsigned Idx = ShiftVal + i;
  1109. if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
  1110. Idx += NumElts - 16; // End of lane, switch operand.
  1111. Indices[l + i] = Idx + l;
  1112. }
  1113. }
  1114. Value *Align = Builder.CreateShuffleVector(Op1, Op0,
  1115. makeArrayRef(Indices, NumElts),
  1116. "palignr");
  1117. return EmitX86Select(Builder, Mask, Align, Passthru);
  1118. }
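// Upgrade masked VPERMT2/VPERMI2 variants to the unmasked vpermi2var intrinsics
// followed by a select on the mask.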
  1119. static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
  1120. bool ZeroMask, bool IndexForm) {
  1121. Type *Ty = CI.getType();
  1122. unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  1123. unsigned EltWidth = Ty->getScalarSizeInBits();
  1124. bool IsFloat = Ty->isFPOrFPVectorTy();
  1125. Intrinsic::ID IID;
  1126. if (VecWidth == 128 && EltWidth == 32 && IsFloat)
  1127. IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  1128. else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
  1129. IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  1130. else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
  1131. IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  1132. else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
  1133. IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  1134. else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  1135. IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  1136. else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  1137. IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  1138. else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  1139. IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  1140. else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  1141. IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  1142. else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  1143. IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  1144. else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  1145. IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  1146. else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  1147. IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  1148. else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  1149. IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  1150. else if (VecWidth == 128 && EltWidth == 16)
  1151. IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  1152. else if (VecWidth == 256 && EltWidth == 16)
  1153. IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  1154. else if (VecWidth == 512 && EltWidth == 16)
  1155. IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  1156. else if (VecWidth == 128 && EltWidth == 8)
  1157. IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  1158. else if (VecWidth == 256 && EltWidth == 8)
  1159. IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  1160. else if (VecWidth == 512 && EltWidth == 8)
  1161. IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  1162. else
  1163. llvm_unreachable("Unexpected intrinsic");
  1164. Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
  1165. CI.getArgOperand(2) };
// If this isn't the index form, we need to swap operands 0 and 1.
  1167. if (!IndexForm)
  1168. std::swap(Args[0], Args[1]);
  1169. Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
  1170. Args);
  1171. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
  1172. : Builder.CreateBitCast(CI.getArgOperand(1),
  1173. Ty);
  1174. return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
  1175. }
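// Upgrade a simple two-operand intrinsic to the generic intrinsic IID (e.g.
// smax/umin), applying a mask select for the masked forms.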
  1176. static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
  1177. Intrinsic::ID IID) {
  1178. Type *Ty = CI.getType();
  1179. Value *Op0 = CI.getOperand(0);
  1180. Value *Op1 = CI.getOperand(1);
  1181. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1182. Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
  1183. if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
  1184. Value *VecSrc = CI.getOperand(2);
  1185. Value *Mask = CI.getOperand(3);
  1186. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1187. }
  1188. return Res;
  1189. }
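// Upgrade X86 rotate intrinsics to llvm.fshl/llvm.fshr with both data operands
// equal to Src, splatting a scalar rotate amount when necessary.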
  1190. static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
  1191. bool IsRotateRight) {
  1192. Type *Ty = CI.getType();
  1193. Value *Src = CI.getArgOperand(0);
  1194. Value *Amt = CI.getArgOperand(1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated modulo the element width and the types are
// all power-of-2 sized, so we only care about the lowest log2(width) bits anyway.
  1198. if (Amt->getType() != Ty) {
  1199. unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  1200. Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  1201. Amt = Builder.CreateVectorSplat(NumElts, Amt);
  1202. }
  1203. Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  1204. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1205. Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
  1206. if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
  1207. Value *VecSrc = CI.getOperand(2);
  1208. Value *Mask = CI.getOperand(3);
  1209. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1210. }
  1211. return Res;
  1212. }
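// Upgrade XOP vpcom/vpcomu intrinsics: map the immediate to an icmp predicate
// (or constant false/true) and sign-extend the comparison to the result type.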
  1213. static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
  1214. bool IsSigned) {
  1215. Type *Ty = CI.getType();
  1216. Value *LHS = CI.getArgOperand(0);
  1217. Value *RHS = CI.getArgOperand(1);
  1218. CmpInst::Predicate Pred;
  1219. switch (Imm) {
  1220. case 0x0:
  1221. Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  1222. break;
  1223. case 0x1:
  1224. Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  1225. break;
  1226. case 0x2:
  1227. Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  1228. break;
  1229. case 0x3:
  1230. Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  1231. break;
  1232. case 0x4:
  1233. Pred = ICmpInst::ICMP_EQ;
  1234. break;
  1235. case 0x5:
  1236. Pred = ICmpInst::ICMP_NE;
  1237. break;
  1238. case 0x6:
  1239. return Constant::getNullValue(Ty); // FALSE
  1240. case 0x7:
  1241. return Constant::getAllOnesValue(Ty); // TRUE
  1242. default:
  1243. llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  1244. }
  1245. Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  1246. Value *Ext = Builder.CreateSExt(Cmp, Ty);
  1247. return Ext;
  1248. }
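// Upgrade X86 concat/double-shift intrinsics to llvm.fshl/llvm.fshr, applying a
// mask select when the extra mask operands are present.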
  1249. static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
  1250. bool IsShiftRight, bool ZeroMask) {
  1251. Type *Ty = CI.getType();
  1252. Value *Op0 = CI.getArgOperand(0);
  1253. Value *Op1 = CI.getArgOperand(1);
  1254. Value *Amt = CI.getArgOperand(2);
  1255. if (IsShiftRight)
  1256. std::swap(Op0, Op1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated modulo the element width and the types are
// all power-of-2 sized, so we only care about the lowest log2(width) bits anyway.
  1260. if (Amt->getType() != Ty) {
  1261. unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  1262. Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  1263. Amt = Builder.CreateVectorSplat(NumElts, Amt);
  1264. }
  1265. Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  1266. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1267. Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
  1268. unsigned NumArgs = CI.getNumArgOperands();
  1269. if (NumArgs >= 4) { // For masked intrinsics.
  1270. Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
  1271. ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
  1272. CI.getArgOperand(0);
  1273. Value *Mask = CI.getOperand(NumArgs - 1);
  1274. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1275. }
  1276. return Res;
  1277. }
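// Lower an old masked store intrinsic: emit a plain store for an all-ones mask,
// otherwise an llvm.masked.store.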
  1278. static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
  1279. Value *Ptr, Value *Data, Value *Mask,
  1280. bool Aligned) {
  1281. // Cast the pointer to the right type.
  1282. Ptr = Builder.CreateBitCast(Ptr,
  1283. llvm::PointerType::getUnqual(Data->getType()));
  1284. const Align Alignment =
  1285. Aligned
  1286. ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
  1287. : Align(1);
  1288. // If the mask is all ones just emit a regular store.
  1289. if (const auto *C = dyn_cast<Constant>(Mask))
  1290. if (C->isAllOnesValue())
  1291. return Builder.CreateAlignedStore(Data, Ptr, Alignment);
  1292. // Convert the mask from an integer type to a vector of i1.
  1293. unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  1294. Mask = getX86MaskVec(Builder, Mask, NumElts);
  1295. return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
  1296. }
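// Lower an old masked load intrinsic: emit a plain load for an all-ones mask,
// otherwise an llvm.masked.load.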
  1297. static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
  1298. Value *Ptr, Value *Passthru, Value *Mask,
  1299. bool Aligned) {
  1300. Type *ValTy = Passthru->getType();
  1301. // Cast the pointer to the right type.
  1302. Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  1303. const Align Alignment =
  1304. Aligned
  1305. ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
  1306. 8)
  1307. : Align(1);
// If the mask is all ones, just emit a regular load.
  1309. if (const auto *C = dyn_cast<Constant>(Mask))
  1310. if (C->isAllOnesValue())
  1311. return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
  1312. // Convert the mask from an integer type to a vector of i1.
  1313. unsigned NumElts =
  1314. cast<FixedVectorType>(Passthru->getType())->getNumElements();
  1315. Mask = getX86MaskVec(Builder, Mask, NumElts);
  1316. return Builder.CreateMaskedLoad(Ptr, Alignment, Mask, Passthru);
  1317. }
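// Upgrade X86 absolute-value intrinsics to the generic llvm.abs intrinsic,
// applying a mask select for the masked forms.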
  1318. static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  1319. Type *Ty = CI.getType();
  1320. Value *Op0 = CI.getArgOperand(0);
  1321. Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
  1322. Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
  1323. if (CI.getNumArgOperands() == 3)
  1324. Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  1325. return Res;
  1326. }
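// Upgrade pmuldq/pmuludq style intrinsics: sign- or zero-extend the low 32 bits
// of each vXi64 lane and multiply, applying a mask select for the masked forms.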
  1327. static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  1328. Type *Ty = CI.getType();
  1329. // Arguments have a vXi32 type so cast to vXi64.
  1330. Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  1331. Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
  1332. if (IsSigned) {
  1333. // Shift left then arithmetic shift right.
  1334. Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  1335. LHS = Builder.CreateShl(LHS, ShiftAmt);
  1336. LHS = Builder.CreateAShr(LHS, ShiftAmt);
  1337. RHS = Builder.CreateShl(RHS, ShiftAmt);
  1338. RHS = Builder.CreateAShr(RHS, ShiftAmt);
  1339. } else {
  1340. // Clear the upper bits.
  1341. Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  1342. LHS = Builder.CreateAnd(LHS, Mask);
  1343. RHS = Builder.CreateAnd(RHS, Mask);
  1344. }
  1345. Value *Res = Builder.CreateMul(LHS, RHS);
  1346. if (CI.getNumArgOperands() == 4)
  1347. Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
  1348. return Res;
  1349. }
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
  1351. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
  1352. Value *Mask) {
  1353. unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  1354. if (Mask) {
  1355. const auto *C = dyn_cast<Constant>(Mask);
  1356. if (!C || !C->isAllOnesValue())
  1357. Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  1358. }
  1359. if (NumElts < 8) {
  1360. int Indices[8];
  1361. for (unsigned i = 0; i != NumElts; ++i)
  1362. Indices[i] = i;
  1363. for (unsigned i = NumElts; i != 8; ++i)
  1364. Indices[i] = NumElts + i % NumElts;
  1365. Vec = Builder.CreateShuffleVector(Vec,
  1366. Constant::getNullValue(Vec->getType()),
  1367. Indices);
  1368. }
  1369. return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
  1370. }
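// Upgrade masked integer compare intrinsics: CC values 3 and 7 fold to constant
// false/true, other values map to an icmp predicate; the result is then masked
// and widened to at least 8 bits.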
  1371. static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
  1372. unsigned CC, bool Signed) {
  1373. Value *Op0 = CI.getArgOperand(0);
  1374. unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  1375. Value *Cmp;
  1376. if (CC == 3) {
  1377. Cmp = Constant::getNullValue(
  1378. FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  1379. } else if (CC == 7) {
  1380. Cmp = Constant::getAllOnesValue(
  1381. FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  1382. } else {
  1383. ICmpInst::Predicate Pred;
  1384. switch (CC) {
  1385. default: llvm_unreachable("Unknown condition code");
  1386. case 0: Pred = ICmpInst::ICMP_EQ; break;
  1387. case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
  1388. case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
  1389. case 4: Pred = ICmpInst::ICMP_NE; break;
  1390. case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
  1391. case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
  1392. }
  1393. Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  1394. }
  1395. Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
  1396. return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
  1397. }
// Replace a masked intrinsic with a call to the older unmasked intrinsic plus a
// select on the result.
  1399. static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
  1400. Intrinsic::ID IID) {
  1401. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  1402. Value *Rep = Builder.CreateCall(Intrin,
  1403. { CI.getArgOperand(0), CI.getArgOperand(1) });
  1404. return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
  1405. }
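// Upgrade masked scalar move intrinsics: bit 0 of the mask selects element 0 of
// either B or Src, which is then inserted into A.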
  1406. static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  1407. Value* A = CI.getArgOperand(0);
  1408. Value* B = CI.getArgOperand(1);
  1409. Value* Src = CI.getArgOperand(2);
  1410. Value* Mask = CI.getArgOperand(3);
  1411. Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  1412. Value* Cmp = Builder.CreateIsNotNull(AndNode);
  1413. Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  1414. Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  1415. Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  1416. return Builder.CreateInsertElement(A, Select, (uint64_t)0);
  1417. }
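// Upgrade mask-to-vector intrinsics (vpmovm2*) by sign-extending the i1 mask
// vector to the integer result vector type.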
  1418. static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  1419. Value* Op = CI.getArgOperand(0);
  1420. Type* ReturnOp = CI.getType();
  1421. unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  1422. Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  1423. return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  1424. }
  1425. // Replace intrinsic with unmasked version and a select.
  1426. static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
  1427. CallInst &CI, Value *&Rep) {
  1428. Name = Name.substr(12); // Remove avx512.mask.
  1429. unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  1430. unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  1431. Intrinsic::ID IID;
  1432. if (Name.startswith("max.p")) {
  1433. if (VecWidth == 128 && EltWidth == 32)
  1434. IID = Intrinsic::x86_sse_max_ps;
  1435. else if (VecWidth == 128 && EltWidth == 64)
  1436. IID = Intrinsic::x86_sse2_max_pd;
  1437. else if (VecWidth == 256 && EltWidth == 32)
  1438. IID = Intrinsic::x86_avx_max_ps_256;
  1439. else if (VecWidth == 256 && EltWidth == 64)
  1440. IID = Intrinsic::x86_avx_max_pd_256;
  1441. else
  1442. llvm_unreachable("Unexpected intrinsic");
  1443. } else if (Name.startswith("min.p")) {
  1444. if (VecWidth == 128 && EltWidth == 32)
  1445. IID = Intrinsic::x86_sse_min_ps;
  1446. else if (VecWidth == 128 && EltWidth == 64)
  1447. IID = Intrinsic::x86_sse2_min_pd;
  1448. else if (VecWidth == 256 && EltWidth == 32)
  1449. IID = Intrinsic::x86_avx_min_ps_256;
  1450. else if (VecWidth == 256 && EltWidth == 64)
  1451. IID = Intrinsic::x86_avx_min_pd_256;
  1452. else
  1453. llvm_unreachable("Unexpected intrinsic");
  1454. } else if (Name.startswith("pshuf.b.")) {
  1455. if (VecWidth == 128)
  1456. IID = Intrinsic::x86_ssse3_pshuf_b_128;
  1457. else if (VecWidth == 256)
  1458. IID = Intrinsic::x86_avx2_pshuf_b;
  1459. else if (VecWidth == 512)
  1460. IID = Intrinsic::x86_avx512_pshuf_b_512;
  1461. else
  1462. llvm_unreachable("Unexpected intrinsic");
  1463. } else if (Name.startswith("pmul.hr.sw.")) {
  1464. if (VecWidth == 128)
  1465. IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
  1466. else if (VecWidth == 256)
  1467. IID = Intrinsic::x86_avx2_pmul_hr_sw;
  1468. else if (VecWidth == 512)
  1469. IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
  1470. else
  1471. llvm_unreachable("Unexpected intrinsic");
  1472. } else if (Name.startswith("pmulh.w.")) {
  1473. if (VecWidth == 128)
  1474. IID = Intrinsic::x86_sse2_pmulh_w;
  1475. else if (VecWidth == 256)
  1476. IID = Intrinsic::x86_avx2_pmulh_w;
  1477. else if (VecWidth == 512)
  1478. IID = Intrinsic::x86_avx512_pmulh_w_512;
  1479. else
  1480. llvm_unreachable("Unexpected intrinsic");
  1481. } else if (Name.startswith("pmulhu.w.")) {
  1482. if (VecWidth == 128)
  1483. IID = Intrinsic::x86_sse2_pmulhu_w;
  1484. else if (VecWidth == 256)
  1485. IID = Intrinsic::x86_avx2_pmulhu_w;
  1486. else if (VecWidth == 512)
  1487. IID = Intrinsic::x86_avx512_pmulhu_w_512;
  1488. else
  1489. llvm_unreachable("Unexpected intrinsic");
  1490. } else if (Name.startswith("pmaddw.d.")) {
  1491. if (VecWidth == 128)
  1492. IID = Intrinsic::x86_sse2_pmadd_wd;
  1493. else if (VecWidth == 256)
  1494. IID = Intrinsic::x86_avx2_pmadd_wd;
  1495. else if (VecWidth == 512)
  1496. IID = Intrinsic::x86_avx512_pmaddw_d_512;
  1497. else
  1498. llvm_unreachable("Unexpected intrinsic");
  1499. } else if (Name.startswith("pmaddubs.w.")) {
  1500. if (VecWidth == 128)
  1501. IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
  1502. else if (VecWidth == 256)
  1503. IID = Intrinsic::x86_avx2_pmadd_ub_sw;
  1504. else if (VecWidth == 512)
  1505. IID = Intrinsic::x86_avx512_pmaddubs_w_512;
  1506. else
  1507. llvm_unreachable("Unexpected intrinsic");
  1508. } else if (Name.startswith("packsswb.")) {
  1509. if (VecWidth == 128)
  1510. IID = Intrinsic::x86_sse2_packsswb_128;
  1511. else if (VecWidth == 256)
  1512. IID = Intrinsic::x86_avx2_packsswb;
  1513. else if (VecWidth == 512)
  1514. IID = Intrinsic::x86_avx512_packsswb_512;
  1515. else
  1516. llvm_unreachable("Unexpected intrinsic");
  1517. } else if (Name.startswith("packssdw.")) {
  1518. if (VecWidth == 128)
  1519. IID = Intrinsic::x86_sse2_packssdw_128;
  1520. else if (VecWidth == 256)
  1521. IID = Intrinsic::x86_avx2_packssdw;
  1522. else if (VecWidth == 512)
  1523. IID = Intrinsic::x86_avx512_packssdw_512;
  1524. else
  1525. llvm_unreachable("Unexpected intrinsic");
  1526. } else if (Name.startswith("packuswb.")) {
  1527. if (VecWidth == 128)
  1528. IID = Intrinsic::x86_sse2_packuswb_128;
  1529. else if (VecWidth == 256)
  1530. IID = Intrinsic::x86_avx2_packuswb;
  1531. else if (VecWidth == 512)
  1532. IID = Intrinsic::x86_avx512_packuswb_512;
  1533. else
  1534. llvm_unreachable("Unexpected intrinsic");
  1535. } else if (Name.startswith("packusdw.")) {
  1536. if (VecWidth == 128)
  1537. IID = Intrinsic::x86_sse41_packusdw;
  1538. else if (VecWidth == 256)
  1539. IID = Intrinsic::x86_avx2_packusdw;
  1540. else if (VecWidth == 512)
  1541. IID = Intrinsic::x86_avx512_packusdw_512;
  1542. else
  1543. llvm_unreachable("Unexpected intrinsic");
  1544. } else if (Name.startswith("vpermilvar.")) {
  1545. if (VecWidth == 128 && EltWidth == 32)
  1546. IID = Intrinsic::x86_avx_vpermilvar_ps;
  1547. else if (VecWidth == 128 && EltWidth == 64)
  1548. IID = Intrinsic::x86_avx_vpermilvar_pd;
  1549. else if (VecWidth == 256 && EltWidth == 32)
  1550. IID = Intrinsic::x86_avx_vpermilvar_ps_256;
  1551. else if (VecWidth == 256 && EltWidth == 64)
  1552. IID = Intrinsic::x86_avx_vpermilvar_pd_256;
  1553. else if (VecWidth == 512 && EltWidth == 32)
  1554. IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
  1555. else if (VecWidth == 512 && EltWidth == 64)
  1556. IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
  1557. else
  1558. llvm_unreachable("Unexpected intrinsic");
  1559. } else if (Name == "cvtpd2dq.256") {
  1560. IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  1561. } else if (Name == "cvtpd2ps.256") {
  1562. IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  1563. } else if (Name == "cvttpd2dq.256") {
  1564. IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  1565. } else if (Name == "cvttps2dq.128") {
  1566. IID = Intrinsic::x86_sse2_cvttps2dq;
  1567. } else if (Name == "cvttps2dq.256") {
  1568. IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  1569. } else if (Name.startswith("permvar.")) {
  1570. bool IsFloat = CI.getType()->isFPOrFPVectorTy();
  1571. if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  1572. IID = Intrinsic::x86_avx2_permps;
  1573. else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  1574. IID = Intrinsic::x86_avx2_permd;
  1575. else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  1576. IID = Intrinsic::x86_avx512_permvar_df_256;
  1577. else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  1578. IID = Intrinsic::x86_avx512_permvar_di_256;
  1579. else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  1580. IID = Intrinsic::x86_avx512_permvar_sf_512;
  1581. else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  1582. IID = Intrinsic::x86_avx512_permvar_si_512;
  1583. else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  1584. IID = Intrinsic::x86_avx512_permvar_df_512;
  1585. else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  1586. IID = Intrinsic::x86_avx512_permvar_di_512;
  1587. else if (VecWidth == 128 && EltWidth == 16)
  1588. IID = Intrinsic::x86_avx512_permvar_hi_128;
  1589. else if (VecWidth == 256 && EltWidth == 16)
  1590. IID = Intrinsic::x86_avx512_permvar_hi_256;
  1591. else if (VecWidth == 512 && EltWidth == 16)
  1592. IID = Intrinsic::x86_avx512_permvar_hi_512;
  1593. else if (VecWidth == 128 && EltWidth == 8)
  1594. IID = Intrinsic::x86_avx512_permvar_qi_128;
  1595. else if (VecWidth == 256 && EltWidth == 8)
  1596. IID = Intrinsic::x86_avx512_permvar_qi_256;
  1597. else if (VecWidth == 512 && EltWidth == 8)
  1598. IID = Intrinsic::x86_avx512_permvar_qi_512;
  1599. else
  1600. llvm_unreachable("Unexpected intrinsic");
  1601. } else if (Name.startswith("dbpsadbw.")) {
  1602. if (VecWidth == 128)
  1603. IID = Intrinsic::x86_avx512_dbpsadbw_128;
  1604. else if (VecWidth == 256)
  1605. IID = Intrinsic::x86_avx512_dbpsadbw_256;
  1606. else if (VecWidth == 512)
  1607. IID = Intrinsic::x86_avx512_dbpsadbw_512;
  1608. else
  1609. llvm_unreachable("Unexpected intrinsic");
  1610. } else if (Name.startswith("pmultishift.qb.")) {
  1611. if (VecWidth == 128)
  1612. IID = Intrinsic::x86_avx512_pmultishift_qb_128;
  1613. else if (VecWidth == 256)
  1614. IID = Intrinsic::x86_avx512_pmultishift_qb_256;
  1615. else if (VecWidth == 512)
  1616. IID = Intrinsic::x86_avx512_pmultishift_qb_512;
  1617. else
  1618. llvm_unreachable("Unexpected intrinsic");
  1619. } else if (Name.startswith("conflict.")) {
  1620. if (Name[9] == 'd' && VecWidth == 128)
  1621. IID = Intrinsic::x86_avx512_conflict_d_128;
  1622. else if (Name[9] == 'd' && VecWidth == 256)
  1623. IID = Intrinsic::x86_avx512_conflict_d_256;
  1624. else if (Name[9] == 'd' && VecWidth == 512)
  1625. IID = Intrinsic::x86_avx512_conflict_d_512;
  1626. else if (Name[9] == 'q' && VecWidth == 128)
  1627. IID = Intrinsic::x86_avx512_conflict_q_128;
  1628. else if (Name[9] == 'q' && VecWidth == 256)
  1629. IID = Intrinsic::x86_avx512_conflict_q_256;
  1630. else if (Name[9] == 'q' && VecWidth == 512)
  1631. IID = Intrinsic::x86_avx512_conflict_q_512;
  1632. else
  1633. llvm_unreachable("Unexpected intrinsic");
  1634. } else if (Name.startswith("pavg.")) {
  1635. if (Name[5] == 'b' && VecWidth == 128)
  1636. IID = Intrinsic::x86_sse2_pavg_b;
  1637. else if (Name[5] == 'b' && VecWidth == 256)
  1638. IID = Intrinsic::x86_avx2_pavg_b;
  1639. else if (Name[5] == 'b' && VecWidth == 512)
  1640. IID = Intrinsic::x86_avx512_pavg_b_512;
  1641. else if (Name[5] == 'w' && VecWidth == 128)
  1642. IID = Intrinsic::x86_sse2_pavg_w;
  1643. else if (Name[5] == 'w' && VecWidth == 256)
  1644. IID = Intrinsic::x86_avx2_pavg_w;
  1645. else if (Name[5] == 'w' && VecWidth == 512)
  1646. IID = Intrinsic::x86_avx512_pavg_w_512;
  1647. else
  1648. llvm_unreachable("Unexpected intrinsic");
  1649. } else
  1650. return false;
  1651. SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
  1652. CI.arg_operands().end());
  1653. Args.pop_back();
  1654. Args.pop_back();
  1655. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
  1656. Args);
  1657. unsigned NumArgs = CI.getNumArgOperands();
  1658. Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
  1659. CI.getArgOperand(NumArgs - 2));
  1660. return true;
  1661. }
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
  1664. void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  1665. size_t Pos;
  1666. if (AsmStr->find("mov\tfp") == 0 &&
  1667. AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
  1668. (Pos = AsmStr->find("# marker")) != std::string::npos) {
  1669. AsmStr->replace(Pos, 1, ";");
  1670. }
  1671. }
  1672. /// Upgrade a call to an old intrinsic. All argument and return casting must be
  1673. /// provided to seamlessly integrate with existing context.
  1674. void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  1675. Function *F = CI->getCalledFunction();
  1676. LLVMContext &C = CI->getContext();
  1677. IRBuilder<> Builder(C);
  1678. Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
  1679. assert(F && "Intrinsic call is not direct?");
  1680. if (!NewFn) {
  1681. // Get the Function's name.
  1682. StringRef Name = F->getName();
  1683. assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
  1684. Name = Name.substr(5);
  1685. bool IsX86 = Name.startswith("x86.");
  1686. if (IsX86)
  1687. Name = Name.substr(4);
  1688. bool IsNVVM = Name.startswith("nvvm.");
  1689. if (IsNVVM)
  1690. Name = Name.substr(5);
  1691. if (IsX86 && Name.startswith("sse4a.movnt.")) {
  1692. Module *M = F->getParent();
  1693. SmallVector<Metadata *, 1> Elts;
  1694. Elts.push_back(
  1695. ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
  1696. MDNode *Node = MDNode::get(C, Elts);
  1697. Value *Arg0 = CI->getArgOperand(0);
  1698. Value *Arg1 = CI->getArgOperand(1);
  1699. // Nontemporal (unaligned) store of the 0'th element of the float/double
  1700. // vector.
  1701. Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
  1702. PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
  1703. Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
  1704. Value *Extract =
  1705. Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
  1706. StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
  1707. SI->setMetadata(M->getMDKindID("nontemporal"), Node);
  1708. // Remove intrinsic.
  1709. CI->eraseFromParent();
  1710. return;
  1711. }
  1712. if (IsX86 && (Name.startswith("avx.movnt.") ||
  1713. Name.startswith("avx512.storent."))) {
  1714. Module *M = F->getParent();
  1715. SmallVector<Metadata *, 1> Elts;
  1716. Elts.push_back(
  1717. ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
  1718. MDNode *Node = MDNode::get(C, Elts);
  1719. Value *Arg0 = CI->getArgOperand(0);
  1720. Value *Arg1 = CI->getArgOperand(1);
  1721. // Convert the type of the pointer to a pointer to the stored type.
  1722. Value *BC = Builder.CreateBitCast(Arg0,
  1723. PointerType::getUnqual(Arg1->getType()),
  1724. "cast");
  1725. StoreInst *SI = Builder.CreateAlignedStore(
  1726. Arg1, BC,
  1727. Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
  1728. SI->setMetadata(M->getMDKindID("nontemporal"), Node);
  1729. // Remove intrinsic.
  1730. CI->eraseFromParent();
  1731. return;
  1732. }
  1733. if (IsX86 && Name == "sse2.storel.dq") {
  1734. Value *Arg0 = CI->getArgOperand(0);
  1735. Value *Arg1 = CI->getArgOperand(1);
  1736. auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
  1737. Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
  1738. Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
  1739. Value *BC = Builder.CreateBitCast(Arg0,
  1740. PointerType::getUnqual(Elt->getType()),
  1741. "cast");
  1742. Builder.CreateAlignedStore(Elt, BC, Align(1));
  1743. // Remove intrinsic.
  1744. CI->eraseFromParent();
  1745. return;
  1746. }
  1747. if (IsX86 && (Name.startswith("sse.storeu.") ||
  1748. Name.startswith("sse2.storeu.") ||
  1749. Name.startswith("avx.storeu."))) {
  1750. Value *Arg0 = CI->getArgOperand(0);
  1751. Value *Arg1 = CI->getArgOperand(1);
  1752. Arg0 = Builder.CreateBitCast(Arg0,
  1753. PointerType::getUnqual(Arg1->getType()),
  1754. "cast");
  1755. Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  1756. // Remove intrinsic.
  1757. CI->eraseFromParent();
  1758. return;
  1759. }
  1760. if (IsX86 && Name == "avx512.mask.store.ss") {
  1761. Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
  1762. UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
  1763. Mask, false);
  1764. // Remove intrinsic.
  1765. CI->eraseFromParent();
  1766. return;
  1767. }
  1768. if (IsX86 && (Name.startswith("avx512.mask.store"))) {
  1769. // "avx512.mask.storeu." or "avx512.mask.store."
  1770. bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
  1771. UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
  1772. CI->getArgOperand(2), Aligned);
  1773. // Remove intrinsic.
  1774. CI->eraseFromParent();
  1775. return;
  1776. }
  1777. Value *Rep;
  1778. // Upgrade packed integer vector compare intrinsics to compare instructions.
  1779. if (IsX86 && (Name.startswith("sse2.pcmp") ||
  1780. Name.startswith("avx2.pcmp"))) {
  1781. // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
  1782. bool CmpEq = Name[9] == 'e';
  1783. Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
  1784. CI->getArgOperand(0), CI->getArgOperand(1));
  1785. Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  1786. } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
  1787. Type *ExtTy = Type::getInt32Ty(C);
  1788. if (CI->getOperand(0)->getType()->isIntegerTy(8))
  1789. ExtTy = Type::getInt64Ty(C);
  1790. unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
  1791. ExtTy->getPrimitiveSizeInBits();
  1792. Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
  1793. Rep = Builder.CreateVectorSplat(NumElts, Rep);
  1794. } else if (IsX86 && (Name == "sse.sqrt.ss" ||
  1795. Name == "sse2.sqrt.sd")) {
  1796. Value *Vec = CI->getArgOperand(0);
  1797. Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
  1798. Function *Intr = Intrinsic::getDeclaration(F->getParent(),
  1799. Intrinsic::sqrt, Elt0->getType());
  1800. Elt0 = Builder.CreateCall(Intr, Elt0);
  1801. Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  1802. } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
  1803. Name.startswith("sse2.sqrt.p") ||
  1804. Name.startswith("sse.sqrt.p"))) {
  1805. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
  1806. Intrinsic::sqrt,
  1807. CI->getType()),
  1808. {CI->getArgOperand(0)});
  1809. } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
  1810. if (CI->getNumArgOperands() == 4 &&
  1811. (!isa<ConstantInt>(CI->getArgOperand(3)) ||
  1812. cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
  1813. Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
  1814. : Intrinsic::x86_avx512_sqrt_pd_512;
  1815. Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
  1816. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
  1817. IID), Args);
  1818. } else {
  1819. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
  1820. Intrinsic::sqrt,
  1821. CI->getType()),
  1822. {CI->getArgOperand(0)});
  1823. }
  1824. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  1825. CI->getArgOperand(1));
  1826. } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
  1827. Name.startswith("avx512.ptestnm"))) {
  1828. Value *Op0 = CI->getArgOperand(0);
  1829. Value *Op1 = CI->getArgOperand(1);
  1830. Value *Mask = CI->getArgOperand(2);
  1831. Rep = Builder.CreateAnd(Op0, Op1);
  1832. llvm::Type *Ty = Op0->getType();
  1833. Value *Zero = llvm::Constant::getNullValue(Ty);
  1834. ICmpInst::Predicate Pred =
  1835. Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
  1836. Rep = Builder.CreateICmp(Pred, Rep, Zero);
  1837. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
  1838. } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
  1839. unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
  1840. ->getNumElements();
  1841. Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
  1842. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  1843. CI->getArgOperand(1));
  1844. } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
  1845. unsigned NumElts = CI->getType()->getScalarSizeInBits();
  1846. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
  1847. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
  1848. int Indices[64];
  1849. for (unsigned i = 0; i != NumElts; ++i)
  1850. Indices[i] = i;
  1851. // First extract half of each vector. This gives better codegen than
  1852. // doing it in a single shuffle.
  1853. LHS = Builder.CreateShuffleVector(LHS, LHS,
  1854. makeArrayRef(Indices, NumElts / 2));
  1855. RHS = Builder.CreateShuffleVector(RHS, RHS,
  1856. makeArrayRef(Indices, NumElts / 2));
  1857. // Concat the vectors.
  1858. // NOTE: Operands have to be swapped to match intrinsic definition.
  1859. Rep = Builder.CreateShuffleVector(RHS, LHS,
  1860. makeArrayRef(Indices, NumElts));
  1861. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1862. } else if (IsX86 && Name == "avx512.kand.w") {
  1863. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1864. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1865. Rep = Builder.CreateAnd(LHS, RHS);
  1866. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1867. } else if (IsX86 && Name == "avx512.kandn.w") {
  1868. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1869. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1870. LHS = Builder.CreateNot(LHS);
  1871. Rep = Builder.CreateAnd(LHS, RHS);
  1872. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1873. } else if (IsX86 && Name == "avx512.kor.w") {
  1874. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1875. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1876. Rep = Builder.CreateOr(LHS, RHS);
  1877. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1878. } else if (IsX86 && Name == "avx512.kxor.w") {
  1879. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1880. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1881. Rep = Builder.CreateXor(LHS, RHS);
  1882. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1883. } else if (IsX86 && Name == "avx512.kxnor.w") {
  1884. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1885. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1886. LHS = Builder.CreateNot(LHS);
  1887. Rep = Builder.CreateXor(LHS, RHS);
  1888. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1889. } else if (IsX86 && Name == "avx512.knot.w") {
  1890. Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1891. Rep = Builder.CreateNot(Rep);
  1892. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1893. } else if (IsX86 &&
  1894. (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
  1895. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1896. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1897. Rep = Builder.CreateOr(LHS, RHS);
  1898. Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
  1899. Value *C;
  1900. if (Name[14] == 'c')
  1901. C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
  1902. else
  1903. C = ConstantInt::getNullValue(Builder.getInt16Ty());
  1904. Rep = Builder.CreateICmpEQ(Rep, C);
  1905. Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  1906. } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
  1907. Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
  1908. Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
  1909. Name == "sse.div.ss" || Name == "sse2.div.sd")) {
  1910. Type *I32Ty = Type::getInt32Ty(C);
  1911. Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
  1912. ConstantInt::get(I32Ty, 0));
  1913. Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
  1914. ConstantInt::get(I32Ty, 0));
  1915. Value *EltOp;
  1916. if (Name.contains(".add."))
  1917. EltOp = Builder.CreateFAdd(Elt0, Elt1);
  1918. else if (Name.contains(".sub."))
  1919. EltOp = Builder.CreateFSub(Elt0, Elt1);
  1920. else if (Name.contains(".mul."))
  1921. EltOp = Builder.CreateFMul(Elt0, Elt1);
  1922. else
  1923. EltOp = Builder.CreateFDiv(Elt0, Elt1);
  1924. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
  1925. ConstantInt::get(I32Ty, 0));
  1926. } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
  1927. // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
  1928. bool CmpEq = Name[16] == 'e';
  1929. Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
  1930. } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
  1931. Type *OpTy = CI->getArgOperand(0)->getType();
  1932. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  1933. Intrinsic::ID IID;
  1934. switch (VecWidth) {
  1935. default: llvm_unreachable("Unexpected intrinsic");
  1936. case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
  1937. case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
  1938. case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
  1939. }
  1940. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  1941. { CI->getOperand(0), CI->getArgOperand(1) });
  1942. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  1943. } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
  1944. Type *OpTy = CI->getArgOperand(0)->getType();
  1945. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  1946. unsigned EltWidth = OpTy->getScalarSizeInBits();
  1947. Intrinsic::ID IID;
  1948. if (VecWidth == 128 && EltWidth == 32)
  1949. IID = Intrinsic::x86_avx512_fpclass_ps_128;
  1950. else if (VecWidth == 256 && EltWidth == 32)
  1951. IID = Intrinsic::x86_avx512_fpclass_ps_256;
  1952. else if (VecWidth == 512 && EltWidth == 32)
  1953. IID = Intrinsic::x86_avx512_fpclass_ps_512;
  1954. else if (VecWidth == 128 && EltWidth == 64)
  1955. IID = Intrinsic::x86_avx512_fpclass_pd_128;
  1956. else if (VecWidth == 256 && EltWidth == 64)
  1957. IID = Intrinsic::x86_avx512_fpclass_pd_256;
  1958. else if (VecWidth == 512 && EltWidth == 64)
  1959. IID = Intrinsic::x86_avx512_fpclass_pd_512;
  1960. else
  1961. llvm_unreachable("Unexpected intrinsic");
  1962. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  1963. { CI->getOperand(0), CI->getArgOperand(1) });
  1964. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  1965. } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
  1966. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  1967. CI->arg_operands().end());
  1968. Type *OpTy = Args[0]->getType();
  1969. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  1970. unsigned EltWidth = OpTy->getScalarSizeInBits();
  1971. Intrinsic::ID IID;
  1972. if (VecWidth == 128 && EltWidth == 32)
  1973. IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
  1974. else if (VecWidth == 256 && EltWidth == 32)
  1975. IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
  1976. else if (VecWidth == 512 && EltWidth == 32)
  1977. IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
  1978. else if (VecWidth == 128 && EltWidth == 64)
  1979. IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
  1980. else if (VecWidth == 256 && EltWidth == 64)
  1981. IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
  1982. else if (VecWidth == 512 && EltWidth == 64)
  1983. IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
  1984. else
  1985. llvm_unreachable("Unexpected intrinsic");
  1986. Value *Mask = Constant::getAllOnesValue(CI->getType());
  1987. if (VecWidth == 512)
  1988. std::swap(Mask, Args.back());
  1989. Args.push_back(Mask);
  1990. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  1991. Args);
  1992. } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
  1993. // Integer compare intrinsics.
  1994. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  1995. Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
  1996. } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
  1997. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  1998. Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
  } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
                       Name.startswith("avx512.cvtw2mask.") ||
                       Name.startswith("avx512.cvtd2mask.") ||
                       Name.startswith("avx512.cvtq2mask."))) {
    Value *Op = CI->getArgOperand(0);
    Value *Zero = llvm::Constant::getNullValue(Op->getType());
    Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
    Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
  } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                       Name == "ssse3.pabs.w.128" ||
                       Name == "ssse3.pabs.d.128" ||
                       Name.startswith("avx2.pabs") ||
                       Name.startswith("avx512.mask.pabs"))) {
    Rep = upgradeAbs(Builder, *CI);
  } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                       Name == "sse2.pmaxs.w" ||
                       Name == "sse41.pmaxsd" ||
                       Name.startswith("avx2.pmaxs") ||
                       Name.startswith("avx512.mask.pmaxs"))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
  } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
                       Name == "sse41.pmaxuw" ||
                       Name == "sse41.pmaxud" ||
                       Name.startswith("avx2.pmaxu") ||
                       Name.startswith("avx512.mask.pmaxu"))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
  } else if (IsX86 && (Name == "sse41.pminsb" ||
                       Name == "sse2.pmins.w" ||
                       Name == "sse41.pminsd" ||
                       Name.startswith("avx2.pmins") ||
                       Name.startswith("avx512.mask.pmins"))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
  } else if (IsX86 && (Name == "sse2.pminu.b" ||
                       Name == "sse41.pminuw" ||
                       Name == "sse41.pminud" ||
                       Name.startswith("avx2.pminu") ||
                       Name.startswith("avx512.mask.pminu"))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
  } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
                       Name == "avx2.pmulu.dq" ||
                       Name == "avx512.pmulu.dq.512" ||
                       Name.startswith("avx512.mask.pmulu.dq."))) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
  } else if (IsX86 && (Name == "sse41.pmuldq" ||
                       Name == "avx2.pmul.dq" ||
                       Name == "avx512.pmul.dq.512" ||
                       Name.startswith("avx512.mask.pmul.dq."))) {
    Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
  } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
                       Name == "sse2.cvtsi2sd" ||
                       Name == "sse.cvtsi642ss" ||
                       Name == "sse2.cvtsi642sd")) {
    Rep = Builder.CreateSIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (IsX86 && Name == "avx512.cvtusi2sd") {
    Rep = Builder.CreateUIToFP(
        CI->getArgOperand(1),
        cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (IsX86 && Name == "sse2.cvtss2sd") {
    Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
    Rep = Builder.CreateFPExt(
        Rep, cast<VectorType>(CI->getType())->getElementType());
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
                       Name == "sse2.cvtdq2ps" ||
                       Name == "avx.cvtdq2.pd.256" ||
                       Name == "avx.cvtdq2.ps.256" ||
                       Name.startswith("avx512.mask.cvtdq2pd.") ||
                       Name.startswith("avx512.mask.cvtudq2pd.") ||
                       Name.startswith("avx512.mask.cvtdq2ps.") ||
                       Name.startswith("avx512.mask.cvtudq2ps.") ||
                       Name.startswith("avx512.mask.cvtqq2pd.") ||
                       Name.startswith("avx512.mask.cvtuqq2pd.") ||
                       Name == "avx512.mask.cvtqq2ps.256" ||
                       Name == "avx512.mask.cvtqq2ps.512" ||
                       Name == "avx512.mask.cvtuqq2ps.256" ||
                       Name == "avx512.mask.cvtuqq2ps.512" ||
                       Name == "sse2.cvtps2pd" ||
                       Name == "avx.cvt.ps2.pd.256" ||
                       Name == "avx512.mask.cvtps2pd.128" ||
                       Name == "avx512.mask.cvtps2pd.256")) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts < SrcTy->getNumElements()) {
      assert(NumDstElts == 2 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
    }
    bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
    bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
    if (IsPS2PD)
      Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
    else if (CI->getNumArgOperands() == 4 &&
             (!isa<ConstantInt>(CI->getArgOperand(3)) ||
              cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
                                     : Intrinsic::x86_avx512_sitofp_round;
      Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
                                              { DstTy, SrcTy });
      Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
    } else {
      Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
                       : Builder.CreateSIToFP(Rep, DstTy, "cvt");
    }
    if (CI->getNumArgOperands() >= 3)
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
                       Name.startswith("vcvtph2ps."))) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    Rep = CI->getArgOperand(0);
    auto *SrcTy = cast<FixedVectorType>(Rep->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    if (NumDstElts != SrcTy->getNumElements()) {
      assert(NumDstElts == 4 && "Unexpected vector size");
      Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
    }
    Rep = Builder.CreateBitCast(
        Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
    Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
    if (CI->getNumArgOperands() >= 3)
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
    Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                            CI->getArgOperand(1), CI->getArgOperand(2),
                            /*Aligned*/false);
  } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
    Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
                            CI->getArgOperand(1), CI->getArgOperand(2),
                            /*Aligned*/true);
  } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Type *PtrTy = ResultTy->getElementType();
    // Cast the pointer to element type.
    Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                       llvm::PointerType::getUnqual(PtrTy));
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    Function *ELd = Intrinsic::getDeclaration(F->getParent(),
                                              Intrinsic::masked_expandload,
                                              ResultTy);
    Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
  } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
    auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
    Type *PtrTy = ResultTy->getElementType();
    // Cast the pointer to element type.
    Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
                                       llvm::PointerType::getUnqual(PtrTy));
    Value *MaskVec =
        getX86MaskVec(Builder, CI->getArgOperand(2),
                      cast<FixedVectorType>(ResultTy)->getNumElements());
    Function *CSt = Intrinsic::getDeclaration(F->getParent(),
                                              Intrinsic::masked_compressstore,
                                              ResultTy);
    Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
  } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
                       Name.startswith("avx512.mask.expand."))) {
    auto *ResultTy = cast<FixedVectorType>(CI->getType());
    Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
                                   ResultTy->getNumElements());
    bool IsCompress = Name[12] == 'c';
    Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
                                   : Intrinsic::x86_avx512_mask_expand;
    Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
    Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
                                     MaskVec });
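  // For xop.vpcom* the comparison predicate is either the third operand or is
  // encoded in the mangled name (lt/le/gt/ge/eq/ne/false/true); a 'u' element
  // suffix (ub/uw/ud/uq) selects the unsigned forms.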
  } else if (IsX86 && Name.startswith("xop.vpcom")) {
    bool IsSigned;
    if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
        Name.endswith("uq"))
      IsSigned = false;
    else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
             Name.endswith("q"))
      IsSigned = true;
    else
      llvm_unreachable("Unknown suffix");
    unsigned Imm;
    if (CI->getNumArgOperands() == 3) {
      Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    } else {
      Name = Name.substr(9); // strip off "xop.vpcom"
      if (Name.startswith("lt"))
        Imm = 0;
      else if (Name.startswith("le"))
        Imm = 1;
      else if (Name.startswith("gt"))
        Imm = 2;
      else if (Name.startswith("ge"))
        Imm = 3;
      else if (Name.startswith("eq"))
        Imm = 4;
      else if (Name.startswith("ne"))
        Imm = 5;
      else if (Name.startswith("false"))
        Imm = 6;
      else if (Name.startswith("true"))
        Imm = 7;
      else
        llvm_unreachable("Unknown condition");
    }
    Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
  } else if (IsX86 && Name.startswith("xop.vpcmov")) {
    Value *Sel = CI->getArgOperand(2);
    Value *NotSel = Builder.CreateNot(Sel);
    Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
    Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
    Rep = Builder.CreateOr(Sel0, Sel1);
  } else if (IsX86 && (Name.startswith("xop.vprot") ||
                       Name.startswith("avx512.prol") ||
                       Name.startswith("avx512.mask.prol"))) {
    Rep = upgradeX86Rotate(Builder, *CI, false);
  } else if (IsX86 && (Name.startswith("avx512.pror") ||
                       Name.startswith("avx512.mask.pror"))) {
    Rep = upgradeX86Rotate(Builder, *CI, true);
  } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
                       Name.startswith("avx512.mask.vpshld") ||
                       Name.startswith("avx512.maskz.vpshld"))) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
                       Name.startswith("avx512.mask.vpshrd") ||
                       Name.startswith("avx512.maskz.vpshrd"))) {
    bool ZeroMask = Name[11] == 'z';
    Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
  } else if (IsX86 && Name == "sse42.crc32.64.8") {
    Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::x86_sse42_crc32_32_8);
    Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
                       Name.startswith("avx512.vbroadcast.s"))) {
    // Replace broadcasts with a series of insertelements.
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    Type *EltTy = VecTy->getElementType();
    unsigned EltNum = VecTy->getNumElements();
    Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                        EltTy->getPointerTo());
    Value *Load = Builder.CreateLoad(EltTy, Cast);
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = UndefValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load,
                                        ConstantInt::get(I32Ty, I));
  } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
                       Name.startswith("sse41.pmovzx") ||
                       Name.startswith("avx2.pmovsx") ||
                       Name.startswith("avx2.pmovzx") ||
                       Name.startswith("avx512.mask.pmovsx") ||
                       Name.startswith("avx512.mask.pmovzx"))) {
    auto *DstTy = cast<FixedVectorType>(CI->getType());
    unsigned NumDstElts = DstTy->getNumElements();
    // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i;
    Value *SV =
        Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
    bool DoSext = (StringRef::npos != Name.find("pmovsx"));
    Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                 : Builder.CreateZExt(SV, DstTy);
    // If there are 3 arguments, it's a masked intrinsic so we need a select.
    if (CI->getNumArgOperands() == 3)
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (Name == "avx512.mask.pmov.qd.256" ||
             Name == "avx512.mask.pmov.qd.512" ||
             Name == "avx512.mask.pmov.wb.256" ||
             Name == "avx512.mask.pmov.wb.512") {
    Type *Ty = CI->getArgOperand(1)->getType();
    Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
    Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
                       Name == "avx2.vbroadcasti128")) {
    // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
    Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
    unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
    auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
    Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                          PointerType::getUnqual(VT));
    Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
    if (NumSrcElts == 2)
      Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
    else
      Rep = Builder.CreateShuffleVector(
          Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
                       Name.startswith("avx512.mask.shuf.f"))) {
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    Type *VT = CI->getType();
    unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
    unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
    unsigned ControlBitsMask = NumLanes - 1;
    unsigned NumControlBits = NumLanes / 2;
    SmallVector<int, 8> ShuffleMask(0);
    for (unsigned l = 0; l != NumLanes; ++l) {
      unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
      // We actually need the other source.
      if (l >= NumLanes / 2)
        LaneMask += NumLanes;
      for (unsigned i = 0; i != NumElementsInLane; ++i)
        ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
    }
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(1), ShuffleMask);
    Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                       Name.startswith("avx512.mask.broadcasti"))) {
    unsigned NumSrcElts =
        cast<FixedVectorType>(CI->getArgOperand(0)->getType())
            ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(0),
                                      ShuffleMask);
    Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                       Name.startswith("avx2.vbroadcast") ||
                       Name.startswith("avx512.pbroadcast") ||
                       Name.startswith("avx512.mask.broadcast.s"))) {
    // Replace vp?broadcasts with a vector shuffle.
    Value *Op = CI->getArgOperand(0);
    ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
    Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
    SmallVector<int, 8> M;
    ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
    Rep = Builder.CreateShuffleVector(Op, M);
    if (CI->getNumArgOperands() == 3)
      Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                          CI->getArgOperand(1));
  } else if (IsX86 && (Name.startswith("sse2.padds.") ||
                       Name.startswith("avx2.padds.") ||
                       Name.startswith("avx512.padds.") ||
                       Name.startswith("avx512.mask.padds."))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
                       Name.startswith("avx2.psubs.") ||
                       Name.startswith("avx512.psubs.") ||
                       Name.startswith("avx512.mask.psubs."))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
                       Name.startswith("avx2.paddus.") ||
                       Name.startswith("avx512.mask.paddus."))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
                       Name.startswith("avx2.psubus.") ||
                       Name.startswith("avx512.mask.psubus."))) {
    Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
    Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                    CI->getArgOperand(1),
                                    CI->getArgOperand(2),
                                    CI->getArgOperand(3),
                                    CI->getArgOperand(4),
                                    false);
  } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
    Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
                                    CI->getArgOperand(1),
                                    CI->getArgOperand(2),
                                    CI->getArgOperand(3),
                                    CI->getArgOperand(4),
                                    true);
  } else if (IsX86 && (Name == "sse2.psll.dq" ||
                       Name == "avx2.psll.dq")) {
    // 128/256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (IsX86 && (Name == "sse2.psrl.dq" ||
                       Name == "avx2.psrl.dq")) {
    // 128/256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
                                     Shift / 8); // Shift is in bits.
  } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
                       Name == "avx2.psll.dq.bs" ||
                       Name == "avx512.psll.dq.512")) {
    // 128/256/512-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
                       Name == "avx2.psrl.dq.bs" ||
                       Name == "avx512.psrl.dq.512")) {
    // 128/256/512-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  } else if (IsX86 && (Name == "sse41.pblendw" ||
                       Name.startswith("sse41.blendp") ||
                       Name.startswith("avx.blend.p") ||
                       Name == "avx2.pblendw" ||
                       Name.startswith("avx2.pblendd."))) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
                       Name == "avx2.vinserti128" ||
                       Name.startswith("avx512.mask.insert"))) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op1->getType())->getNumElements();
    unsigned Scale = DstNumElts / SrcNumElts;
    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;
    // Extend the second operand into a vector the size of the destination.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i] = i;
    for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
      Idxs[i] = SrcNumElts;
    Rep = Builder.CreateShuffleVector(Op1, Idxs);
    // Insert the second operand into the first operand.
    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.
    // Example of shuffle mask for 32-bit elements:
    // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
    // First fill with the identity mask.
    for (unsigned i = 0; i != DstNumElts; ++i)
      Idxs[i] = i;
    // Then replace the elements where we need to insert.
    for (unsigned i = 0; i != SrcNumElts; ++i)
      Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
    Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
    // If the intrinsic has a mask operand, handle that.
    if (CI->getNumArgOperands() == 5)
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                          CI->getArgOperand(3));
  } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
                       Name == "avx2.vextracti128" ||
                       Name.startswith("avx512.mask.vextract"))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned DstNumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned SrcNumElts =
        cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned Scale = SrcNumElts / DstNumElts;
    // Mask off the high bits of the immediate value; hardware ignores those.
    Imm = Imm % Scale;
    // Get indexes for the subvector of the input vector.
    SmallVector<int, 8> Idxs(DstNumElts);
    for (unsigned i = 0; i != DstNumElts; ++i) {
      Idxs[i] = i + (Imm * DstNumElts);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    // If the intrinsic has a mask operand, handle that.
    if (CI->getNumArgOperands() == 4)
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (!IsX86 && Name == "stackprotectorcheck") {
    Rep = nullptr;
  } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
                       Name.startswith("avx512.mask.perm.di."))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    SmallVector<int, 8> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    if (CI->getNumArgOperands() == 4)
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
                       Name == "avx2.vperm2i128")) {
    // The immediate permute control byte looks like this:
    // [1:0] - select 128 bits from sources for low half of destination
    // [2] - ignore
    // [3] - zero low half of destination
    // [5:4] - select 128 bits from sources for high half of destination
    // [6] - ignore
    // [7] - zero high half of destination
    uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned HalfSize = NumElts / 2;
    SmallVector<int, 8> ShuffleMask(NumElts);
    // Determine which operand(s) are actually in use for this instruction.
    Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
    // If needed, replace operands based on zero mask.
    V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
    V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
    // Permute low half of result.
    unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i] = StartIndex + i;
    // Permute high half of result.
    StartIndex = (Imm & 0x10) ? HalfSize : 0;
    for (unsigned i = 0; i < HalfSize; ++i)
      ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
    Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
                       Name == "sse2.pshuf.d" ||
                       Name.startswith("avx512.mask.vpermil.p") ||
                       Name.startswith("avx512.mask.pshuf.d."))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    auto *VecTy = cast<FixedVectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Calculate the size of each index in the immediate.
    unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
    unsigned IdxMask = ((1 << IdxSize) - 1);
    SmallVector<int, 8> Idxs(NumElts);
    // Look up the bits for this element, wrapping around the immediate every
    // 8 bits. Elements are grouped into sets of 2 or 4 elements so we need
    // to offset by the first index of each group.
    for (unsigned i = 0; i != NumElts; ++i)
      Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    if (CI->getNumArgOperands() == 4)
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (IsX86 && (Name == "sse2.pshufl.w" ||
                       Name.startswith("avx512.mask.pshufl.w."))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
      for (unsigned i = 4; i != 8; ++i)
        Idxs[i + l] = i + l;
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    if (CI->getNumArgOperands() == 4)
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (IsX86 && (Name == "sse2.pshufh.w" ||
                       Name.startswith("avx512.mask.pshufh.w."))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += 8) {
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l] = i + l;
      for (unsigned i = 0; i != 4; ++i)
        Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
    }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    if (CI->getNumArgOperands() == 4)
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                          CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    unsigned HalfLaneElts = NumLaneElts / 2;
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are halfway through the lane, switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element by adding HalfLaneElts bits from the
      // immediate, wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
                        CI->getArgOperand(3));
  } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
                       Name.startswith("avx512.mask.movshdup") ||
                       Name.startswith("avx512.mask.movsldup"))) {
    Value *Op0 = CI->getArgOperand(0);
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    unsigned Offset = 0;
    if (Name.startswith("avx512.mask.movshdup."))
      Offset = 1;
    SmallVector<int, 16> Idxs(NumElts);
    for (unsigned l = 0; l != NumElts; l += NumLaneElts)
      for (unsigned i = 0; i != NumLaneElts; i += 2) {
        Idxs[i + l + 0] = i + l + Offset;
        Idxs[i + l + 1] = i + l + Offset;
      }
    Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
    Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
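  // The unpack upgrades below build an interleaving shuffle mask per 128-bit
  // lane: even result elements come from the first source and odd elements
  // from the second, starting at the low (punpckl) or high (punpckh) half of
  // each lane.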
  } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
                       Name.startswith("avx512.mask.unpckl."))) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
                       Name.startswith("avx512.mask.unpckh."))) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
    SmallVector<int, 64> Idxs(NumElts);
    for (int l = 0; l != NumElts; l += NumLaneElts)
      for (int i = 0; i != NumLaneElts; ++i)
        Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
    Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
                       Name.startswith("avx512.mask.pand."))) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
                       Name.startswith("avx512.mask.pandn."))) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
    Rep = Builder.CreateAnd(Rep,
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
                       Name.startswith("avx512.mask.por."))) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                           Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
                       Name.startswith("avx512.mask.pxor."))) {
    VectorType *FTy = cast<VectorType>(CI->getType());
    VectorType *ITy = VectorType::getInteger(FTy);
    Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
                            Builder.CreateBitCast(CI->getArgOperand(1), ITy));
    Rep = Builder.CreateBitCast(Rep, FTy);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
    Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
    Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
    Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
    if (Name.endswith(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_add_ps_512;
      else
        IID = Intrinsic::x86_avx512_add_pd_512;
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
    } else {
      Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
    if (Name.endswith(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_div_ps_512;
      else
        IID = Intrinsic::x86_avx512_div_pd_512;
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
    } else {
      Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
    if (Name.endswith(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_mul_ps_512;
      else
        IID = Intrinsic::x86_avx512_mul_pd_512;
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
    } else {
      Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
    if (Name.endswith(".512")) {
      Intrinsic::ID IID;
      if (Name[17] == 's')
        IID = Intrinsic::x86_avx512_sub_ps_512;
      else
        IID = Intrinsic::x86_avx512_sub_pd_512;
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { CI->getArgOperand(0), CI->getArgOperand(1),
                                 CI->getArgOperand(4) });
    } else {
      Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
    }
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
                       Name.startswith("avx512.mask.min.p")) &&
             Name.drop_front(18) == ".512") {
    bool IsDouble = Name[17] == 'd';
    bool IsMin = Name[13] == 'i';
    static const Intrinsic::ID MinMaxTbl[2][2] = {
        { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
        { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
    };
    Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                             { CI->getArgOperand(0), CI->getArgOperand(1),
                               CI->getArgOperand(4) });
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
                        CI->getArgOperand(2));
  } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
                                                       Intrinsic::ctlz,
                                                       CI->getType()),
                             { CI->getArgOperand(0), Builder.getInt1(false) });
    Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
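  // The psll/psrl/psra upgrades that follow decode the element size and the
  // immediate/variable form from the mangled intrinsic name, pick the matching
  // non-masked shift intrinsic, and hand off to UpgradeX86MaskedShift.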
  } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
                Name[17] == '.' ? Name[18] :
                Name[18] == '.' ? Name[19] :
                                  Name[20];
    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
        IID = Intrinsic::x86_avx2_psllv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
        IID = Intrinsic::x86_avx2_psllv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
        IID = Intrinsic::x86_avx2_psllv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
        IID = Intrinsic::x86_avx2_psllv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
        IID = Intrinsic::x86_avx512_psllv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
        IID = Intrinsic::x86_avx512_psllv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
        IID = Intrinsic::x86_avx512_psllv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".128")) {
      if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
                          : Intrinsic::x86_sse2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
                          : Intrinsic::x86_sse2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
                          : Intrinsic::x86_sse2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".256")) {
      if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
                          : Intrinsic::x86_avx2_psll_d;
      else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
                          : Intrinsic::x86_avx2_psll_q;
      else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
                          : Intrinsic::x86_avx2_psll_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
              IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
                           Intrinsic::x86_avx512_psll_d_512;
      else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
              IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
                           Intrinsic::x86_avx512_psll_q_512;
      else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
        IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
                          : Intrinsic::x86_avx512_psll_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
    Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
                Name[17] == '.' ? Name[18] :
                Name[18] == '.' ? Name[19] :
                                  Name[20];
    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
        IID = Intrinsic::x86_avx2_psrlv_q;
      else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
        IID = Intrinsic::x86_avx2_psrlv_q_256;
      else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
        IID = Intrinsic::x86_avx2_psrlv_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
        IID = Intrinsic::x86_avx2_psrlv_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
        IID = Intrinsic::x86_avx512_psrlv_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
        IID = Intrinsic::x86_avx512_psrlv_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
        IID = Intrinsic::x86_avx512_psrlv_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".128")) {
      if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
                          : Intrinsic::x86_sse2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
                          : Intrinsic::x86_sse2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
                          : Intrinsic::x86_sse2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".256")) {
      if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
                          : Intrinsic::x86_avx2_psrl_d;
      else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
                          : Intrinsic::x86_avx2_psrl_q;
      else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
                          : Intrinsic::x86_avx2_psrl_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
              IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
                           Intrinsic::x86_avx512_psrl_d_512;
      else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
              IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
                           Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
                          : Intrinsic::x86_avx512_psrl_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
    Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
    bool IsImmediate = Name[16] == 'i' ||
                       (Name.size() > 18 && Name[18] == 'i');
    bool IsVariable = Name[16] == 'v';
    char Size = Name[16] == '.' ? Name[17] :
                Name[17] == '.' ? Name[18] :
                Name[18] == '.' ? Name[19] :
                                  Name[20];
    Intrinsic::ID IID;
    if (IsVariable && Name[17] != '.') {
      if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
        IID = Intrinsic::x86_avx2_psrav_d;
      else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
        IID = Intrinsic::x86_avx2_psrav_d_256;
      else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
        IID = Intrinsic::x86_avx512_psrav_w_128;
      else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
        IID = Intrinsic::x86_avx512_psrav_w_256;
      else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
        IID = Intrinsic::x86_avx512_psrav_w_512;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".128")) {
      if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
                          : Intrinsic::x86_sse2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
              IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
                           Intrinsic::x86_avx512_psra_q_128;
      else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
        IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
                          : Intrinsic::x86_sse2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else if (Name.endswith(".256")) {
      if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
                          : Intrinsic::x86_avx2_psra_d;
      else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
              IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
                           Intrinsic::x86_avx512_psra_q_256;
      else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
        IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
                          : Intrinsic::x86_avx2_psra_w;
      else
        llvm_unreachable("Unexpected size");
    } else {
      if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
              IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
                           Intrinsic::x86_avx512_psra_d_512;
      else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
              IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
                           Intrinsic::x86_avx512_psra_q_512;
      else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
        IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
                          : Intrinsic::x86_avx512_psra_w_512;
      else
        llvm_unreachable("Unexpected size");
    }
    Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
    Rep = upgradeMaskedMove(Builder, *CI);
  } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
    Rep = UpgradeMaskToInt(Builder, *CI);
  } else if (IsX86 && Name.endswith(".movntdqa")) {
    Module *M = F->getParent();
    MDNode *Node = MDNode::get(
        C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    Value *Ptr = CI->getArgOperand(0);
    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(
        Ptr, PointerType::getUnqual(CI->getType()), "cast");
    LoadInst *LI = Builder.CreateAlignedLoad(
        CI->getType(), BC,
        Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
    LI->setMetadata(M->getMDKindID("nontemporal"), Node);
    Rep = LI;
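  // The FMA upgrades below derive the negate-multiplier, negate-accumulator
  // and scalar flags from the intrinsic name, lower to llvm.fma (or the
  // rounding-mode AVX-512 variants), and reinsert scalar results into
  // element 0 where needed.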
  } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
                       Name.startswith("fma.vfmsub.") ||
                       Name.startswith("fma.vfnmadd.") ||
                       Name.startswith("fma.vfnmsub."))) {
    bool NegMul = Name[6] == 'n';
    bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
    bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
    Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2) };
    if (IsScalar) {
      Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
      Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
      Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    }
    if (NegMul && !IsScalar)
      Ops[0] = Builder.CreateFNeg(Ops[0]);
    if (NegMul && IsScalar)
      Ops[1] = Builder.CreateFNeg(Ops[1]);
    if (NegAcc)
      Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                       Intrinsic::fma,
                                                       Ops[0]->getType()),
                             Ops);
    if (IsScalar)
      Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
                                        (uint64_t)0);
  } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
    Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2) };
    Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
    Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
    Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
                                                       Intrinsic::fma,
                                                       Ops[0]->getType()),
                             Ops);
    Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
                                      Rep, (uint64_t)0);
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
                       Name.startswith("avx512.maskz.vfmadd.s") ||
                       Name.startswith("avx512.mask3.vfmadd.s") ||
                       Name.startswith("avx512.mask3.vfmsub.s") ||
                       Name.startswith("avx512.mask3.vfnmsub.s"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);
    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);
    A = Builder.CreateExtractElement(A, (uint64_t)0);
    B = Builder.CreateExtractElement(B, (uint64_t)0);
    C = Builder.CreateExtractElement(C, (uint64_t)0);
    if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
        cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
      Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
      Intrinsic::ID IID;
      if (Name.back() == 'd')
        IID = Intrinsic::x86_avx512_vfmadd_f64;
      else
        IID = Intrinsic::x86_avx512_vfmadd_f32;
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
      Rep = Builder.CreateCall(FMA, Ops);
    } else {
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                Intrinsic::fma,
                                                A->getType());
      Rep = Builder.CreateCall(FMA, { A, B, C });
    }
    Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                      IsMask3 ? C : A;
    // For Mask3 with NegAcc, we need to create a new extractelement that
    // avoids the negation above.
    if (NegAcc && IsMask3)
      PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                              (uint64_t)0);
    Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                              Rep, PassThru);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                      Rep, (uint64_t)0);
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                       Name.startswith("avx512.mask.vfnmadd.p") ||
                       Name.startswith("avx512.mask.vfnmsub.p") ||
                       Name.startswith("avx512.mask3.vfmadd.p") ||
                       Name.startswith("avx512.mask3.vfmsub.p") ||
                       Name.startswith("avx512.mask3.vfnmsub.p") ||
                       Name.startswith("avx512.maskz.vfmadd.p"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool NegMul = Name[2] == 'n';
    bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
    Value *A = CI->getArgOperand(0);
    Value *B = CI->getArgOperand(1);
    Value *C = CI->getArgOperand(2);
    if (NegMul && (IsMask3 || IsMaskZ))
      A = Builder.CreateFNeg(A);
    if (NegMul && !(IsMask3 || IsMaskZ))
      B = Builder.CreateFNeg(B);
    if (NegAcc)
      C = Builder.CreateFNeg(C);
    if (CI->getNumArgOperands() == 5 &&
        (!isa<ConstantInt>(CI->getArgOperand(4)) ||
         cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
      Intrinsic::ID IID;
      // Check the character before ".512" in string.
      if (Name[Name.size()-5] == 's')
        IID = Intrinsic::x86_avx512_vfmadd_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmadd_pd_512;
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               { A, B, C, CI->getArgOperand(4) });
    } else {
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                Intrinsic::fma,
                                                A->getType());
      Rep = Builder.CreateCall(FMA, { A, B, C });
    }
    Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                      IsMask3 ? CI->getArgOperand(2) :
                                CI->getArgOperand(0);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
    unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
    unsigned EltWidth = CI->getType()->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
    Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                     CI->getArgOperand(2) };
    Ops[2] = Builder.CreateFNeg(Ops[2]);
    Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                             Ops);
  } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
                       Name.startswith("avx512.mask3.vfmaddsub.p") ||
                       Name.startswith("avx512.maskz.vfmaddsub.p") ||
                       Name.startswith("avx512.mask3.vfmsubadd.p"))) {
    bool IsMask3 = Name[11] == '3';
    bool IsMaskZ = Name[11] == 'z';
    // Drop the "avx512.mask." to make it easier.
    Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
    bool IsSubAdd = Name[3] == 's';
    if (CI->getNumArgOperands() == 5) {
      Intrinsic::ID IID;
      // Check the character before ".512" in string.
      if (Name[Name.size()-5] == 's')
        IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
      else
        IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2), CI->getArgOperand(4) };
      if (IsSubAdd)
        Ops[2] = Builder.CreateFNeg(Ops[2]);
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Ops);
    } else {
      int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };
      Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                Ops[0]->getType());
      Value *Odd = Builder.CreateCall(FMA, Ops);
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Value *Even = Builder.CreateCall(FMA, Ops);
      if (IsSubAdd)
        std::swap(Even, Odd);
      SmallVector<int, 32> Idxs(NumElts);
      for (int i = 0; i != NumElts; ++i)
        Idxs[i] = i + (i % 2) * NumElts;
      Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
    }
    Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                      IsMask3 ? CI->getArgOperand(2) :
                                CI->getArgOperand(0);
    Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  3170. } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
  3171. Name.startswith("avx512.maskz.pternlog."))) {
  3172. bool ZeroMask = Name[11] == 'z';
  3173. unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
  3174. unsigned EltWidth = CI->getType()->getScalarSizeInBits();
  3175. Intrinsic::ID IID;
  3176. if (VecWidth == 128 && EltWidth == 32)
  3177. IID = Intrinsic::x86_avx512_pternlog_d_128;
  3178. else if (VecWidth == 256 && EltWidth == 32)
  3179. IID = Intrinsic::x86_avx512_pternlog_d_256;
  3180. else if (VecWidth == 512 && EltWidth == 32)
  3181. IID = Intrinsic::x86_avx512_pternlog_d_512;
  3182. else if (VecWidth == 128 && EltWidth == 64)
  3183. IID = Intrinsic::x86_avx512_pternlog_q_128;
  3184. else if (VecWidth == 256 && EltWidth == 64)
  3185. IID = Intrinsic::x86_avx512_pternlog_q_256;
  3186. else if (VecWidth == 512 && EltWidth == 64)
  3187. IID = Intrinsic::x86_avx512_pternlog_q_512;
  3188. else
  3189. llvm_unreachable("Unexpected intrinsic");
  3190. Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
  3191. CI->getArgOperand(2), CI->getArgOperand(3) };
  3192. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
  3193. Args);
  3194. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
  3195. : CI->getArgOperand(0);
  3196. Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
  3197. } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
  3198. Name.startswith("avx512.maskz.vpmadd52"))) {
  3199. bool ZeroMask = Name[11] == 'z';
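// The 'h'/'l' suffix sits one character later for the "maskz" spelling, so
// check both offsets.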
  3200. bool High = Name[20] == 'h' || Name[21] == 'h';
  3201. unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
  3202. Intrinsic::ID IID;
  3203. if (VecWidth == 128 && !High)
  3204. IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
  3205. else if (VecWidth == 256 && !High)
  3206. IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
  3207. else if (VecWidth == 512 && !High)
  3208. IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
  3209. else if (VecWidth == 128 && High)
  3210. IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
  3211. else if (VecWidth == 256 && High)
  3212. IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
  3213. else if (VecWidth == 512 && High)
  3214. IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
  3215. else
  3216. llvm_unreachable("Unexpected intrinsic");
  3217. Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
  3218. CI->getArgOperand(2) };
  3219. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
  3220. Args);
  3221. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
  3222. : CI->getArgOperand(0);
  3223. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  3224. } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
  3225. Name.startswith("avx512.mask.vpermt2var.") ||
  3226. Name.startswith("avx512.maskz.vpermt2var."))) {
  3227. bool ZeroMask = Name[11] == 'z';
  3228. bool IndexForm = Name[17] == 'i';
  3229. Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
  3230. } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
  3231. Name.startswith("avx512.maskz.vpdpbusd.") ||
  3232. Name.startswith("avx512.mask.vpdpbusds.") ||
  3233. Name.startswith("avx512.maskz.vpdpbusds."))) {
  3234. bool ZeroMask = Name[11] == 'z';
  3235. bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
  3236. unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
  3237. Intrinsic::ID IID;
  3238. if (VecWidth == 128 && !IsSaturating)
  3239. IID = Intrinsic::x86_avx512_vpdpbusd_128;
  3240. else if (VecWidth == 256 && !IsSaturating)
  3241. IID = Intrinsic::x86_avx512_vpdpbusd_256;
  3242. else if (VecWidth == 512 && !IsSaturating)
  3243. IID = Intrinsic::x86_avx512_vpdpbusd_512;
  3244. else if (VecWidth == 128 && IsSaturating)
  3245. IID = Intrinsic::x86_avx512_vpdpbusds_128;
  3246. else if (VecWidth == 256 && IsSaturating)
  3247. IID = Intrinsic::x86_avx512_vpdpbusds_256;
  3248. else if (VecWidth == 512 && IsSaturating)
  3249. IID = Intrinsic::x86_avx512_vpdpbusds_512;
  3250. else
  3251. llvm_unreachable("Unexpected intrinsic");
  3252. Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
  3253. CI->getArgOperand(2) };
  3254. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
  3255. Args);
  3256. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
  3257. : CI->getArgOperand(0);
  3258. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  3259. } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
  3260. Name.startswith("avx512.maskz.vpdpwssd.") ||
  3261. Name.startswith("avx512.mask.vpdpwssds.") ||
  3262. Name.startswith("avx512.maskz.vpdpwssds."))) {
  3263. bool ZeroMask = Name[11] == 'z';
  3264. bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
  3265. unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
  3266. Intrinsic::ID IID;
  3267. if (VecWidth == 128 && !IsSaturating)
  3268. IID = Intrinsic::x86_avx512_vpdpwssd_128;
  3269. else if (VecWidth == 256 && !IsSaturating)
  3270. IID = Intrinsic::x86_avx512_vpdpwssd_256;
  3271. else if (VecWidth == 512 && !IsSaturating)
  3272. IID = Intrinsic::x86_avx512_vpdpwssd_512;
  3273. else if (VecWidth == 128 && IsSaturating)
  3274. IID = Intrinsic::x86_avx512_vpdpwssds_128;
  3275. else if (VecWidth == 256 && IsSaturating)
  3276. IID = Intrinsic::x86_avx512_vpdpwssds_256;
  3277. else if (VecWidth == 512 && IsSaturating)
  3278. IID = Intrinsic::x86_avx512_vpdpwssds_512;
  3279. else
  3280. llvm_unreachable("Unexpected intrinsic");
  3281. Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
  3282. CI->getArgOperand(2) };
  3283. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
  3284. Args);
  3285. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
  3286. : CI->getArgOperand(0);
  3287. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
  3288. } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
  3289. Name == "addcarry.u32" || Name == "addcarry.u64" ||
  3290. Name == "subborrow.u32" || Name == "subborrow.u64")) {
  3291. Intrinsic::ID IID;
  3292. if (Name[0] == 'a' && Name.back() == '2')
  3293. IID = Intrinsic::x86_addcarry_32;
  3294. else if (Name[0] == 'a' && Name.back() == '4')
  3295. IID = Intrinsic::x86_addcarry_64;
  3296. else if (Name[0] == 's' && Name.back() == '2')
  3297. IID = Intrinsic::x86_subborrow_32;
  3298. else if (Name[0] == 's' && Name.back() == '4')
  3299. IID = Intrinsic::x86_subborrow_64;
  3300. else
  3301. llvm_unreachable("Unexpected intrinsic");
  3302. // Make a call with 3 operands.
  3303. Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
  3304. CI->getArgOperand(2)};
  3305. Value *NewCall = Builder.CreateCall(
  3306. Intrinsic::getDeclaration(CI->getModule(), IID),
  3307. Args);
  3308. // Extract the second result and store it.
  3309. Value *Data = Builder.CreateExtractValue(NewCall, 1);
  3310. // Cast the pointer to the right type.
  3311. Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
  3312. llvm::PointerType::getUnqual(Data->getType()));
  3313. Builder.CreateAlignedStore(Data, Ptr, Align(1));
  3314. // Replace the original call result with the first result of the new call.
  3315. Value *CF = Builder.CreateExtractValue(NewCall, 0);
  3316. CI->replaceAllUsesWith(CF);
  3317. Rep = nullptr;
  3318. } else if (IsX86 && Name.startswith("avx512.mask.") &&
  3319. upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
  3320. // Rep will be updated by the call in the condition.
  3321. } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
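// Expand integer abs to select(x >= 0, x, -x).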
  3322. Value *Arg = CI->getArgOperand(0);
  3323. Value *Neg = Builder.CreateNeg(Arg, "neg");
  3324. Value *Cmp = Builder.CreateICmpSGE(
  3325. Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
  3326. Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  3327. } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
  3328. Name.startswith("atomic.load.add.f64.p"))) {
  3329. Value *Ptr = CI->getArgOperand(0);
  3330. Value *Val = CI->getArgOperand(1);
  3331. Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
  3332. AtomicOrdering::SequentiallyConsistent);
  3333. } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
  3334. Name == "max.ui" || Name == "max.ull")) {
  3335. Value *Arg0 = CI->getArgOperand(0);
  3336. Value *Arg1 = CI->getArgOperand(1);
  3337. Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
  3338. ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
  3339. : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
  3340. Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  3341. } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
  3342. Name == "min.ui" || Name == "min.ull")) {
  3343. Value *Arg0 = CI->getArgOperand(0);
  3344. Value *Arg1 = CI->getArgOperand(1);
  3345. Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
  3346. ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
  3347. : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
  3348. Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  3349. } else if (IsNVVM && Name == "clz.ll") {
3350. // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
  3351. Value *Arg = CI->getArgOperand(0);
  3352. Value *Ctlz = Builder.CreateCall(
  3353. Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
  3354. {Arg->getType()}),
  3355. {Arg, Builder.getFalse()}, "ctlz");
  3356. Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  3357. } else if (IsNVVM && Name == "popc.ll") {
3358. // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
3359. // i64.
  3360. Value *Arg = CI->getArgOperand(0);
  3361. Value *Popc = Builder.CreateCall(
  3362. Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
  3363. {Arg->getType()}),
  3364. Arg, "ctpop");
  3365. Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  3366. } else if (IsNVVM && Name == "h2f") {
  3367. Rep = Builder.CreateCall(Intrinsic::getDeclaration(
  3368. F->getParent(), Intrinsic::convert_from_fp16,
  3369. {Builder.getFloatTy()}),
  3370. CI->getArgOperand(0), "h2f");
  3371. } else {
  3372. llvm_unreachable("Unknown function for CallInst upgrade.");
  3373. }
  3374. if (Rep)
  3375. CI->replaceAllUsesWith(Rep);
  3376. CI->eraseFromParent();
  3377. return;
  3378. }
  3379. const auto &DefaultCase = [&NewFn, &CI]() -> void {
  3380. // Handle generic mangling change, but nothing else
  3381. assert(
  3382. (CI->getCalledFunction()->getName() != NewFn->getName()) &&
  3383. "Unknown function for CallInst upgrade and isn't just a name change");
  3384. CI->setCalledFunction(NewFn);
  3385. };
  3386. CallInst *NewCall = nullptr;
  3387. switch (NewFn->getIntrinsicID()) {
  3388. default: {
  3389. DefaultCase();
  3390. return;
  3391. }
  3392. case Intrinsic::arm_neon_vld1:
  3393. case Intrinsic::arm_neon_vld2:
  3394. case Intrinsic::arm_neon_vld3:
  3395. case Intrinsic::arm_neon_vld4:
  3396. case Intrinsic::arm_neon_vld2lane:
  3397. case Intrinsic::arm_neon_vld3lane:
  3398. case Intrinsic::arm_neon_vld4lane:
  3399. case Intrinsic::arm_neon_vst1:
  3400. case Intrinsic::arm_neon_vst2:
  3401. case Intrinsic::arm_neon_vst3:
  3402. case Intrinsic::arm_neon_vst4:
  3403. case Intrinsic::arm_neon_vst2lane:
  3404. case Intrinsic::arm_neon_vst3lane:
  3405. case Intrinsic::arm_neon_vst4lane: {
  3406. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  3407. CI->arg_operands().end());
  3408. NewCall = Builder.CreateCall(NewFn, Args);
  3409. break;
  3410. }
  3411. case Intrinsic::arm_neon_bfdot:
  3412. case Intrinsic::arm_neon_bfmmla:
  3413. case Intrinsic::arm_neon_bfmlalb:
  3414. case Intrinsic::arm_neon_bfmlalt:
  3415. case Intrinsic::aarch64_neon_bfdot:
  3416. case Intrinsic::aarch64_neon_bfmmla:
  3417. case Intrinsic::aarch64_neon_bfmlalb:
  3418. case Intrinsic::aarch64_neon_bfmlalt: {
  3419. SmallVector<Value *, 3> Args;
  3420. assert(CI->getNumArgOperands() == 3 &&
  3421. "Mismatch between function args and call args");
  3422. size_t OperandWidth =
  3423. CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
  3424. assert((OperandWidth == 64 || OperandWidth == 128) &&
  3425. "Unexpected operand width");
  3426. Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
  3427. auto Iter = CI->arg_operands().begin();
  3428. Args.push_back(*Iter++);
  3429. Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
  3430. Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
  3431. NewCall = Builder.CreateCall(NewFn, Args);
  3432. break;
  3433. }
  3434. case Intrinsic::bitreverse:
  3435. NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  3436. break;
  3437. case Intrinsic::ctlz:
  3438. case Intrinsic::cttz:
  3439. assert(CI->getNumArgOperands() == 1 &&
  3440. "Mismatch between function args and call args");
  3441. NewCall =
  3442. Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
  3443. break;
  3444. case Intrinsic::objectsize: {
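// Older llvm.objectsize overloads lacked the 'null is unknown size' and
// 'dynamic' arguments; default any missing operands to false.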
  3445. Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
  3446. ? Builder.getFalse()
  3447. : CI->getArgOperand(2);
  3448. Value *Dynamic =
  3449. CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
  3450. NewCall = Builder.CreateCall(
  3451. NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
  3452. break;
  3453. }
  3454. case Intrinsic::ctpop:
  3455. NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  3456. break;
  3457. case Intrinsic::convert_from_fp16:
  3458. NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
  3459. break;
  3460. case Intrinsic::dbg_value:
  3461. // Upgrade from the old version that had an extra offset argument.
  3462. assert(CI->getNumArgOperands() == 4);
  3463. // Drop nonzero offsets instead of attempting to upgrade them.
  3464. if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
  3465. if (Offset->isZeroValue()) {
  3466. NewCall = Builder.CreateCall(
  3467. NewFn,
  3468. {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
  3469. break;
  3470. }
  3471. CI->eraseFromParent();
  3472. return;
  3473. case Intrinsic::ptr_annotation:
  3474. // Upgrade from versions that lacked the annotation attribute argument.
  3475. assert(CI->getNumArgOperands() == 4 &&
  3476. "Before LLVM 12.0 this intrinsic took four arguments");
  3477. // Create a new call with an added null annotation attribute argument.
  3478. NewCall = Builder.CreateCall(
  3479. NewFn,
  3480. {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
  3481. CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
  3482. NewCall->takeName(CI);
  3483. CI->replaceAllUsesWith(NewCall);
  3484. CI->eraseFromParent();
  3485. return;
  3486. case Intrinsic::var_annotation:
  3487. // Upgrade from versions that lacked the annotation attribute argument.
  3488. assert(CI->getNumArgOperands() == 4 &&
  3489. "Before LLVM 12.0 this intrinsic took four arguments");
  3490. // Create a new call with an added null annotation attribute argument.
  3491. NewCall = Builder.CreateCall(
  3492. NewFn,
  3493. {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
  3494. CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
  3495. CI->eraseFromParent();
  3496. return;
  3497. case Intrinsic::x86_xop_vfrcz_ss:
  3498. case Intrinsic::x86_xop_vfrcz_sd:
  3499. NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
  3500. break;
  3501. case Intrinsic::x86_xop_vpermil2pd:
  3502. case Intrinsic::x86_xop_vpermil2ps:
  3503. case Intrinsic::x86_xop_vpermil2pd_256:
  3504. case Intrinsic::x86_xop_vpermil2ps_256: {
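// The selector operand changed from a floating-point vector to an integer
// vector of the same width; bitcast it before calling the new declaration.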
  3505. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  3506. CI->arg_operands().end());
  3507. VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
  3508. VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
  3509. Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
  3510. NewCall = Builder.CreateCall(NewFn, Args);
  3511. break;
  3512. }
  3513. case Intrinsic::x86_sse41_ptestc:
  3514. case Intrinsic::x86_sse41_ptestz:
  3515. case Intrinsic::x86_sse41_ptestnzc: {
  3516. // The arguments for these intrinsics used to be v4f32, and changed
  3517. // to v2i64. This is purely a nop, since those are bitwise intrinsics.
  3518. // So, the only thing required is a bitcast for both arguments.
  3519. // First, check the arguments have the old type.
  3520. Value *Arg0 = CI->getArgOperand(0);
  3521. if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
  3522. return;
  3523. // Old intrinsic, add bitcasts
  3524. Value *Arg1 = CI->getArgOperand(1);
  3525. auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
  3526. Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
  3527. Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
  3528. NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
  3529. break;
  3530. }
  3531. case Intrinsic::x86_rdtscp: {
3532. // This used to take one argument. If we have no arguments, it is already
3533. // upgraded.
  3534. if (CI->getNumOperands() == 0)
  3535. return;
  3536. NewCall = Builder.CreateCall(NewFn);
  3537. // Extract the second result and store it.
  3538. Value *Data = Builder.CreateExtractValue(NewCall, 1);
  3539. // Cast the pointer to the right type.
  3540. Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
  3541. llvm::PointerType::getUnqual(Data->getType()));
  3542. Builder.CreateAlignedStore(Data, Ptr, Align(1));
  3543. // Replace the original call result with the first result of the new call.
  3544. Value *TSC = Builder.CreateExtractValue(NewCall, 0);
  3545. NewCall->takeName(CI);
  3546. CI->replaceAllUsesWith(TSC);
  3547. CI->eraseFromParent();
  3548. return;
  3549. }
  3550. case Intrinsic::x86_sse41_insertps:
  3551. case Intrinsic::x86_sse41_dppd:
  3552. case Intrinsic::x86_sse41_dpps:
  3553. case Intrinsic::x86_sse41_mpsadbw:
  3554. case Intrinsic::x86_avx_dp_ps_256:
  3555. case Intrinsic::x86_avx2_mpsadbw: {
  3556. // Need to truncate the last argument from i32 to i8 -- this argument models
  3557. // an inherently 8-bit immediate operand to these x86 instructions.
  3558. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  3559. CI->arg_operands().end());
  3560. // Replace the last argument with a trunc.
  3561. Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
  3562. NewCall = Builder.CreateCall(NewFn, Args);
  3563. break;
  3564. }
  3565. case Intrinsic::x86_avx512_mask_cmp_pd_128:
  3566. case Intrinsic::x86_avx512_mask_cmp_pd_256:
  3567. case Intrinsic::x86_avx512_mask_cmp_pd_512:
  3568. case Intrinsic::x86_avx512_mask_cmp_ps_128:
  3569. case Intrinsic::x86_avx512_mask_cmp_ps_256:
  3570. case Intrinsic::x86_avx512_mask_cmp_ps_512: {
  3571. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  3572. CI->arg_operands().end());
  3573. unsigned NumElts =
  3574. cast<FixedVectorType>(Args[0]->getType())->getNumElements();
  3575. Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
  3576. NewCall = Builder.CreateCall(NewFn, Args);
  3577. Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
  3578. NewCall->takeName(CI);
  3579. CI->replaceAllUsesWith(Res);
  3580. CI->eraseFromParent();
  3581. return;
  3582. }
  3583. case Intrinsic::thread_pointer: {
  3584. NewCall = Builder.CreateCall(NewFn, {});
  3585. break;
  3586. }
  3587. case Intrinsic::invariant_start:
  3588. case Intrinsic::invariant_end:
  3589. case Intrinsic::masked_load:
  3590. case Intrinsic::masked_store:
  3591. case Intrinsic::masked_gather:
  3592. case Intrinsic::masked_scatter: {
  3593. SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
  3594. CI->arg_operands().end());
  3595. NewCall = Builder.CreateCall(NewFn, Args);
  3596. break;
  3597. }
  3598. case Intrinsic::memcpy:
  3599. case Intrinsic::memmove:
  3600. case Intrinsic::memset: {
  3601. // We have to make sure that the call signature is what we're expecting.
  3602. // We only want to change the old signatures by removing the alignment arg:
3603. // @llvm.mem[cpy|move]...(i8*, i8*, i[32|64], i32, i1)
  3604. // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
  3605. // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
  3606. // -> @llvm.memset...(i8*, i8, i[32|64], i1)
  3607. // Note: i8*'s in the above can be any pointer type
  3608. if (CI->getNumArgOperands() != 5) {
  3609. DefaultCase();
  3610. return;
  3611. }
  3612. // Remove alignment argument (3), and add alignment attributes to the
  3613. // dest/src pointers.
  3614. Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
  3615. CI->getArgOperand(2), CI->getArgOperand(4)};
  3616. NewCall = Builder.CreateCall(NewFn, Args);
  3617. auto *MemCI = cast<MemIntrinsic>(NewCall);
  3618. // All mem intrinsics support dest alignment.
  3619. const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
  3620. MemCI->setDestAlignment(Align->getMaybeAlignValue());
  3621. // Memcpy/Memmove also support source alignment.
  3622. if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
  3623. MTI->setSourceAlignment(Align->getMaybeAlignValue());
  3624. break;
  3625. }
  3626. }
  3627. assert(NewCall && "Should have either set this variable or returned through "
  3628. "the default case");
  3629. NewCall->takeName(CI);
  3630. CI->replaceAllUsesWith(NewCall);
  3631. CI->eraseFromParent();
  3632. }
  3633. void llvm::UpgradeCallsToIntrinsic(Function *F) {
  3634. assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  3635. // Check if this function should be upgraded and get the replacement function
  3636. // if there is one.
  3637. Function *NewFn;
  3638. if (UpgradeIntrinsicFunction(F, NewFn)) {
  3639. // Replace all users of the old function with the new function or new
  3640. // instructions. This is not a range loop because the call is deleted.
  3641. for (User *U : make_early_inc_range(F->users()))
  3642. if (CallInst *CI = dyn_cast<CallInst>(U))
  3643. UpgradeIntrinsicCall(CI, NewFn);
  3644. // Remove old function, no longer used, from the module.
  3645. F->eraseFromParent();
  3646. }
  3647. }
  3648. MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  3649. // Check if the tag uses struct-path aware TBAA format.
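// Struct-path tags begin with an MDNode (the base type) rather than a name
// string, so they need no further upgrade.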
  3650. if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
  3651. return &MD;
  3652. auto &Context = MD.getContext();
  3653. if (MD.getNumOperands() == 3) {
  3654. Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
  3655. MDNode *ScalarType = MDNode::get(Context, Elts);
3656. // Create an MDNode <ScalarType, ScalarType, offset 0, const>
  3657. Metadata *Elts2[] = {ScalarType, ScalarType,
  3658. ConstantAsMetadata::get(
  3659. Constant::getNullValue(Type::getInt64Ty(Context))),
  3660. MD.getOperand(2)};
  3661. return MDNode::get(Context, Elts2);
  3662. }
3663. // Create an MDNode <MD, MD, offset 0>
  3664. Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
  3665. Type::getInt64Ty(Context)))};
  3666. return MDNode::get(Context, Elts);
  3667. }
  3668. Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
  3669. Instruction *&Temp) {
  3670. if (Opc != Instruction::BitCast)
  3671. return nullptr;
  3672. Temp = nullptr;
  3673. Type *SrcTy = V->getType();
  3674. if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
  3675. SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
  3676. LLVMContext &Context = V->getContext();
  3677. // We have no information about target data layout, so we assume that
3678. // the maximum pointer size is 64 bits.
  3679. Type *MidTy = Type::getInt64Ty(Context);
  3680. Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
  3681. return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  3682. }
  3683. return nullptr;
  3684. }
  3685. Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  3686. if (Opc != Instruction::BitCast)
  3687. return nullptr;
  3688. Type *SrcTy = C->getType();
  3689. if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
  3690. SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
  3691. LLVMContext &Context = C->getContext();
  3692. // We have no information about target data layout, so we assume that
3693. // the maximum pointer size is 64 bits.
  3694. Type *MidTy = Type::getInt64Ty(Context);
  3695. return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
  3696. DestTy);
  3697. }
  3698. return nullptr;
  3699. }
3700. /// Check the debug info version number; if it is outdated, drop the debug
3701. /// info. Return true if the module is modified.
  3702. bool llvm::UpgradeDebugInfo(Module &M) {
  3703. unsigned Version = getDebugMetadataVersionFromModule(M);
  3704. if (Version == DEBUG_METADATA_VERSION) {
  3705. bool BrokenDebugInfo = false;
  3706. if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
  3707. report_fatal_error("Broken module found, compilation aborted!");
  3708. if (!BrokenDebugInfo)
  3709. // Everything is ok.
  3710. return false;
  3711. else {
  3712. // Diagnose malformed debug info.
  3713. DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
  3714. M.getContext().diagnose(Diag);
  3715. }
  3716. }
  3717. bool Modified = StripDebugInfo(M);
  3718. if (Modified && Version != DEBUG_METADATA_VERSION) {
  3719. // Diagnose a version mismatch.
  3720. DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
  3721. M.getContext().diagnose(DiagVersion);
  3722. }
  3723. return Modified;
  3724. }
3725. /// This checks for the objc retain/release marker which should be upgraded.
3726. /// It returns true if the module is modified.
  3727. static bool UpgradeRetainReleaseMarker(Module &M) {
  3728. bool Changed = false;
  3729. const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  3730. NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  3731. if (ModRetainReleaseMarker) {
  3732. MDNode *Op = ModRetainReleaseMarker->getOperand(0);
  3733. if (Op) {
  3734. MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
  3735. if (ID) {
  3736. SmallVector<StringRef, 4> ValueComp;
  3737. ID->getString().split(ValueComp, "#");
  3738. if (ValueComp.size() == 2) {
  3739. std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
  3740. ID = MDString::get(M.getContext(), NewValue);
  3741. }
  3742. M.addModuleFlag(Module::Error, MarkerKey, ID);
  3743. M.eraseNamedMetadata(ModRetainReleaseMarker);
  3744. Changed = true;
  3745. }
  3746. }
  3747. }
  3748. return Changed;
  3749. }
  3750. void llvm::UpgradeARCRuntime(Module &M) {
3751. // This lambda converts calls to ARC runtime functions into the
3752. // corresponding intrinsic calls.
  3753. auto UpgradeToIntrinsic = [&](const char *OldFunc,
  3754. llvm::Intrinsic::ID IntrinsicFunc) {
  3755. Function *Fn = M.getFunction(OldFunc);
  3756. if (!Fn)
  3757. return;
  3758. Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
  3759. for (User *U : make_early_inc_range(Fn->users())) {
  3760. CallInst *CI = dyn_cast<CallInst>(U);
  3761. if (!CI || CI->getCalledFunction() != Fn)
  3762. continue;
  3763. IRBuilder<> Builder(CI->getParent(), CI->getIterator());
  3764. FunctionType *NewFuncTy = NewFn->getFunctionType();
  3765. SmallVector<Value *, 2> Args;
  3766. // Don't upgrade the intrinsic if it's not valid to bitcast the return
  3767. // value to the return type of the old function.
  3768. if (NewFuncTy->getReturnType() != CI->getType() &&
  3769. !CastInst::castIsValid(Instruction::BitCast, CI,
  3770. NewFuncTy->getReturnType()))
  3771. continue;
  3772. bool InvalidCast = false;
  3773. for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
  3774. Value *Arg = CI->getArgOperand(I);
  3775. // Bitcast argument to the parameter type of the new function if it's
  3776. // not a variadic argument.
  3777. if (I < NewFuncTy->getNumParams()) {
  3778. // Don't upgrade the intrinsic if it's not valid to bitcast the argument
  3779. // to the parameter type of the new function.
  3780. if (!CastInst::castIsValid(Instruction::BitCast, Arg,
  3781. NewFuncTy->getParamType(I))) {
  3782. InvalidCast = true;
  3783. break;
  3784. }
  3785. Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
  3786. }
  3787. Args.push_back(Arg);
  3788. }
  3789. if (InvalidCast)
  3790. continue;
  3791. // Create a call instruction that calls the new function.
  3792. CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
  3793. NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
  3794. NewCall->takeName(CI);
  3795. // Bitcast the return value back to the type of the old call.
  3796. Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
  3797. if (!CI->use_empty())
  3798. CI->replaceAllUsesWith(NewRetVal);
  3799. CI->eraseFromParent();
  3800. }
  3801. if (Fn->use_empty())
  3802. Fn->eraseFromParent();
  3803. };
  3804. // Unconditionally convert a call to "clang.arc.use" to a call to
  3805. // "llvm.objc.clang.arc.use".
  3806. UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
3807. // Upgrade the retain/release marker. If there is no need to upgrade
3808. // the marker, that means either the module is already new enough to contain
3809. // the new intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
  3810. if (!UpgradeRetainReleaseMarker(M))
  3811. return;
  3812. std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
  3813. {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
  3814. {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
  3815. {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
  3816. {"objc_autoreleaseReturnValue",
  3817. llvm::Intrinsic::objc_autoreleaseReturnValue},
  3818. {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
  3819. {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
  3820. {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
  3821. {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
  3822. {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
  3823. {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
  3824. {"objc_release", llvm::Intrinsic::objc_release},
  3825. {"objc_retain", llvm::Intrinsic::objc_retain},
  3826. {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
  3827. {"objc_retainAutoreleaseReturnValue",
  3828. llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
  3829. {"objc_retainAutoreleasedReturnValue",
  3830. llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
  3831. {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
  3832. {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
  3833. {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
  3834. {"objc_unsafeClaimAutoreleasedReturnValue",
  3835. llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
  3836. {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
  3837. {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
  3838. {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
  3839. {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
  3840. {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
  3841. {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
  3842. {"objc_arc_annotation_topdown_bbstart",
  3843. llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
  3844. {"objc_arc_annotation_topdown_bbend",
  3845. llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
  3846. {"objc_arc_annotation_bottomup_bbstart",
  3847. llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
  3848. {"objc_arc_annotation_bottomup_bbend",
  3849. llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
  3850. for (auto &I : RuntimeFuncs)
  3851. UpgradeToIntrinsic(I.first, I.second);
  3852. }
  3853. bool llvm::UpgradeModuleFlags(Module &M) {
  3854. NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  3855. if (!ModFlags)
  3856. return false;
  3857. bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  3858. bool HasSwiftVersionFlag = false;
  3859. uint8_t SwiftMajorVersion, SwiftMinorVersion;
  3860. uint32_t SwiftABIVersion;
  3861. auto Int8Ty = Type::getInt8Ty(M.getContext());
  3862. auto Int32Ty = Type::getInt32Ty(M.getContext());
  3863. for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
  3864. MDNode *Op = ModFlags->getOperand(I);
  3865. if (Op->getNumOperands() != 3)
  3866. continue;
  3867. MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
  3868. if (!ID)
  3869. continue;
  3870. if (ID->getString() == "Objective-C Image Info Version")
  3871. HasObjCFlag = true;
  3872. if (ID->getString() == "Objective-C Class Properties")
  3873. HasClassProperties = true;
3874. // Upgrade PIC/PIE module flags. The module flag behavior for these two
3875. // fields was Error and is now Max.
  3876. if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
  3877. if (auto *Behavior =
  3878. mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
  3879. if (Behavior->getLimitedValue() == Module::Error) {
  3880. Type *Int32Ty = Type::getInt32Ty(M.getContext());
  3881. Metadata *Ops[3] = {
  3882. ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
  3883. MDString::get(M.getContext(), ID->getString()),
  3884. Op->getOperand(2)};
  3885. ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
  3886. Changed = true;
  3887. }
  3888. }
  3889. }
3890. // Upgrade Objective-C Image Info Section. Remove the whitespace in the
3891. // section name so that llvm-lto will not complain about mismatching
3892. // module flags that are functionally the same.
  3893. if (ID->getString() == "Objective-C Image Info Section") {
  3894. if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
  3895. SmallVector<StringRef, 4> ValueComp;
  3896. Value->getString().split(ValueComp, " ");
  3897. if (ValueComp.size() != 1) {
  3898. std::string NewValue;
  3899. for (auto &S : ValueComp)
  3900. NewValue += S.str();
  3901. Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
  3902. MDString::get(M.getContext(), NewValue)};
  3903. ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
  3904. Changed = true;
  3905. }
  3906. }
  3907. }
3908. // The IR upgrader turns the i32 "Objective-C Garbage Collection" flag into an
3909. // i8 value. If the higher bits are set, it adds a new module flag for Swift info.
  3910. if (ID->getString() == "Objective-C Garbage Collection") {
  3911. auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
  3912. if (Md) {
  3913. assert(Md->getValue() && "Expected non-empty metadata");
  3914. auto Type = Md->getValue()->getType();
  3915. if (Type == Int8Ty)
  3916. continue;
  3917. unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
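// Bits 0-7 hold the GC flag; bits 8-15 the Swift ABI version, bits 16-23 the
// Swift minor version, and bits 24-31 the Swift major version.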
  3918. if ((Val & 0xff) != Val) {
  3919. HasSwiftVersionFlag = true;
  3920. SwiftABIVersion = (Val & 0xff00) >> 8;
  3921. SwiftMajorVersion = (Val & 0xff000000) >> 24;
  3922. SwiftMinorVersion = (Val & 0xff0000) >> 16;
  3923. }
  3924. Metadata *Ops[3] = {
  3925. ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
  3926. Op->getOperand(1),
  3927. ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
  3928. ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
  3929. Changed = true;
  3930. }
  3931. }
  3932. }
  3933. // "Objective-C Class Properties" is recently added for Objective-C. We
  3934. // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  3935. // flag of value 0, so we can correclty downgrade this flag when trying to
  3936. // link an ObjC bitcode without this module flag with an ObjC bitcode with
  3937. // this module flag.
  3938. if (HasObjCFlag && !HasClassProperties) {
  3939. M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
  3940. (uint32_t)0);
  3941. Changed = true;
  3942. }
  3943. if (HasSwiftVersionFlag) {
  3944. M.addModuleFlag(Module::Error, "Swift ABI Version",
  3945. SwiftABIVersion);
  3946. M.addModuleFlag(Module::Error, "Swift Major Version",
  3947. ConstantInt::get(Int8Ty, SwiftMajorVersion));
  3948. M.addModuleFlag(Module::Error, "Swift Minor Version",
  3949. ConstantInt::get(Int8Ty, SwiftMinorVersion));
  3950. Changed = true;
  3951. }
  3952. return Changed;
  3953. }
  3954. void llvm::UpgradeSectionAttributes(Module &M) {
  3955. auto TrimSpaces = [](StringRef Section) -> std::string {
  3956. SmallVector<StringRef, 5> Components;
  3957. Section.split(Components, ',');
  3958. SmallString<32> Buffer;
  3959. raw_svector_ostream OS(Buffer);
  3960. for (auto Component : Components)
  3961. OS << ',' << Component.trim();
  3962. return std::string(OS.str().substr(1));
  3963. };
  3964. for (auto &GV : M.globals()) {
  3965. if (!GV.hasSection())
  3966. continue;
  3967. StringRef Section = GV.getSection();
  3968. if (!Section.startswith("__DATA, __objc_catlist"))
  3969. continue;
  3970. // __DATA, __objc_catlist, regular, no_dead_strip
  3971. // __DATA,__objc_catlist,regular,no_dead_strip
  3972. GV.setSection(TrimSpaces(Section));
  3973. }
  3974. }
  3975. namespace {
  3976. // Prior to LLVM 10.0, the strictfp attribute could be used on individual
  3977. // callsites within a function that did not also have the strictfp attribute.
  3978. // Since 10.0, if strict FP semantics are needed within a function, the
  3979. // function must have the strictfp attribute and all calls within the function
  3980. // must also have the strictfp attribute. This latter restriction is
  3981. // necessary to prevent unwanted libcall simplification when a function is
  3982. // being cloned (such as for inlining).
  3983. //
  3984. // The "dangling" strictfp attribute usage was only used to prevent constant
  3985. // folding and other libcall simplification. The nobuiltin attribute on the
  3986. // callsite has the same effect.
  3987. struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  3988. StrictFPUpgradeVisitor() {}
  3989. void visitCallBase(CallBase &Call) {
  3990. if (!Call.isStrictFP())
  3991. return;
  3992. if (isa<ConstrainedFPIntrinsic>(&Call))
  3993. return;
  3994. // If we get here, the caller doesn't have the strictfp attribute
  3995. // but this callsite does. Replace the strictfp attribute with nobuiltin.
  3996. Call.removeAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
  3997. Call.addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
  3998. }
  3999. };
  4000. } // namespace
  4001. void llvm::UpgradeFunctionAttributes(Function &F) {
  4002. // If a function definition doesn't have the strictfp attribute,
  4003. // convert any callsite strictfp attributes to nobuiltin.
  4004. if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
  4005. StrictFPUpgradeVisitor SFPV;
  4006. SFPV.visit(F);
  4007. }
  4008. if (F.getCallingConv() == CallingConv::X86_INTR &&
  4009. !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
  4010. Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
  4011. Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
  4012. F.addParamAttr(0, NewAttr);
  4013. }
  4014. }
  4015. static bool isOldLoopArgument(Metadata *MD) {
  4016. auto *T = dyn_cast_or_null<MDTuple>(MD);
  4017. if (!T)
  4018. return false;
  4019. if (T->getNumOperands() < 1)
  4020. return false;
  4021. auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  4022. if (!S)
  4023. return false;
  4024. return S->getString().startswith("llvm.vectorizer.");
  4025. }
  4026. static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  4027. StringRef OldPrefix = "llvm.vectorizer.";
  4028. assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
  4029. if (OldTag == "llvm.vectorizer.unroll")
  4030. return MDString::get(C, "llvm.loop.interleave.count");
  4031. return MDString::get(
  4032. C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
  4033. .str());
  4034. }
  4035. static Metadata *upgradeLoopArgument(Metadata *MD) {
  4036. auto *T = dyn_cast_or_null<MDTuple>(MD);
  4037. if (!T)
  4038. return MD;
  4039. if (T->getNumOperands() < 1)
  4040. return MD;
  4041. auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  4042. if (!OldTag)
  4043. return MD;
  4044. if (!OldTag->getString().startswith("llvm.vectorizer."))
  4045. return MD;
  4046. // This has an old tag. Upgrade it.
  4047. SmallVector<Metadata *, 8> Ops;
  4048. Ops.reserve(T->getNumOperands());
  4049. Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  4050. for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
  4051. Ops.push_back(T->getOperand(I));
  4052. return MDTuple::get(T->getContext(), Ops);
  4053. }
  4054. MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  4055. auto *T = dyn_cast<MDTuple>(&N);
  4056. if (!T)
  4057. return &N;
  4058. if (none_of(T->operands(), isOldLoopArgument))
  4059. return &N;
  4060. SmallVector<Metadata *, 8> Ops;
  4061. Ops.reserve(T->getNumOperands());
  4062. for (Metadata *MD : T->operands())
  4063. Ops.push_back(upgradeLoopArgument(MD));
  4064. return MDTuple::get(T->getContext(), Ops);
  4065. }
  4066. std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  4067. Triple T(TT);
4068. // For AMDGPU we upgrade older DataLayouts to include the default globals
4069. // address space of 1.
  4070. if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
  4071. return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  4072. }
  4073. std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  4074. // If X86, and the datalayout matches the expected format, add pointer size
  4075. // address spaces to the datalayout.
  4076. if (!T.isX86() || DL.contains(AddrSpaces))
  4077. return std::string(DL);
  4078. SmallVector<StringRef, 4> Groups;
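// Group 1 captures the layout up through the mangling (and optional 32-bit
// pointer spec); Group 3 captures the remainder. Splice the new address space
// specifiers in between.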
  4079. Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
  4080. if (!R.match(DL, &Groups))
  4081. return std::string(DL);
  4082. return (Groups[1] + AddrSpaces + Groups[3]).str();
  4083. }
  4084. void llvm::UpgradeAttributes(AttrBuilder &B) {
  4085. StringRef FramePointer;
  4086. if (B.contains("no-frame-pointer-elim")) {
  4087. // The value can be "true" or "false".
  4088. for (const auto &I : B.td_attrs())
  4089. if (I.first == "no-frame-pointer-elim")
  4090. FramePointer = I.second == "true" ? "all" : "none";
  4091. B.removeAttribute("no-frame-pointer-elim");
  4092. }
  4093. if (B.contains("no-frame-pointer-elim-non-leaf")) {
  4094. // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
  4095. if (FramePointer != "all")
  4096. FramePointer = "non-leaf";
  4097. B.removeAttribute("no-frame-pointer-elim-non-leaf");
  4098. }
  4099. if (!FramePointer.empty())
  4100. B.addAttribute("frame-pointer", FramePointer);
  4101. if (B.contains("null-pointer-is-valid")) {
  4102. // The value can be "true" or "false".
  4103. bool NullPointerIsValid = false;
  4104. for (const auto &I : B.td_attrs())
  4105. if (I.first == "null-pointer-is-valid")
  4106. NullPointerIsValid = I.second == "true";
  4107. B.removeAttribute("null-pointer-is-valid");
  4108. if (NullPointerIsValid)
  4109. B.addAttribute(Attribute::NullPointerIsValid);
  4110. }
  4111. }