//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>

using namespace llvm;

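// Rename a GlobalValue out of the way by appending ".old" to its name, so the
// upgraded intrinsic declaration can be created under the original name.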
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
  return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with the LLVM version
  // that started autoupgrading them. At some point in the future we would like
  // to use this information to remove upgrade code for some older intrinsics.
  // It is currently undecided how we will determine that future point.
  if (Name == "addcarryx.u32" || // Added in 8.0
      Name == "addcarryx.u64" || // Added in 8.0
      Name == "addcarry.u32" || // Added in 8.0
      Name == "addcarry.u64" || // Added in 8.0
      Name == "subborrow.u32" || // Added in 8.0
      Name == "subborrow.u64" || // Added in 8.0
      Name.startswith("sse2.padds.") || // Added in 8.0
      Name.startswith("sse2.psubs.") || // Added in 8.0
      Name.startswith("sse2.paddus.") || // Added in 8.0
      Name.startswith("sse2.psubus.") || // Added in 8.0
      Name.startswith("avx2.padds.") || // Added in 8.0
      Name.startswith("avx2.psubs.") || // Added in 8.0
      Name.startswith("avx2.paddus.") || // Added in 8.0
      Name.startswith("avx2.psubus.") || // Added in 8.0
      Name.startswith("avx512.padds.") || // Added in 8.0
      Name.startswith("avx512.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.padds.") || // Added in 8.0
      Name.startswith("avx512.mask.psubs.") || // Added in 8.0
      Name.startswith("avx512.mask.paddus.") || // Added in 8.0
      Name.startswith("avx512.mask.psubus.") || // Added in 8.0
      Name == "ssse3.pabs.b.128" || // Added in 6.0
      Name == "ssse3.pabs.w.128" || // Added in 6.0
      Name == "ssse3.pabs.d.128" || // Added in 6.0
      Name.startswith("fma4.vfmadd.s") || // Added in 7.0
      Name.startswith("fma.vfmadd.") || // Added in 7.0
      Name.startswith("fma.vfmsub.") || // Added in 7.0
      Name.startswith("fma.vfmsubadd.") || // Added in 7.0
      Name.startswith("fma.vfnmadd.") || // Added in 7.0
      Name.startswith("fma.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
      Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
      Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
      Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
      Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
      Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
      Name.startswith("avx512.kunpck") || // Added in 6.0
      Name.startswith("avx2.pabs.") || // Added in 6.0
      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
      Name.startswith("avx512.broadcastm") || // Added in 6.0
      Name == "sse.sqrt.ss" || // Added in 7.0
      Name == "sse2.sqrt.sd" || // Added in 7.0
      Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
      Name.startswith("avx.sqrt.p") || // Added in 7.0
      Name.startswith("sse2.sqrt.p") || // Added in 7.0
      Name.startswith("sse.sqrt.p") || // Added in 7.0
      Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
      Name.startswith("sse2.pcmpeq.") || // Added in 3.1
      Name.startswith("sse2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx2.pcmpeq.") || // Added in 3.1
      Name.startswith("avx2.pcmpgt.") || // Added in 3.1
      Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
      Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
      Name.startswith("avx.vperm2f128.") || // Added in 6.0
      Name == "avx2.vperm2i128" || // Added in 6.0
      Name == "sse.add.ss" || // Added in 4.0
      Name == "sse2.add.sd" || // Added in 4.0
      Name == "sse.sub.ss" || // Added in 4.0
      Name == "sse2.sub.sd" || // Added in 4.0
      Name == "sse.mul.ss" || // Added in 4.0
      Name == "sse2.mul.sd" || // Added in 4.0
      Name == "sse.div.ss" || // Added in 4.0
      Name == "sse2.div.sd" || // Added in 4.0
      Name == "sse41.pmaxsb" || // Added in 3.9
      Name == "sse2.pmaxs.w" || // Added in 3.9
      Name == "sse41.pmaxsd" || // Added in 3.9
      Name == "sse2.pmaxu.b" || // Added in 3.9
      Name == "sse41.pmaxuw" || // Added in 3.9
      Name == "sse41.pmaxud" || // Added in 3.9
      Name == "sse41.pminsb" || // Added in 3.9
      Name == "sse2.pmins.w" || // Added in 3.9
      Name == "sse41.pminsd" || // Added in 3.9
      Name == "sse2.pminu.b" || // Added in 3.9
      Name == "sse41.pminuw" || // Added in 3.9
      Name == "sse41.pminud" || // Added in 3.9
      Name == "avx512.kand.w" || // Added in 7.0
      Name == "avx512.kandn.w" || // Added in 7.0
      Name == "avx512.knot.w" || // Added in 7.0
      Name == "avx512.kor.w" || // Added in 7.0
      Name == "avx512.kxor.w" || // Added in 7.0
      Name == "avx512.kxnor.w" || // Added in 7.0
      Name == "avx512.kortestc.w" || // Added in 7.0
      Name == "avx512.kortestz.w" || // Added in 7.0
      Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
      Name.startswith("avx2.pmax") || // Added in 3.9
      Name.startswith("avx2.pmin") || // Added in 3.9
      Name.startswith("avx512.mask.pmax") || // Added in 4.0
      Name.startswith("avx512.mask.pmin") || // Added in 4.0
      Name.startswith("avx2.vbroadcast") || // Added in 3.8
      Name.startswith("avx2.pbroadcast") || // Added in 3.8
      Name.startswith("avx.vpermil.") || // Added in 3.1
      Name.startswith("sse2.pshuf") || // Added in 3.9
      Name.startswith("avx512.pbroadcast") || // Added in 3.9
      Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
      Name.startswith("avx512.mask.movddup") || // Added in 3.9
      Name.startswith("avx512.mask.movshdup") || // Added in 3.9
      Name.startswith("avx512.mask.movsldup") || // Added in 3.9
      Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
      Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
      Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
      Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
      Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
      Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
      Name.startswith("avx512.mask.punpckl") || // Added in 3.9
      Name.startswith("avx512.mask.punpckh") || // Added in 3.9
      Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
      Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
      Name.startswith("avx512.mask.pand.") || // Added in 3.9
      Name.startswith("avx512.mask.pandn.") || // Added in 3.9
      Name.startswith("avx512.mask.por.") || // Added in 3.9
      Name.startswith("avx512.mask.pxor.") || // Added in 3.9
      Name.startswith("avx512.mask.and.") || // Added in 3.9
      Name.startswith("avx512.mask.andn.") || // Added in 3.9
      Name.startswith("avx512.mask.or.") || // Added in 3.9
      Name.startswith("avx512.mask.xor.") || // Added in 3.9
      Name.startswith("avx512.mask.padd.") || // Added in 4.0
      Name.startswith("avx512.mask.psub.") || // Added in 4.0
      Name.startswith("avx512.mask.pmull.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
      Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
      Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
      Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
      Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
      Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
      Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
      Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
      Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
      Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
      Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
      Name == "avx512.cvtusi2sd" || // Added in 7.0
      Name.startswith("avx512.mask.permvar.") || // Added in 7.0
      Name == "sse2.pmulu.dq" || // Added in 7.0
      Name == "sse41.pmuldq" || // Added in 7.0
      Name == "avx2.pmulu.dq" || // Added in 7.0
      Name == "avx2.pmul.dq" || // Added in 7.0
      Name == "avx512.pmulu.dq.512" || // Added in 7.0
      Name == "avx512.pmul.dq.512" || // Added in 7.0
      Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
      Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
      Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
      Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
      Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
      Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
      Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
      Name.startswith("avx512.cmp.p") || // Added in 12.0
      Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
      Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
      Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
      Name.startswith("avx512.mask.psll.d") || // Added in 4.0
      Name.startswith("avx512.mask.psll.q") || // Added in 4.0
      Name.startswith("avx512.mask.psll.w") || // Added in 4.0
      Name.startswith("avx512.mask.psra.d") || // Added in 4.0
      Name.startswith("avx512.mask.psra.q") || // Added in 4.0
      Name.startswith("avx512.mask.psra.w") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
      Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
      Name.startswith("avx512.mask.pslli") || // Added in 4.0
      Name.startswith("avx512.mask.psrai") || // Added in 4.0
      Name.startswith("avx512.mask.psrli") || // Added in 4.0
      Name.startswith("avx512.mask.psllv") || // Added in 4.0
      Name.startswith("avx512.mask.psrav") || // Added in 4.0
      Name.startswith("avx512.mask.psrlv") || // Added in 4.0
      Name.startswith("sse41.pmovsx") || // Added in 3.8
      Name.startswith("sse41.pmovzx") || // Added in 3.9
      Name.startswith("avx2.pmovsx") || // Added in 3.9
      Name.startswith("avx2.pmovzx") || // Added in 3.9
      Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
      Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
      Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
      Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
      Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
      Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
      Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
      Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
      Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
      Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
      Name.startswith("avx512.vpshld.") || // Added in 8.0
      Name.startswith("avx512.vpshrd.") || // Added in 8.0
      Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
      Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
      Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
      Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
      Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
      Name.startswith("avx512.mask.conflict.") || // Added in 9.0
      Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
      Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
      Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
      Name == "sse.cvtsi2ss" || // Added in 7.0
      Name == "sse.cvtsi642ss" || // Added in 7.0
      Name == "sse2.cvtsi2sd" || // Added in 7.0
      Name == "sse2.cvtsi642sd" || // Added in 7.0
      Name == "sse2.cvtss2sd" || // Added in 7.0
      Name == "sse2.cvtdq2pd" || // Added in 3.9
      Name == "sse2.cvtdq2ps" || // Added in 7.0
      Name == "sse2.cvtps2pd" || // Added in 3.9
      Name == "avx.cvtdq2.pd.256" || // Added in 3.9
      Name == "avx.cvtdq2.ps.256" || // Added in 7.0
      Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
      Name.startswith("vcvtph2ps.") || // Added in 11.0
      Name.startswith("avx.vinsertf128.") || // Added in 3.7
      Name == "avx2.vinserti128" || // Added in 3.7
      Name.startswith("avx512.mask.insert") || // Added in 4.0
      Name.startswith("avx.vextractf128.") || // Added in 3.7
      Name == "avx2.vextracti128" || // Added in 3.7
      Name.startswith("avx512.mask.vextract") || // Added in 4.0
      Name.startswith("sse4a.movnt.") || // Added in 3.9
      Name.startswith("avx.movnt.") || // Added in 3.2
      Name.startswith("avx512.storent.") || // Added in 3.9
      Name == "sse41.movntdqa" || // Added in 5.0
      Name == "avx2.movntdqa" || // Added in 5.0
      Name == "avx512.movntdqa" || // Added in 5.0
      Name == "sse2.storel.dq" || // Added in 3.9
      Name.startswith("sse.storeu.") || // Added in 3.9
      Name.startswith("sse2.storeu.") || // Added in 3.9
      Name.startswith("avx.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.storeu.") || // Added in 3.9
      Name.startswith("avx512.mask.store.p") || // Added in 3.9
      Name.startswith("avx512.mask.store.b.") || // Added in 3.9
      Name.startswith("avx512.mask.store.w.") || // Added in 3.9
      Name.startswith("avx512.mask.store.d.") || // Added in 3.9
      Name.startswith("avx512.mask.store.q.") || // Added in 3.9
      Name == "avx512.mask.store.ss" || // Added in 7.0
      Name.startswith("avx512.mask.loadu.") || // Added in 3.9
      Name.startswith("avx512.mask.load.") || // Added in 3.9
      Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
      Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
      Name.startswith("avx512.mask.expand.b") || // Added in 9.0
      Name.startswith("avx512.mask.expand.w") || // Added in 9.0
      Name.startswith("avx512.mask.expand.d") || // Added in 9.0
      Name.startswith("avx512.mask.expand.q") || // Added in 9.0
      Name.startswith("avx512.mask.expand.p") || // Added in 9.0
      Name.startswith("avx512.mask.compress.b") || // Added in 9.0
      Name.startswith("avx512.mask.compress.w") || // Added in 9.0
      Name.startswith("avx512.mask.compress.d") || // Added in 9.0
      Name.startswith("avx512.mask.compress.q") || // Added in 9.0
      Name.startswith("avx512.mask.compress.p") || // Added in 9.0
      Name == "sse42.crc32.64.8" || // Added in 3.4
      Name.startswith("avx.vbroadcast.s") || // Added in 3.5
      Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
      Name.startswith("avx512.mask.palignr.") || // Added in 3.9
      Name.startswith("avx512.mask.valign.") || // Added in 4.0
      Name.startswith("sse2.psll.dq") || // Added in 3.7
      Name.startswith("sse2.psrl.dq") || // Added in 3.7
      Name.startswith("avx2.psll.dq") || // Added in 3.7
      Name.startswith("avx2.psrl.dq") || // Added in 3.7
      Name.startswith("avx512.psll.dq") || // Added in 3.9
      Name.startswith("avx512.psrl.dq") || // Added in 3.9
      Name == "sse41.pblendw" || // Added in 3.7
      Name.startswith("sse41.blendp") || // Added in 3.7
      Name.startswith("avx.blend.p") || // Added in 3.7
      Name == "avx2.pblendw" || // Added in 3.7
      Name.startswith("avx2.pblendd.") || // Added in 3.7
      Name.startswith("avx.vbroadcastf128") || // Added in 4.0
      Name == "avx2.vbroadcasti128" || // Added in 3.7
      Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
      Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
      Name == "xop.vpcmov" || // Added in 3.8
      Name == "xop.vpcmov.256" || // Added in 5.0
      Name.startswith("avx512.mask.move.s") || // Added in 4.0
      Name.startswith("avx512.cvtmask2") || // Added in 5.0
      Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
      Name.startswith("xop.vprot") || // Added in 8.0
      Name.startswith("avx512.prol") || // Added in 8.0
      Name.startswith("avx512.pror") || // Added in 8.0
      Name.startswith("avx512.mask.prorv.") || // Added in 8.0
      Name.startswith("avx512.mask.pror.") || // Added in 8.0
      Name.startswith("avx512.mask.prolv.") || // Added in 8.0
      Name.startswith("avx512.mask.prol.") || // Added in 8.0
      Name.startswith("avx512.ptestm") || // Added in 6.0
      Name.startswith("avx512.ptestnm") || // Added in 6.0
      Name.startswith("avx512.mask.pavg")) // Added in 6.0
    return true;

  return false;
}

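// Upgrade the declaration of an x86 intrinsic. Returns true if an upgrade is
// needed. NewFn receives the replacement declaration, or stays null when only
// the call sites need rewriting (handled later in UpgradeIntrinsicCall).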
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.startswith("x86."))
    return false;
  // Remove "x86." prefix.
  Name = Name.substr(4);

  if (ShouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_rdtscp);
    return true;
  }

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("sse41.ptest")) { // Added in 3.2
    if (Name.substr(11) == "c")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name.substr(11) == "z")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name.substr(11) == "nzc")
      return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  }
  // Several blend and other instructions with masks used the wrong number of
  // bits.
  if (Name == "sse41.insertps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
                                            NewFn);
  if (Name == "sse41.dppd") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
                                            NewFn);
  if (Name == "sse41.dpps") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
                                            NewFn);
  if (Name == "sse41.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
                                            NewFn);
  if (Name == "avx.dp.ps.256") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
                                            NewFn);
  if (Name == "avx2.mpsadbw") // Added in 3.6
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                            NewFn);
  if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
                                     NewFn);
  if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
    return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
                                     NewFn);

  // frcz.ss/sd may need to have an argument dropped. Added in 3.2
  if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
    return true;
  }
  if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
    rename(F);
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
    return true;
  }
  // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
  if (Name.startswith("xop.vpermil2")) { // Added in 3.9
    auto Idx = F->getFunctionType()->getParamType(2);
    if (Idx->isFPOrFPVectorTy()) {
      rename(F);
      unsigned IdxSize = Idx->getPrimitiveSizeInBits();
      unsigned EltSize = Idx->getScalarSizeInBits();
      Intrinsic::ID Permil2ID;
      if (EltSize == 64 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd;
      else if (EltSize == 32 && IdxSize == 128)
        Permil2ID = Intrinsic::x86_xop_vpermil2ps;
      else if (EltSize == 64 && IdxSize == 256)
        Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
      else
        Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
      NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
      return true;
    }
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

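// Upgrade a single intrinsic declaration, dispatching on the first character
// after the "llvm." prefix. Returns true if the declaration or its call sites
// must be upgraded; NewFn is set when a replacement declaration exists.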
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
    return false;
  Name = Name.substr(5); // Strip off "llvm."

  switch (Name[0]) {
  default: break;
  case 'a': {
    if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.frintn")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.rbit")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vclz")) {
      Type* args[2] = {
        F->arg_begin()->getType(),
        Type::getInt1Ty(F->getContext())
      };
      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
      // the end of the name. Change name from llvm.arm.neon.vclz.* to
      // llvm.ctlz.*
      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm.ctlz." + Name.substr(14), F->getParent());
      return true;
    }
    if (Name.startswith("arm.neon.vcnt")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                        F->arg_begin()->getType());
      return true;
    }
    static const Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vldRegex.match(Name)) {
      auto fArgs = F->getFunctionType()->params();
      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
      // Can't use Intrinsic::getDeclaration here as the return types might
      // then only be structurally equal.
      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
      StringRef Suffix =
          F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
      NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
                               "llvm." + Name + "." + Suffix, F->getParent());
      return true;
    }
    static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
    if (vstRegex.match(Name)) {
      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
                                                Intrinsic::arm_neon_vst2,
                                                Intrinsic::arm_neon_vst3,
                                                Intrinsic::arm_neon_vst4};
      static const Intrinsic::ID StoreLaneInts[] = {
        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
        Intrinsic::arm_neon_vst4lane
      };
      auto fArgs = F->getFunctionType()->params();
      Type *Tys[] = {fArgs[0], fArgs[1]};
      if (!Name.contains("lane"))
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreInts[fArgs.size() - 3], Tys);
      else
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          StoreLaneInts[fArgs.size() - 5], Tys);
      return true;
    }
    if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
      return true;
    }
    if (Name.startswith("arm.neon.vqadds.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqaddu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubs.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("arm.neon.vqsubu.")) {
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("aarch64.neon.addp")) {
      if (F->arg_size() != 2)
        break; // Invalid IR.
      VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
      if (Ty && Ty->getElementType()->isFloatingPointTy()) {
        NewFn = Intrinsic::getDeclaration(F->getParent(),
                                          Intrinsic::aarch64_neon_faddp, Ty);
        return true;
      }
    }

    // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and v16i8
    // respectively
    if ((Name.startswith("arm.neon.bfdot.") ||
         Name.startswith("aarch64.neon.bfdot.")) &&
        Name.endswith("i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("arm.neon.bfdot.v2f32.v8i8",
                     "arm.neon.bfdot.v4f32.v16i8",
                     Intrinsic::arm_neon_bfdot)
              .Cases("aarch64.neon.bfdot.v2f32.v8i8",
                     "aarch64.neon.bfdot.v4f32.v16i8",
                     Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
      assert((OperandWidth == 64 || OperandWidth == 128) &&
             "Unexpected operand width");
      LLVMContext &Ctx = F->getParent()->getContext();
      std::array<Type *, 2> Tys {{
        F->getReturnType(),
        FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
      }};
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
    // and accept v8bf16 instead of v16i8
    if ((Name.startswith("arm.neon.bfm") ||
         Name.startswith("aarch64.neon.bfm")) &&
        Name.endswith(".v4f32.v16i8")) {
      Intrinsic::ID IID =
          StringSwitch<Intrinsic::ID>(Name)
              .Case("arm.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmmla)
              .Case("arm.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalb)
              .Case("arm.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::arm_neon_bfmlalt)
              .Case("aarch64.neon.bfmmla.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmmla)
              .Case("aarch64.neon.bfmlalb.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalb)
              .Case("aarch64.neon.bfmlalt.v4f32.v16i8",
                    Intrinsic::aarch64_neon_bfmlalt)
              .Default(Intrinsic::not_intrinsic);
      if (IID == Intrinsic::not_intrinsic)
        break;

      std::array<Type *, 0> Tys;
      NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
      return true;
    }
    if (Name == "arm.mve.vctp64" &&
        cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
      // A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
      // function and deal with it below in UpgradeIntrinsicCall.
      rename(F);
      return true;
    }
    // These too are changed to accept a v2i1 instead of the old v4i1.
    if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
        Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
        Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
        Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
        Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
      return true;

    if (Name == "amdgcn.alignbit") {
      // Target specific intrinsic became redundant
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
                                        {F->getReturnType()});
      return true;
    }

    break;
  }
  case 'c': {
    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                        F->arg_begin()->getType());
      return true;
    }
    if (Name.startswith("cttz.") && F->arg_size() == 1) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                        F->arg_begin()->getType());
      return true;
    }
    break;
  }
  case 'd': {
    if (Name == "dbg.value" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
      return true;
    }
    break;
  }
  case 'e': {
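    // The llvm.experimental.vector.reduce.* intrinsics (and their .v2 fadd/fmul
    // variants) were renamed to llvm.vector.reduce.*; match the old names by
    // regex and redeclare them under the new names.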
    SmallVector<StringRef, 2> Groups;
    static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
    if (R.match(Name, &Groups)) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Groups[1])
               .Case("add", Intrinsic::vector_reduce_add)
               .Case("mul", Intrinsic::vector_reduce_mul)
               .Case("and", Intrinsic::vector_reduce_and)
               .Case("or", Intrinsic::vector_reduce_or)
               .Case("xor", Intrinsic::vector_reduce_xor)
               .Case("smax", Intrinsic::vector_reduce_smax)
               .Case("smin", Intrinsic::vector_reduce_smin)
               .Case("umax", Intrinsic::vector_reduce_umax)
               .Case("umin", Intrinsic::vector_reduce_umin)
               .Case("fmax", Intrinsic::vector_reduce_fmax)
               .Case("fmin", Intrinsic::vector_reduce_fmin)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
        return true;
      }
    }
    static const Regex R2(
        "^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
    Groups.clear();
    if (R2.match(Name, &Groups)) {
      Intrinsic::ID ID = Intrinsic::not_intrinsic;
      if (Groups[1] == "fadd")
        ID = Intrinsic::vector_reduce_fadd;
      if (Groups[1] == "fmul")
        ID = Intrinsic::vector_reduce_fmul;
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {Args[1]};
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
        return true;
      }
    }
    break;
  }
  case 'i':
  case 'l': {
    bool IsLifetimeStart = Name.startswith("lifetime.start");
    if (IsLifetimeStart || Name.startswith("invariant.start")) {
      Intrinsic::ID ID = IsLifetimeStart ?
        Intrinsic::lifetime_start : Intrinsic::invariant_start;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[1]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    bool IsLifetimeEnd = Name.startswith("lifetime.end");
    if (IsLifetimeEnd || Name.startswith("invariant.end")) {
      Intrinsic::ID ID = IsLifetimeEnd ?
        Intrinsic::lifetime_end : Intrinsic::invariant_end;
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
      if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
        return true;
      }
    }
    if (Name.startswith("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type* ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getDeclaration(F->getParent(),
          Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  }
  case 'm': {
  813. if (Name.startswith("masked.load.")) {
  814. Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
  815. if (F->getName() !=
  816. Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
  817. rename(F);
  818. NewFn = Intrinsic::getDeclaration(F->getParent(),
  819. Intrinsic::masked_load,
  820. Tys);
  821. return true;
  822. }
  823. }
  824. if (Name.startswith("masked.store.")) {
  825. auto Args = F->getFunctionType()->params();
  826. Type *Tys[] = { Args[0], Args[1] };
  827. if (F->getName() !=
  828. Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
  829. rename(F);
  830. NewFn = Intrinsic::getDeclaration(F->getParent(),
  831. Intrinsic::masked_store,
  832. Tys);
  833. return true;
  834. }
  835. }
  836. // Renaming gather/scatter intrinsics with no address space overloading
  837. // to the new overload which includes an address space
  838. if (Name.startswith("masked.gather.")) {
  839. Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
  840. if (F->getName() !=
  841. Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
  842. rename(F);
  843. NewFn = Intrinsic::getDeclaration(F->getParent(),
  844. Intrinsic::masked_gather, Tys);
  845. return true;
  846. }
  847. }
  848. if (Name.startswith("masked.scatter.")) {
  849. auto Args = F->getFunctionType()->params();
  850. Type *Tys[] = {Args[0], Args[1]};
  851. if (F->getName() !=
  852. Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
  853. rename(F);
  854. NewFn = Intrinsic::getDeclaration(F->getParent(),
  855. Intrinsic::masked_scatter, Tys);
  856. return true;
  857. }
  858. }
  859. // Updating the memory intrinsics (memcpy/memmove/memset) that have an
  860. // alignment parameter to embedding the alignment as an attribute of
  861. // the pointer args.
  862. if (Name.startswith("memcpy.") && F->arg_size() == 5) {
  863. rename(F);
  864. // Get the types of dest, src, and len
  865. ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
  866. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
  867. ParamTypes);
  868. return true;
  869. }
  870. if (Name.startswith("memmove.") && F->arg_size() == 5) {
  871. rename(F);
  872. // Get the types of dest, src, and len
  873. ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
  874. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
  875. ParamTypes);
  876. return true;
  877. }
  878. if (Name.startswith("memset.") && F->arg_size() == 5) {
  879. rename(F);
  880. // Get the types of dest, and len
  881. const auto *FT = F->getFunctionType();
  882. Type *ParamTypes[2] = {
  883. FT->getParamType(0), // Dest
  884. FT->getParamType(2) // len
  885. };
  886. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
  887. ParamTypes);
  888. return true;
  889. }
  890. break;
  891. }
  892. case 'n': {
  893. if (Name.startswith("nvvm.")) {
  894. Name = Name.substr(5);
  895. // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
  896. Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
  897. .Cases("brev32", "brev64", Intrinsic::bitreverse)
  898. .Case("clz.i", Intrinsic::ctlz)
  899. .Case("popc.i", Intrinsic::ctpop)
  900. .Default(Intrinsic::not_intrinsic);
  901. if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
  902. NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
  903. {F->getReturnType()});
  904. return true;
  905. }
  906. // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
  907. // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
  908. //
  909. // TODO: We could add lohi.i2d.
  910. bool Expand = StringSwitch<bool>(Name)
  911. .Cases("abs.i", "abs.ll", true)
  912. .Cases("clz.ll", "popc.ll", "h2f", true)
  913. .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
  914. .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
  915. .StartsWith("atomic.load.add.f32.p", true)
  916. .StartsWith("atomic.load.add.f64.p", true)
  917. .Default(false);
  918. if (Expand) {
  919. NewFn = nullptr;
  920. return true;
  921. }
  922. }
  923. break;
  924. }
  925. case 'o':
  926. // We only need to change the name to match the mangling including the
  927. // address space.
  928. if (Name.startswith("objectsize.")) {
  929. Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
  930. if (F->arg_size() == 2 || F->arg_size() == 3 ||
  931. F->getName() !=
  932. Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
  933. rename(F);
  934. NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
  935. Tys);
  936. return true;
  937. }
  938. }
  939. break;
  940. case 'p':
  941. if (Name == "prefetch") {
  942. // Handle address space overloading.
  943. Type *Tys[] = {F->arg_begin()->getType()};
  944. if (F->getName() !=
  945. Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
  946. rename(F);
  947. NewFn =
  948. Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
  949. return true;
  950. }
  951. } else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
  952. rename(F);
  953. NewFn = Intrinsic::getDeclaration(F->getParent(),
  954. Intrinsic::ptr_annotation,
  955. F->arg_begin()->getType());
  956. return true;
  957. }
  958. break;
  959. case 's':
  960. if (Name == "stackprotectorcheck") {
  961. NewFn = nullptr;
  962. return true;
  963. }
  964. break;
  965. case 'v': {
  966. if (Name == "var.annotation" && F->arg_size() == 4) {
  967. rename(F);
  968. NewFn = Intrinsic::getDeclaration(F->getParent(),
  969. Intrinsic::var_annotation);
  970. return true;
  971. }
  972. break;
  973. }
  974. case 'x':
  975. if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
  976. return true;
  977. }
  978. // Remangle our intrinsic since we upgrade the mangling
  979. auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  980. if (Result != None) {
  981. NewFn = Result.getValue();
  982. return true;
  983. }
  984. // This may not belong here. This function is effectively being overloaded
  985. // to both detect an intrinsic which needs upgrading, and to provide the
  986. // upgraded form of the intrinsic. We should perhaps have two separate
  987. // functions for this.
  988. return false;
  989. }
  990. bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  991. NewFn = nullptr;
  992. bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  993. assert(F != NewFn && "Intrinsic function upgraded to the same function");
  994. // Upgrade intrinsic attributes. This does not change the function.
  995. if (NewFn)
  996. F = NewFn;
  997. if (Intrinsic::ID id = F->getIntrinsicID())
  998. F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  999. return Upgraded;
  1000. }
  1001. GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  1002. if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
  1003. GV->getName() == "llvm.global_dtors")) ||
  1004. !GV->hasInitializer())
  1005. return nullptr;
  1006. ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  1007. if (!ATy)
  1008. return nullptr;
  1009. StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  1010. if (!STy || STy->getNumElements() != 2)
  1011. return nullptr;
  1012. LLVMContext &C = GV->getContext();
  1013. IRBuilder<> IRB(C);
  1014. auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
  1015. IRB.getInt8PtrTy());
  1016. Constant *Init = GV->getInitializer();
  1017. unsigned N = Init->getNumOperands();
  1018. std::vector<Constant *> NewCtors(N);
  1019. for (unsigned i = 0; i != N; ++i) {
  1020. auto Ctor = cast<Constant>(Init->getOperand(i));
  1021. NewCtors[i] = ConstantStruct::get(
  1022. EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
  1023. Constant::getNullValue(IRB.getInt8PtrTy()));
  1024. }
  1025. Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
  1026. return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
  1027. NewInit, GV->getName());
  1028. }
  1029. // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
  1030. // to byte shuffles.
  1031. static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
  1032. Value *Op, unsigned Shift) {
  1033. auto *ResultTy = cast<FixedVectorType>(Op->getType());
  1034. unsigned NumElts = ResultTy->getNumElements() * 8;
  1035. // Bitcast from a 64-bit element type to a byte element type.
  1036. Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  1037. Op = Builder.CreateBitCast(Op, VecTy, "cast");
  1038. // We'll be shuffling in zeroes.
  1039. Value *Res = Constant::getNullValue(VecTy);
  1040. // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  1041. // we'll just return the zero vector.
  1042. if (Shift < 16) {
  1043. int Idxs[64];
  1044. // 256/512-bit version is split into 2/4 16-byte lanes.
  1045. for (unsigned l = 0; l != NumElts; l += 16)
  1046. for (unsigned i = 0; i != 16; ++i) {
  1047. unsigned Idx = NumElts + i - Shift;
  1048. if (Idx < NumElts)
  1049. Idx -= NumElts - 16; // end of lane, switch operand.
  1050. Idxs[l + i] = Idx + l;
  1051. }
  1052. Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
  1053. }
  1054. // Bitcast back to a 64-bit element type.
  1055. return Builder.CreateBitCast(Res, ResultTy, "cast");
  1056. }
  1057. // Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
  1058. // to byte shuffles.
  1059. static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
  1060. unsigned Shift) {
  1061. auto *ResultTy = cast<FixedVectorType>(Op->getType());
  1062. unsigned NumElts = ResultTy->getNumElements() * 8;
  1063. // Bitcast from a 64-bit element type to a byte element type.
  1064. Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  1065. Op = Builder.CreateBitCast(Op, VecTy, "cast");
  1066. // We'll be shuffling in zeroes.
  1067. Value *Res = Constant::getNullValue(VecTy);
  1068. // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  1069. // we'll just return the zero vector.
  1070. if (Shift < 16) {
  1071. int Idxs[64];
  1072. // 256/512-bit version is split into 2/4 16-byte lanes.
  1073. for (unsigned l = 0; l != NumElts; l += 16)
  1074. for (unsigned i = 0; i != 16; ++i) {
  1075. unsigned Idx = i + Shift;
  1076. if (Idx >= 16)
  1077. Idx += NumElts - 16; // end of lane, switch operand.
  1078. Idxs[l + i] = Idx + l;
  1079. }
  1080. Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
  1081. }
  1082. // Bitcast back to a 64-bit element type.
  1083. return Builder.CreateBitCast(Res, ResultTy, "cast");
  1084. }
  1085. static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
  1086. unsigned NumElts) {
  1087. assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  1088. llvm::VectorType *MaskTy = FixedVectorType::get(
  1089. Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  1090. Mask = Builder.CreateBitCast(Mask, MaskTy);
  1091. // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  1092. // i8 and we need to extract down to the right number of elements.
  1093. if (NumElts <= 4) {
  1094. int Indices[4];
  1095. for (unsigned i = 0; i != NumElts; ++i)
  1096. Indices[i] = i;
  1097. Mask = Builder.CreateShuffleVector(
  1098. Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
  1099. }
  1100. return Mask;
  1101. }
  1102. static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
  1103. Value *Op0, Value *Op1) {
  1104. // If the mask is all ones just emit the first operation.
  1105. if (const auto *C = dyn_cast<Constant>(Mask))
  1106. if (C->isAllOnesValue())
  1107. return Op0;
  1108. Mask = getX86MaskVec(Builder, Mask,
  1109. cast<FixedVectorType>(Op0->getType())->getNumElements());
  1110. return Builder.CreateSelect(Mask, Op0, Op1);
  1111. }
  1112. static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
  1113. Value *Op0, Value *Op1) {
  1114. // If the mask is all ones just emit the first operation.
  1115. if (const auto *C = dyn_cast<Constant>(Mask))
  1116. if (C->isAllOnesValue())
  1117. return Op0;
  1118. auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
  1119. Mask->getType()->getIntegerBitWidth());
  1120. Mask = Builder.CreateBitCast(Mask, MaskTy);
  1121. Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  1122. return Builder.CreateSelect(Mask, Op0, Op1);
  1123. }
  1124. // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
  1125. // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
  1126. // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
  1127. static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
  1128. Value *Op1, Value *Shift,
  1129. Value *Passthru, Value *Mask,
  1130. bool IsVALIGN) {
  1131. unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
  1132. unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  1133. assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  1134. assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  1135. assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
  1136. // Mask the immediate for VALIGN.
  1137. if (IsVALIGN)
  1138. ShiftVal &= (NumElts - 1);
  1139. // If palignr is shifting the pair of vectors more than the size of two
  1140. // lanes, emit zero.
  1141. if (ShiftVal >= 32)
  1142. return llvm::Constant::getNullValue(Op0->getType());
  1143. // If palignr is shifting the pair of input vectors more than one lane,
  1144. // but less than two lanes, convert to shifting in zeroes.
  1145. if (ShiftVal > 16) {
  1146. ShiftVal -= 16;
  1147. Op1 = Op0;
  1148. Op0 = llvm::Constant::getNullValue(Op0->getType());
  1149. }
  1150. int Indices[64];
  1151. // 256-bit palignr operates on 128-bit lanes so we need to handle that
  1152. for (unsigned l = 0; l < NumElts; l += 16) {
  1153. for (unsigned i = 0; i != 16; ++i) {
  1154. unsigned Idx = ShiftVal + i;
  1155. if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
  1156. Idx += NumElts - 16; // End of lane, switch operand.
  1157. Indices[l + i] = Idx + l;
  1158. }
  1159. }
  1160. Value *Align = Builder.CreateShuffleVector(Op1, Op0,
  1161. makeArrayRef(Indices, NumElts),
  1162. "palignr");
  1163. return EmitX86Select(Builder, Mask, Align, Passthru);
  1164. }
  1165. static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
  1166. bool ZeroMask, bool IndexForm) {
  1167. Type *Ty = CI.getType();
  1168. unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  1169. unsigned EltWidth = Ty->getScalarSizeInBits();
  1170. bool IsFloat = Ty->isFPOrFPVectorTy();
  1171. Intrinsic::ID IID;
  1172. if (VecWidth == 128 && EltWidth == 32 && IsFloat)
  1173. IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  1174. else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
  1175. IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  1176. else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
  1177. IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  1178. else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
  1179. IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  1180. else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  1181. IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  1182. else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  1183. IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  1184. else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  1185. IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  1186. else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  1187. IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  1188. else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  1189. IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  1190. else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  1191. IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  1192. else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  1193. IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  1194. else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  1195. IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  1196. else if (VecWidth == 128 && EltWidth == 16)
  1197. IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  1198. else if (VecWidth == 256 && EltWidth == 16)
  1199. IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  1200. else if (VecWidth == 512 && EltWidth == 16)
  1201. IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  1202. else if (VecWidth == 128 && EltWidth == 8)
  1203. IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  1204. else if (VecWidth == 256 && EltWidth == 8)
  1205. IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  1206. else if (VecWidth == 512 && EltWidth == 8)
  1207. IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  1208. else
  1209. llvm_unreachable("Unexpected intrinsic");
  1210. Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
  1211. CI.getArgOperand(2) };
  1212. // If this isn't index form we need to swap operand 0 and 1.
  1213. if (!IndexForm)
  1214. std::swap(Args[0], Args[1]);
  1215. Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
  1216. Args);
  1217. Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
  1218. : Builder.CreateBitCast(CI.getArgOperand(1),
  1219. Ty);
  1220. return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
  1221. }
  1222. static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
  1223. Intrinsic::ID IID) {
  1224. Type *Ty = CI.getType();
  1225. Value *Op0 = CI.getOperand(0);
  1226. Value *Op1 = CI.getOperand(1);
  1227. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1228. Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
  1229. if (CI.arg_size() == 4) { // For masked intrinsics.
  1230. Value *VecSrc = CI.getOperand(2);
  1231. Value *Mask = CI.getOperand(3);
  1232. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1233. }
  1234. return Res;
  1235. }
  1236. static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
  1237. bool IsRotateRight) {
  1238. Type *Ty = CI.getType();
  1239. Value *Src = CI.getArgOperand(0);
  1240. Value *Amt = CI.getArgOperand(1);
  1241. // Amount may be scalar immediate, in which case create a splat vector.
  1242. // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  1243. // we only care about the lowest log2 bits anyway.
  1244. if (Amt->getType() != Ty) {
  1245. unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  1246. Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  1247. Amt = Builder.CreateVectorSplat(NumElts, Amt);
  1248. }
  1249. Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  1250. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1251. Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
  1252. if (CI.arg_size() == 4) { // For masked intrinsics.
  1253. Value *VecSrc = CI.getOperand(2);
  1254. Value *Mask = CI.getOperand(3);
  1255. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1256. }
  1257. return Res;
  1258. }
  1259. static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
  1260. bool IsSigned) {
  1261. Type *Ty = CI.getType();
  1262. Value *LHS = CI.getArgOperand(0);
  1263. Value *RHS = CI.getArgOperand(1);
  1264. CmpInst::Predicate Pred;
  1265. switch (Imm) {
  1266. case 0x0:
  1267. Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
  1268. break;
  1269. case 0x1:
  1270. Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
  1271. break;
  1272. case 0x2:
  1273. Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
  1274. break;
  1275. case 0x3:
  1276. Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
  1277. break;
  1278. case 0x4:
  1279. Pred = ICmpInst::ICMP_EQ;
  1280. break;
  1281. case 0x5:
  1282. Pred = ICmpInst::ICMP_NE;
  1283. break;
  1284. case 0x6:
  1285. return Constant::getNullValue(Ty); // FALSE
  1286. case 0x7:
  1287. return Constant::getAllOnesValue(Ty); // TRUE
  1288. default:
  1289. llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  1290. }
  1291. Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  1292. Value *Ext = Builder.CreateSExt(Cmp, Ty);
  1293. return Ext;
  1294. }
  1295. static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
  1296. bool IsShiftRight, bool ZeroMask) {
  1297. Type *Ty = CI.getType();
  1298. Value *Op0 = CI.getArgOperand(0);
  1299. Value *Op1 = CI.getArgOperand(1);
  1300. Value *Amt = CI.getArgOperand(2);
  1301. if (IsShiftRight)
  1302. std::swap(Op0, Op1);
  1303. // Amount may be scalar immediate, in which case create a splat vector.
  1304. // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
  1305. // we only care about the lowest log2 bits anyway.
  1306. if (Amt->getType() != Ty) {
  1307. unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
  1308. Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
  1309. Amt = Builder.CreateVectorSplat(NumElts, Amt);
  1310. }
  1311. Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  1312. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
  1313. Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
  1314. unsigned NumArgs = CI.arg_size();
  1315. if (NumArgs >= 4) { // For masked intrinsics.
  1316. Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
  1317. ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
  1318. CI.getArgOperand(0);
  1319. Value *Mask = CI.getOperand(NumArgs - 1);
  1320. Res = EmitX86Select(Builder, Mask, Res, VecSrc);
  1321. }
  1322. return Res;
  1323. }
  1324. static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
  1325. Value *Ptr, Value *Data, Value *Mask,
  1326. bool Aligned) {
  1327. // Cast the pointer to the right type.
  1328. Ptr = Builder.CreateBitCast(Ptr,
  1329. llvm::PointerType::getUnqual(Data->getType()));
  1330. const Align Alignment =
  1331. Aligned
  1332. ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
  1333. : Align(1);
  1334. // If the mask is all ones just emit a regular store.
  1335. if (const auto *C = dyn_cast<Constant>(Mask))
  1336. if (C->isAllOnesValue())
  1337. return Builder.CreateAlignedStore(Data, Ptr, Alignment);
  1338. // Convert the mask from an integer type to a vector of i1.
  1339. unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  1340. Mask = getX86MaskVec(Builder, Mask, NumElts);
  1341. return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
  1342. }
  1343. static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
  1344. Value *Ptr, Value *Passthru, Value *Mask,
  1345. bool Aligned) {
  1346. Type *ValTy = Passthru->getType();
  1347. // Cast the pointer to the right type.
  1348. Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
  1349. const Align Alignment =
  1350. Aligned
  1351. ? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
  1352. 8)
  1353. : Align(1);
  1354. // If the mask is all ones just emit a regular store.
  1355. if (const auto *C = dyn_cast<Constant>(Mask))
  1356. if (C->isAllOnesValue())
  1357. return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
  1358. // Convert the mask from an integer type to a vector of i1.
  1359. unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  1360. Mask = getX86MaskVec(Builder, Mask, NumElts);
  1361. return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
  1362. }
  1363. static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
  1364. Type *Ty = CI.getType();
  1365. Value *Op0 = CI.getArgOperand(0);
  1366. Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
  1367. Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
  1368. if (CI.arg_size() == 3)
  1369. Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  1370. return Res;
  1371. }
  1372. static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
  1373. Type *Ty = CI.getType();
  1374. // Arguments have a vXi32 type so cast to vXi64.
  1375. Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  1376. Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
  1377. if (IsSigned) {
  1378. // Shift left then arithmetic shift right.
  1379. Constant *ShiftAmt = ConstantInt::get(Ty, 32);
  1380. LHS = Builder.CreateShl(LHS, ShiftAmt);
  1381. LHS = Builder.CreateAShr(LHS, ShiftAmt);
  1382. RHS = Builder.CreateShl(RHS, ShiftAmt);
  1383. RHS = Builder.CreateAShr(RHS, ShiftAmt);
  1384. } else {
  1385. // Clear the upper bits.
  1386. Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
  1387. LHS = Builder.CreateAnd(LHS, Mask);
  1388. RHS = Builder.CreateAnd(RHS, Mask);
  1389. }
  1390. Value *Res = Builder.CreateMul(LHS, RHS);
  1391. if (CI.arg_size() == 4)
  1392. Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
  1393. return Res;
  1394. }
  1395. // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
  1396. static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
  1397. Value *Mask) {
  1398. unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  1399. if (Mask) {
  1400. const auto *C = dyn_cast<Constant>(Mask);
  1401. if (!C || !C->isAllOnesValue())
  1402. Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  1403. }
  1404. if (NumElts < 8) {
  1405. int Indices[8];
  1406. for (unsigned i = 0; i != NumElts; ++i)
  1407. Indices[i] = i;
  1408. for (unsigned i = NumElts; i != 8; ++i)
  1409. Indices[i] = NumElts + i % NumElts;
  1410. Vec = Builder.CreateShuffleVector(Vec,
  1411. Constant::getNullValue(Vec->getType()),
  1412. Indices);
  1413. }
  1414. return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
  1415. }
  1416. static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
  1417. unsigned CC, bool Signed) {
  1418. Value *Op0 = CI.getArgOperand(0);
  1419. unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  1420. Value *Cmp;
  1421. if (CC == 3) {
  1422. Cmp = Constant::getNullValue(
  1423. FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  1424. } else if (CC == 7) {
  1425. Cmp = Constant::getAllOnesValue(
  1426. FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  1427. } else {
  1428. ICmpInst::Predicate Pred;
  1429. switch (CC) {
  1430. default: llvm_unreachable("Unknown condition code");
  1431. case 0: Pred = ICmpInst::ICMP_EQ; break;
  1432. case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
  1433. case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
  1434. case 4: Pred = ICmpInst::ICMP_NE; break;
  1435. case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
  1436. case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
  1437. }
  1438. Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  1439. }
  1440. Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
  1441. return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
  1442. }
  1443. // Replace a masked intrinsic with an older unmasked intrinsic.
  1444. static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
  1445. Intrinsic::ID IID) {
  1446. Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
  1447. Value *Rep = Builder.CreateCall(Intrin,
  1448. { CI.getArgOperand(0), CI.getArgOperand(1) });
  1449. return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
  1450. }
  1451. static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
  1452. Value* A = CI.getArgOperand(0);
  1453. Value* B = CI.getArgOperand(1);
  1454. Value* Src = CI.getArgOperand(2);
  1455. Value* Mask = CI.getArgOperand(3);
  1456. Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  1457. Value* Cmp = Builder.CreateIsNotNull(AndNode);
  1458. Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  1459. Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  1460. Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  1461. return Builder.CreateInsertElement(A, Select, (uint64_t)0);
  1462. }
  1463. static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
  1464. Value* Op = CI.getArgOperand(0);
  1465. Type* ReturnOp = CI.getType();
  1466. unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  1467. Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  1468. return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
  1469. }
  1470. // Replace intrinsic with unmasked version and a select.
  1471. static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
  1472. CallInst &CI, Value *&Rep) {
  1473. Name = Name.substr(12); // Remove avx512.mask.
  1474. unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  1475. unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  1476. Intrinsic::ID IID;
  1477. if (Name.startswith("max.p")) {
  1478. if (VecWidth == 128 && EltWidth == 32)
  1479. IID = Intrinsic::x86_sse_max_ps;
  1480. else if (VecWidth == 128 && EltWidth == 64)
  1481. IID = Intrinsic::x86_sse2_max_pd;
  1482. else if (VecWidth == 256 && EltWidth == 32)
  1483. IID = Intrinsic::x86_avx_max_ps_256;
  1484. else if (VecWidth == 256 && EltWidth == 64)
  1485. IID = Intrinsic::x86_avx_max_pd_256;
  1486. else
  1487. llvm_unreachable("Unexpected intrinsic");
  1488. } else if (Name.startswith("min.p")) {
  1489. if (VecWidth == 128 && EltWidth == 32)
  1490. IID = Intrinsic::x86_sse_min_ps;
  1491. else if (VecWidth == 128 && EltWidth == 64)
  1492. IID = Intrinsic::x86_sse2_min_pd;
  1493. else if (VecWidth == 256 && EltWidth == 32)
  1494. IID = Intrinsic::x86_avx_min_ps_256;
  1495. else if (VecWidth == 256 && EltWidth == 64)
  1496. IID = Intrinsic::x86_avx_min_pd_256;
  1497. else
  1498. llvm_unreachable("Unexpected intrinsic");
  1499. } else if (Name.startswith("pshuf.b.")) {
  1500. if (VecWidth == 128)
  1501. IID = Intrinsic::x86_ssse3_pshuf_b_128;
  1502. else if (VecWidth == 256)
  1503. IID = Intrinsic::x86_avx2_pshuf_b;
  1504. else if (VecWidth == 512)
  1505. IID = Intrinsic::x86_avx512_pshuf_b_512;
  1506. else
  1507. llvm_unreachable("Unexpected intrinsic");
  1508. } else if (Name.startswith("pmul.hr.sw.")) {
  1509. if (VecWidth == 128)
  1510. IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
  1511. else if (VecWidth == 256)
  1512. IID = Intrinsic::x86_avx2_pmul_hr_sw;
  1513. else if (VecWidth == 512)
  1514. IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
  1515. else
  1516. llvm_unreachable("Unexpected intrinsic");
  1517. } else if (Name.startswith("pmulh.w.")) {
  1518. if (VecWidth == 128)
  1519. IID = Intrinsic::x86_sse2_pmulh_w;
  1520. else if (VecWidth == 256)
  1521. IID = Intrinsic::x86_avx2_pmulh_w;
  1522. else if (VecWidth == 512)
  1523. IID = Intrinsic::x86_avx512_pmulh_w_512;
  1524. else
  1525. llvm_unreachable("Unexpected intrinsic");
  1526. } else if (Name.startswith("pmulhu.w.")) {
  1527. if (VecWidth == 128)
  1528. IID = Intrinsic::x86_sse2_pmulhu_w;
  1529. else if (VecWidth == 256)
  1530. IID = Intrinsic::x86_avx2_pmulhu_w;
  1531. else if (VecWidth == 512)
  1532. IID = Intrinsic::x86_avx512_pmulhu_w_512;
  1533. else
  1534. llvm_unreachable("Unexpected intrinsic");
  1535. } else if (Name.startswith("pmaddw.d.")) {
  1536. if (VecWidth == 128)
  1537. IID = Intrinsic::x86_sse2_pmadd_wd;
  1538. else if (VecWidth == 256)
  1539. IID = Intrinsic::x86_avx2_pmadd_wd;
  1540. else if (VecWidth == 512)
  1541. IID = Intrinsic::x86_avx512_pmaddw_d_512;
  1542. else
  1543. llvm_unreachable("Unexpected intrinsic");
  1544. } else if (Name.startswith("pmaddubs.w.")) {
  1545. if (VecWidth == 128)
  1546. IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
  1547. else if (VecWidth == 256)
  1548. IID = Intrinsic::x86_avx2_pmadd_ub_sw;
  1549. else if (VecWidth == 512)
  1550. IID = Intrinsic::x86_avx512_pmaddubs_w_512;
  1551. else
  1552. llvm_unreachable("Unexpected intrinsic");
  1553. } else if (Name.startswith("packsswb.")) {
  1554. if (VecWidth == 128)
  1555. IID = Intrinsic::x86_sse2_packsswb_128;
  1556. else if (VecWidth == 256)
  1557. IID = Intrinsic::x86_avx2_packsswb;
  1558. else if (VecWidth == 512)
  1559. IID = Intrinsic::x86_avx512_packsswb_512;
  1560. else
  1561. llvm_unreachable("Unexpected intrinsic");
  1562. } else if (Name.startswith("packssdw.")) {
  1563. if (VecWidth == 128)
  1564. IID = Intrinsic::x86_sse2_packssdw_128;
  1565. else if (VecWidth == 256)
  1566. IID = Intrinsic::x86_avx2_packssdw;
  1567. else if (VecWidth == 512)
  1568. IID = Intrinsic::x86_avx512_packssdw_512;
  1569. else
  1570. llvm_unreachable("Unexpected intrinsic");
  1571. } else if (Name.startswith("packuswb.")) {
  1572. if (VecWidth == 128)
  1573. IID = Intrinsic::x86_sse2_packuswb_128;
  1574. else if (VecWidth == 256)
  1575. IID = Intrinsic::x86_avx2_packuswb;
  1576. else if (VecWidth == 512)
  1577. IID = Intrinsic::x86_avx512_packuswb_512;
  1578. else
  1579. llvm_unreachable("Unexpected intrinsic");
  1580. } else if (Name.startswith("packusdw.")) {
  1581. if (VecWidth == 128)
  1582. IID = Intrinsic::x86_sse41_packusdw;
  1583. else if (VecWidth == 256)
  1584. IID = Intrinsic::x86_avx2_packusdw;
  1585. else if (VecWidth == 512)
  1586. IID = Intrinsic::x86_avx512_packusdw_512;
  1587. else
  1588. llvm_unreachable("Unexpected intrinsic");
  1589. } else if (Name.startswith("vpermilvar.")) {
  1590. if (VecWidth == 128 && EltWidth == 32)
  1591. IID = Intrinsic::x86_avx_vpermilvar_ps;
  1592. else if (VecWidth == 128 && EltWidth == 64)
  1593. IID = Intrinsic::x86_avx_vpermilvar_pd;
  1594. else if (VecWidth == 256 && EltWidth == 32)
  1595. IID = Intrinsic::x86_avx_vpermilvar_ps_256;
  1596. else if (VecWidth == 256 && EltWidth == 64)
  1597. IID = Intrinsic::x86_avx_vpermilvar_pd_256;
  1598. else if (VecWidth == 512 && EltWidth == 32)
  1599. IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
  1600. else if (VecWidth == 512 && EltWidth == 64)
  1601. IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
  1602. else
  1603. llvm_unreachable("Unexpected intrinsic");
  1604. } else if (Name == "cvtpd2dq.256") {
  1605. IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  1606. } else if (Name == "cvtpd2ps.256") {
  1607. IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  1608. } else if (Name == "cvttpd2dq.256") {
  1609. IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  1610. } else if (Name == "cvttps2dq.128") {
  1611. IID = Intrinsic::x86_sse2_cvttps2dq;
  1612. } else if (Name == "cvttps2dq.256") {
  1613. IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  1614. } else if (Name.startswith("permvar.")) {
  1615. bool IsFloat = CI.getType()->isFPOrFPVectorTy();
  1616. if (VecWidth == 256 && EltWidth == 32 && IsFloat)
  1617. IID = Intrinsic::x86_avx2_permps;
  1618. else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
  1619. IID = Intrinsic::x86_avx2_permd;
  1620. else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
  1621. IID = Intrinsic::x86_avx512_permvar_df_256;
  1622. else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
  1623. IID = Intrinsic::x86_avx512_permvar_di_256;
  1624. else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
  1625. IID = Intrinsic::x86_avx512_permvar_sf_512;
  1626. else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
  1627. IID = Intrinsic::x86_avx512_permvar_si_512;
  1628. else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
  1629. IID = Intrinsic::x86_avx512_permvar_df_512;
  1630. else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
  1631. IID = Intrinsic::x86_avx512_permvar_di_512;
  1632. else if (VecWidth == 128 && EltWidth == 16)
  1633. IID = Intrinsic::x86_avx512_permvar_hi_128;
  1634. else if (VecWidth == 256 && EltWidth == 16)
  1635. IID = Intrinsic::x86_avx512_permvar_hi_256;
  1636. else if (VecWidth == 512 && EltWidth == 16)
  1637. IID = Intrinsic::x86_avx512_permvar_hi_512;
  1638. else if (VecWidth == 128 && EltWidth == 8)
  1639. IID = Intrinsic::x86_avx512_permvar_qi_128;
  1640. else if (VecWidth == 256 && EltWidth == 8)
  1641. IID = Intrinsic::x86_avx512_permvar_qi_256;
  1642. else if (VecWidth == 512 && EltWidth == 8)
  1643. IID = Intrinsic::x86_avx512_permvar_qi_512;
  1644. else
  1645. llvm_unreachable("Unexpected intrinsic");
  1646. } else if (Name.startswith("dbpsadbw.")) {
  1647. if (VecWidth == 128)
  1648. IID = Intrinsic::x86_avx512_dbpsadbw_128;
  1649. else if (VecWidth == 256)
  1650. IID = Intrinsic::x86_avx512_dbpsadbw_256;
  1651. else if (VecWidth == 512)
  1652. IID = Intrinsic::x86_avx512_dbpsadbw_512;
  1653. else
  1654. llvm_unreachable("Unexpected intrinsic");
  1655. } else if (Name.startswith("pmultishift.qb.")) {
  1656. if (VecWidth == 128)
  1657. IID = Intrinsic::x86_avx512_pmultishift_qb_128;
  1658. else if (VecWidth == 256)
  1659. IID = Intrinsic::x86_avx512_pmultishift_qb_256;
  1660. else if (VecWidth == 512)
  1661. IID = Intrinsic::x86_avx512_pmultishift_qb_512;
  1662. else
  1663. llvm_unreachable("Unexpected intrinsic");
  1664. } else if (Name.startswith("conflict.")) {
  1665. if (Name[9] == 'd' && VecWidth == 128)
  1666. IID = Intrinsic::x86_avx512_conflict_d_128;
  1667. else if (Name[9] == 'd' && VecWidth == 256)
  1668. IID = Intrinsic::x86_avx512_conflict_d_256;
  1669. else if (Name[9] == 'd' && VecWidth == 512)
  1670. IID = Intrinsic::x86_avx512_conflict_d_512;
  1671. else if (Name[9] == 'q' && VecWidth == 128)
  1672. IID = Intrinsic::x86_avx512_conflict_q_128;
  1673. else if (Name[9] == 'q' && VecWidth == 256)
  1674. IID = Intrinsic::x86_avx512_conflict_q_256;
  1675. else if (Name[9] == 'q' && VecWidth == 512)
  1676. IID = Intrinsic::x86_avx512_conflict_q_512;
  1677. else
  1678. llvm_unreachable("Unexpected intrinsic");
  1679. } else if (Name.startswith("pavg.")) {
  1680. if (Name[5] == 'b' && VecWidth == 128)
  1681. IID = Intrinsic::x86_sse2_pavg_b;
  1682. else if (Name[5] == 'b' && VecWidth == 256)
  1683. IID = Intrinsic::x86_avx2_pavg_b;
  1684. else if (Name[5] == 'b' && VecWidth == 512)
  1685. IID = Intrinsic::x86_avx512_pavg_b_512;
  1686. else if (Name[5] == 'w' && VecWidth == 128)
  1687. IID = Intrinsic::x86_sse2_pavg_w;
  1688. else if (Name[5] == 'w' && VecWidth == 256)
  1689. IID = Intrinsic::x86_avx2_pavg_w;
  1690. else if (Name[5] == 'w' && VecWidth == 512)
  1691. IID = Intrinsic::x86_avx512_pavg_w_512;
  1692. else
  1693. llvm_unreachable("Unexpected intrinsic");
  1694. } else
  1695. return false;
  1696. SmallVector<Value *, 4> Args(CI.args());
  1697. Args.pop_back();
  1698. Args.pop_back();
  1699. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
  1700. Args);
  1701. unsigned NumArgs = CI.arg_size();
  1702. Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
  1703. CI.getArgOperand(NumArgs - 2));
  1704. return true;
  1705. }
  1706. /// Upgrade comment in call to inline asm that represents an objc retain release
  1707. /// marker.
  1708. void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  1709. size_t Pos;
  1710. if (AsmStr->find("mov\tfp") == 0 &&
  1711. AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
  1712. (Pos = AsmStr->find("# marker")) != std::string::npos) {
  1713. AsmStr->replace(Pos, 1, ";");
  1714. }
  1715. }
  1716. static Value *UpgradeARMIntrinsicCall(StringRef Name, CallInst *CI, Function *F,
  1717. IRBuilder<> &Builder) {
  1718. if (Name == "mve.vctp64.old") {
  1719. // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
  1720. // correct type.
  1721. Value *VCTP = Builder.CreateCall(
  1722. Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
  1723. CI->getArgOperand(0), CI->getName());
  1724. Value *C1 = Builder.CreateCall(
  1725. Intrinsic::getDeclaration(
  1726. F->getParent(), Intrinsic::arm_mve_pred_v2i,
  1727. {VectorType::get(Builder.getInt1Ty(), 2, false)}),
  1728. VCTP);
  1729. return Builder.CreateCall(
  1730. Intrinsic::getDeclaration(
  1731. F->getParent(), Intrinsic::arm_mve_pred_i2v,
  1732. {VectorType::get(Builder.getInt1Ty(), 4, false)}),
  1733. C1);
  1734. } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
  1735. Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
  1736. Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
  1737. Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
  1738. Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
  1739. Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
  1740. Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
  1741. Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
  1742. Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
  1743. Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
  1744. Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
  1745. Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
  1746. Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
  1747. Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
  1748. std::vector<Type *> Tys;
  1749. unsigned ID = CI->getIntrinsicID();
  1750. Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
  1751. switch (ID) {
  1752. case Intrinsic::arm_mve_mull_int_predicated:
  1753. case Intrinsic::arm_mve_vqdmull_predicated:
  1754. case Intrinsic::arm_mve_vldr_gather_base_predicated:
  1755. Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
  1756. break;
  1757. case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
  1758. case Intrinsic::arm_mve_vstr_scatter_base_predicated:
  1759. case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
  1760. Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
  1761. V2I1Ty};
  1762. break;
  1763. case Intrinsic::arm_mve_vldr_gather_offset_predicated:
  1764. Tys = {CI->getType(), CI->getOperand(0)->getType(),
  1765. CI->getOperand(1)->getType(), V2I1Ty};
  1766. break;
  1767. case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
  1768. Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
  1769. CI->getOperand(2)->getType(), V2I1Ty};
  1770. break;
  1771. case Intrinsic::arm_cde_vcx1q_predicated:
  1772. case Intrinsic::arm_cde_vcx1qa_predicated:
  1773. case Intrinsic::arm_cde_vcx2q_predicated:
  1774. case Intrinsic::arm_cde_vcx2qa_predicated:
  1775. case Intrinsic::arm_cde_vcx3q_predicated:
  1776. case Intrinsic::arm_cde_vcx3qa_predicated:
  1777. Tys = {CI->getOperand(1)->getType(), V2I1Ty};
  1778. break;
  1779. default:
  1780. llvm_unreachable("Unhandled Intrinsic!");
  1781. }
  1782. std::vector<Value *> Ops;
  1783. for (Value *Op : CI->args()) {
  1784. Type *Ty = Op->getType();
  1785. if (Ty->getScalarSizeInBits() == 1) {
  1786. Value *C1 = Builder.CreateCall(
  1787. Intrinsic::getDeclaration(
  1788. F->getParent(), Intrinsic::arm_mve_pred_v2i,
  1789. {VectorType::get(Builder.getInt1Ty(), 4, false)}),
  1790. Op);
  1791. Op = Builder.CreateCall(
  1792. Intrinsic::getDeclaration(F->getParent(),
  1793. Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
  1794. C1);
  1795. }
  1796. Ops.push_back(Op);
  1797. }
  1798. Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
  1799. return Builder.CreateCall(Fn, Ops, CI->getName());
  1800. }
  1801. llvm_unreachable("Unknown function for ARM CallInst upgrade.");
  1802. }
  1803. /// Upgrade a call to an old intrinsic. All argument and return casting must be
  1804. /// provided to seamlessly integrate with existing context.
  1805. void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  1806. Function *F = CI->getCalledFunction();
  1807. LLVMContext &C = CI->getContext();
  1808. IRBuilder<> Builder(C);
  1809. Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
  1810. assert(F && "Intrinsic call is not direct?");
  1811. if (!NewFn) {
  1812. // Get the Function's name.
  1813. StringRef Name = F->getName();
  1814. assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
  1815. Name = Name.substr(5);
  1816. bool IsX86 = Name.startswith("x86.");
  1817. if (IsX86)
  1818. Name = Name.substr(4);
  1819. bool IsNVVM = Name.startswith("nvvm.");
  1820. if (IsNVVM)
  1821. Name = Name.substr(5);
  1822. bool IsARM = Name.startswith("arm.");
  1823. if (IsARM)
  1824. Name = Name.substr(4);
  1825. if (IsX86 && Name.startswith("sse4a.movnt.")) {
  1826. Module *M = F->getParent();
  1827. SmallVector<Metadata *, 1> Elts;
  1828. Elts.push_back(
  1829. ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
  1830. MDNode *Node = MDNode::get(C, Elts);
  1831. Value *Arg0 = CI->getArgOperand(0);
  1832. Value *Arg1 = CI->getArgOperand(1);
  1833. // Nontemporal (unaligned) store of the 0'th element of the float/double
  1834. // vector.
  1835. Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
  1836. PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
  1837. Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
  1838. Value *Extract =
  1839. Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
  1840. StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
  1841. SI->setMetadata(M->getMDKindID("nontemporal"), Node);
  1842. // Remove intrinsic.
  1843. CI->eraseFromParent();
  1844. return;
  1845. }
  1846. if (IsX86 && (Name.startswith("avx.movnt.") ||
  1847. Name.startswith("avx512.storent."))) {
  1848. Module *M = F->getParent();
  1849. SmallVector<Metadata *, 1> Elts;
  1850. Elts.push_back(
  1851. ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
  1852. MDNode *Node = MDNode::get(C, Elts);
  1853. Value *Arg0 = CI->getArgOperand(0);
  1854. Value *Arg1 = CI->getArgOperand(1);
  1855. // Convert the type of the pointer to a pointer to the stored type.
  1856. Value *BC = Builder.CreateBitCast(Arg0,
  1857. PointerType::getUnqual(Arg1->getType()),
  1858. "cast");
  1859. StoreInst *SI = Builder.CreateAlignedStore(
  1860. Arg1, BC,
  1861. Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
  1862. SI->setMetadata(M->getMDKindID("nontemporal"), Node);
  1863. // Remove intrinsic.
  1864. CI->eraseFromParent();
  1865. return;
  1866. }
  1867. if (IsX86 && Name == "sse2.storel.dq") {
  1868. Value *Arg0 = CI->getArgOperand(0);
  1869. Value *Arg1 = CI->getArgOperand(1);
  1870. auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
  1871. Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
  1872. Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
  1873. Value *BC = Builder.CreateBitCast(Arg0,
  1874. PointerType::getUnqual(Elt->getType()),
  1875. "cast");
  1876. Builder.CreateAlignedStore(Elt, BC, Align(1));
  1877. // Remove intrinsic.
  1878. CI->eraseFromParent();
  1879. return;
  1880. }
  1881. if (IsX86 && (Name.startswith("sse.storeu.") ||
  1882. Name.startswith("sse2.storeu.") ||
  1883. Name.startswith("avx.storeu."))) {
  1884. Value *Arg0 = CI->getArgOperand(0);
  1885. Value *Arg1 = CI->getArgOperand(1);
  1886. Arg0 = Builder.CreateBitCast(Arg0,
  1887. PointerType::getUnqual(Arg1->getType()),
  1888. "cast");
  1889. Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  1890. // Remove intrinsic.
  1891. CI->eraseFromParent();
  1892. return;
  1893. }
  1894. if (IsX86 && Name == "avx512.mask.store.ss") {
  1895. Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
  1896. UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
  1897. Mask, false);
  1898. // Remove intrinsic.
  1899. CI->eraseFromParent();
  1900. return;
  1901. }
  1902. if (IsX86 && (Name.startswith("avx512.mask.store"))) {
  1903. // "avx512.mask.storeu." or "avx512.mask.store."
  1904. bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
  1905. UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
  1906. CI->getArgOperand(2), Aligned);
  1907. // Remove intrinsic.
  1908. CI->eraseFromParent();
  1909. return;
  1910. }
  1911. Value *Rep;
  1912. // Upgrade packed integer vector compare intrinsics to compare instructions.
  1913. if (IsX86 && (Name.startswith("sse2.pcmp") ||
  1914. Name.startswith("avx2.pcmp"))) {
  1915. // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
  1916. bool CmpEq = Name[9] == 'e';
  1917. Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
  1918. CI->getArgOperand(0), CI->getArgOperand(1));
  1919. Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  1920. } else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
  1921. Type *ExtTy = Type::getInt32Ty(C);
  1922. if (CI->getOperand(0)->getType()->isIntegerTy(8))
  1923. ExtTy = Type::getInt64Ty(C);
  1924. unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
  1925. ExtTy->getPrimitiveSizeInBits();
  1926. Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
  1927. Rep = Builder.CreateVectorSplat(NumElts, Rep);
  1928. } else if (IsX86 && (Name == "sse.sqrt.ss" ||
  1929. Name == "sse2.sqrt.sd")) {
  1930. Value *Vec = CI->getArgOperand(0);
  1931. Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
  1932. Function *Intr = Intrinsic::getDeclaration(F->getParent(),
  1933. Intrinsic::sqrt, Elt0->getType());
  1934. Elt0 = Builder.CreateCall(Intr, Elt0);
  1935. Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  1936. } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
  1937. Name.startswith("sse2.sqrt.p") ||
  1938. Name.startswith("sse.sqrt.p"))) {
  1939. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
  1940. Intrinsic::sqrt,
  1941. CI->getType()),
  1942. {CI->getArgOperand(0)});
  1943. } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
  1944. if (CI->arg_size() == 4 &&
  1945. (!isa<ConstantInt>(CI->getArgOperand(3)) ||
  1946. cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
  1947. Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
  1948. : Intrinsic::x86_avx512_sqrt_pd_512;
  1949. Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
  1950. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
  1951. IID), Args);
  1952. } else {
  1953. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
  1954. Intrinsic::sqrt,
  1955. CI->getType()),
  1956. {CI->getArgOperand(0)});
  1957. }
  1958. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  1959. CI->getArgOperand(1));
  1960. } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
  1961. Name.startswith("avx512.ptestnm"))) {
  1962. Value *Op0 = CI->getArgOperand(0);
  1963. Value *Op1 = CI->getArgOperand(1);
  1964. Value *Mask = CI->getArgOperand(2);
  1965. Rep = Builder.CreateAnd(Op0, Op1);
  1966. llvm::Type *Ty = Op0->getType();
  1967. Value *Zero = llvm::Constant::getNullValue(Ty);
  1968. ICmpInst::Predicate Pred =
  1969. Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
  1970. Rep = Builder.CreateICmp(Pred, Rep, Zero);
  1971. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
  1972. } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
  1973. unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
  1974. ->getNumElements();
  1975. Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
  1976. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  1977. CI->getArgOperand(1));
  1978. } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
  1979. unsigned NumElts = CI->getType()->getScalarSizeInBits();
  1980. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
  1981. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
  1982. int Indices[64];
  1983. for (unsigned i = 0; i != NumElts; ++i)
  1984. Indices[i] = i;
  1985. // First extract half of each vector. This gives better codegen than
  1986. // doing it in a single shuffle.
  1987. LHS = Builder.CreateShuffleVector(LHS, LHS,
  1988. makeArrayRef(Indices, NumElts / 2));
  1989. RHS = Builder.CreateShuffleVector(RHS, RHS,
  1990. makeArrayRef(Indices, NumElts / 2));
  1991. // Concat the vectors.
  1992. // NOTE: Operands have to be swapped to match intrinsic definition.
  1993. Rep = Builder.CreateShuffleVector(RHS, LHS,
  1994. makeArrayRef(Indices, NumElts));
  1995. Rep = Builder.CreateBitCast(Rep, CI->getType());
  1996. } else if (IsX86 && Name == "avx512.kand.w") {
  1997. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  1998. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  1999. Rep = Builder.CreateAnd(LHS, RHS);
  2000. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2001. } else if (IsX86 && Name == "avx512.kandn.w") {
  2002. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2003. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  2004. LHS = Builder.CreateNot(LHS);
  2005. Rep = Builder.CreateAnd(LHS, RHS);
  2006. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2007. } else if (IsX86 && Name == "avx512.kor.w") {
  2008. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2009. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  2010. Rep = Builder.CreateOr(LHS, RHS);
  2011. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2012. } else if (IsX86 && Name == "avx512.kxor.w") {
  2013. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2014. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  2015. Rep = Builder.CreateXor(LHS, RHS);
  2016. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2017. } else if (IsX86 && Name == "avx512.kxnor.w") {
  2018. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2019. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  2020. LHS = Builder.CreateNot(LHS);
  2021. Rep = Builder.CreateXor(LHS, RHS);
  2022. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2023. } else if (IsX86 && Name == "avx512.knot.w") {
  2024. Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2025. Rep = Builder.CreateNot(Rep);
  2026. Rep = Builder.CreateBitCast(Rep, CI->getType());
  2027. } else if (IsX86 &&
  2028. (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
  2029. Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
  2030. Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
  2031. Rep = Builder.CreateOr(LHS, RHS);
  2032. Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
  2033. Value *C;
  2034. if (Name[14] == 'c')
  2035. C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
  2036. else
  2037. C = ConstantInt::getNullValue(Builder.getInt16Ty());
  2038. Rep = Builder.CreateICmpEQ(Rep, C);
  2039. Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  2040. } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
  2041. Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
  2042. Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
  2043. Name == "sse.div.ss" || Name == "sse2.div.sd")) {
  2044. Type *I32Ty = Type::getInt32Ty(C);
  2045. Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
  2046. ConstantInt::get(I32Ty, 0));
  2047. Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
  2048. ConstantInt::get(I32Ty, 0));
  2049. Value *EltOp;
  2050. if (Name.contains(".add."))
  2051. EltOp = Builder.CreateFAdd(Elt0, Elt1);
  2052. else if (Name.contains(".sub."))
  2053. EltOp = Builder.CreateFSub(Elt0, Elt1);
  2054. else if (Name.contains(".mul."))
  2055. EltOp = Builder.CreateFMul(Elt0, Elt1);
  2056. else
  2057. EltOp = Builder.CreateFDiv(Elt0, Elt1);
  2058. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
  2059. ConstantInt::get(I32Ty, 0));
  2060. } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
  2061. // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
  2062. bool CmpEq = Name[16] == 'e';
  2063. Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
  2064. } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
  2065. Type *OpTy = CI->getArgOperand(0)->getType();
  2066. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  2067. Intrinsic::ID IID;
  2068. switch (VecWidth) {
  2069. default: llvm_unreachable("Unexpected intrinsic");
  2070. case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
  2071. case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
  2072. case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
  2073. }
  2074. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2075. { CI->getOperand(0), CI->getArgOperand(1) });
  2076. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  2077. } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
  2078. Type *OpTy = CI->getArgOperand(0)->getType();
  2079. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  2080. unsigned EltWidth = OpTy->getScalarSizeInBits();
  2081. Intrinsic::ID IID;
  2082. if (VecWidth == 128 && EltWidth == 32)
  2083. IID = Intrinsic::x86_avx512_fpclass_ps_128;
  2084. else if (VecWidth == 256 && EltWidth == 32)
  2085. IID = Intrinsic::x86_avx512_fpclass_ps_256;
  2086. else if (VecWidth == 512 && EltWidth == 32)
  2087. IID = Intrinsic::x86_avx512_fpclass_ps_512;
  2088. else if (VecWidth == 128 && EltWidth == 64)
  2089. IID = Intrinsic::x86_avx512_fpclass_pd_128;
  2090. else if (VecWidth == 256 && EltWidth == 64)
  2091. IID = Intrinsic::x86_avx512_fpclass_pd_256;
  2092. else if (VecWidth == 512 && EltWidth == 64)
  2093. IID = Intrinsic::x86_avx512_fpclass_pd_512;
  2094. else
  2095. llvm_unreachable("Unexpected intrinsic");
  2096. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2097. { CI->getOperand(0), CI->getArgOperand(1) });
  2098. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  2099. } else if (IsX86 && Name.startswith("avx512.cmp.p")) {
  2100. SmallVector<Value *, 4> Args(CI->args());
  2101. Type *OpTy = Args[0]->getType();
  2102. unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
  2103. unsigned EltWidth = OpTy->getScalarSizeInBits();
  2104. Intrinsic::ID IID;
  2105. if (VecWidth == 128 && EltWidth == 32)
  2106. IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
  2107. else if (VecWidth == 256 && EltWidth == 32)
  2108. IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
  2109. else if (VecWidth == 512 && EltWidth == 32)
  2110. IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
  2111. else if (VecWidth == 128 && EltWidth == 64)
  2112. IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
  2113. else if (VecWidth == 256 && EltWidth == 64)
  2114. IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
  2115. else if (VecWidth == 512 && EltWidth == 64)
  2116. IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
  2117. else
  2118. llvm_unreachable("Unexpected intrinsic");
  2119. Value *Mask = Constant::getAllOnesValue(CI->getType());
  2120. if (VecWidth == 512)
  2121. std::swap(Mask, Args.back());
  2122. Args.push_back(Mask);
  2123. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2124. Args);
  2125. } else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
  2126. // Integer compare intrinsics.
  2127. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2128. Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
  2129. } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
  2130. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2131. Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
  2132. } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
  2133. Name.startswith("avx512.cvtw2mask.") ||
  2134. Name.startswith("avx512.cvtd2mask.") ||
  2135. Name.startswith("avx512.cvtq2mask."))) {
  2136. Value *Op = CI->getArgOperand(0);
  2137. Value *Zero = llvm::Constant::getNullValue(Op->getType());
  2138. Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
  2139. Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
  } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
                       Name == "ssse3.pabs.w.128" ||
                       Name == "ssse3.pabs.d.128" ||
                       Name.startswith("avx2.pabs") ||
                       Name.startswith("avx512.mask.pabs"))) {
    Rep = upgradeAbs(Builder, *CI);
  2146. } else if (IsX86 && (Name == "sse41.pmaxsb" ||
  2147. Name == "sse2.pmaxs.w" ||
  2148. Name == "sse41.pmaxsd" ||
  2149. Name.startswith("avx2.pmaxs") ||
  2150. Name.startswith("avx512.mask.pmaxs"))) {
  2151. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
  2152. } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
  2153. Name == "sse41.pmaxuw" ||
  2154. Name == "sse41.pmaxud" ||
  2155. Name.startswith("avx2.pmaxu") ||
  2156. Name.startswith("avx512.mask.pmaxu"))) {
  2157. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
  2158. } else if (IsX86 && (Name == "sse41.pminsb" ||
  2159. Name == "sse2.pmins.w" ||
  2160. Name == "sse41.pminsd" ||
  2161. Name.startswith("avx2.pmins") ||
  2162. Name.startswith("avx512.mask.pmins"))) {
  2163. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
  2164. } else if (IsX86 && (Name == "sse2.pminu.b" ||
  2165. Name == "sse41.pminuw" ||
  2166. Name == "sse41.pminud" ||
  2167. Name.startswith("avx2.pminu") ||
  2168. Name.startswith("avx512.mask.pminu"))) {
  2169. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
  2170. } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
  2171. Name == "avx2.pmulu.dq" ||
  2172. Name == "avx512.pmulu.dq.512" ||
  2173. Name.startswith("avx512.mask.pmulu.dq."))) {
  2174. Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
  2175. } else if (IsX86 && (Name == "sse41.pmuldq" ||
  2176. Name == "avx2.pmul.dq" ||
  2177. Name == "avx512.pmul.dq.512" ||
  2178. Name.startswith("avx512.mask.pmul.dq."))) {
  2179. Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
  2180. } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
  2181. Name == "sse2.cvtsi2sd" ||
  2182. Name == "sse.cvtsi642ss" ||
  2183. Name == "sse2.cvtsi642sd")) {
  2184. Rep = Builder.CreateSIToFP(
  2185. CI->getArgOperand(1),
  2186. cast<VectorType>(CI->getType())->getElementType());
  2187. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  2188. } else if (IsX86 && Name == "avx512.cvtusi2sd") {
  2189. Rep = Builder.CreateUIToFP(
  2190. CI->getArgOperand(1),
  2191. cast<VectorType>(CI->getType())->getElementType());
  2192. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  2193. } else if (IsX86 && Name == "sse2.cvtss2sd") {
  2194. Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
  2195. Rep = Builder.CreateFPExt(
  2196. Rep, cast<VectorType>(CI->getType())->getElementType());
  2197. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
  2198. } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
  2199. Name == "sse2.cvtdq2ps" ||
  2200. Name == "avx.cvtdq2.pd.256" ||
  2201. Name == "avx.cvtdq2.ps.256" ||
  2202. Name.startswith("avx512.mask.cvtdq2pd.") ||
  2203. Name.startswith("avx512.mask.cvtudq2pd.") ||
  2204. Name.startswith("avx512.mask.cvtdq2ps.") ||
  2205. Name.startswith("avx512.mask.cvtudq2ps.") ||
  2206. Name.startswith("avx512.mask.cvtqq2pd.") ||
  2207. Name.startswith("avx512.mask.cvtuqq2pd.") ||
  2208. Name == "avx512.mask.cvtqq2ps.256" ||
  2209. Name == "avx512.mask.cvtqq2ps.512" ||
  2210. Name == "avx512.mask.cvtuqq2ps.256" ||
  2211. Name == "avx512.mask.cvtuqq2ps.512" ||
  2212. Name == "sse2.cvtps2pd" ||
  2213. Name == "avx.cvt.ps2.pd.256" ||
  2214. Name == "avx512.mask.cvtps2pd.128" ||
  2215. Name == "avx512.mask.cvtps2pd.256")) {
  2216. auto *DstTy = cast<FixedVectorType>(CI->getType());
  2217. Rep = CI->getArgOperand(0);
  2218. auto *SrcTy = cast<FixedVectorType>(Rep->getType());
  2219. unsigned NumDstElts = DstTy->getNumElements();
  2220. if (NumDstElts < SrcTy->getNumElements()) {
  2221. assert(NumDstElts == 2 && "Unexpected vector size");
  2222. Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
  2223. }
  2224. bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
  2225. bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
  2226. if (IsPS2PD)
  2227. Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
  2228. else if (CI->arg_size() == 4 &&
  2229. (!isa<ConstantInt>(CI->getArgOperand(3)) ||
  2230. cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
  2231. Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
  2232. : Intrinsic::x86_avx512_sitofp_round;
  2233. Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
  2234. { DstTy, SrcTy });
  2235. Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
  2236. } else {
  2237. Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
  2238. : Builder.CreateSIToFP(Rep, DstTy, "cvt");
  2239. }
  2240. if (CI->arg_size() >= 3)
  2241. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2242. CI->getArgOperand(1));
  2243. } else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
  2244. Name.startswith("vcvtph2ps."))) {
  2245. auto *DstTy = cast<FixedVectorType>(CI->getType());
  2246. Rep = CI->getArgOperand(0);
  2247. auto *SrcTy = cast<FixedVectorType>(Rep->getType());
  2248. unsigned NumDstElts = DstTy->getNumElements();
  2249. if (NumDstElts != SrcTy->getNumElements()) {
  2250. assert(NumDstElts == 4 && "Unexpected vector size");
  2251. Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
  2252. }
  2253. Rep = Builder.CreateBitCast(
  2254. Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
  2255. Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
  2256. if (CI->arg_size() >= 3)
  2257. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2258. CI->getArgOperand(1));
  2259. } else if (IsX86 && Name.startswith("avx512.mask.load")) {
  2260. // "avx512.mask.loadu." or "avx512.mask.load."
  2261. bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
  2262. Rep =
  2263. UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
  2264. CI->getArgOperand(2), Aligned);
  2265. } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
  2266. auto *ResultTy = cast<FixedVectorType>(CI->getType());
  2267. Type *PtrTy = ResultTy->getElementType();
  2268. // Cast the pointer to element type.
  2269. Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
  2270. llvm::PointerType::getUnqual(PtrTy));
  2271. Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
  2272. ResultTy->getNumElements());
  2273. Function *ELd = Intrinsic::getDeclaration(F->getParent(),
  2274. Intrinsic::masked_expandload,
  2275. ResultTy);
  2276. Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
  2277. } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
  2278. auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
  2279. Type *PtrTy = ResultTy->getElementType();
  2280. // Cast the pointer to element type.
  2281. Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
  2282. llvm::PointerType::getUnqual(PtrTy));
  2283. Value *MaskVec =
  2284. getX86MaskVec(Builder, CI->getArgOperand(2),
  2285. cast<FixedVectorType>(ResultTy)->getNumElements());
  2286. Function *CSt = Intrinsic::getDeclaration(F->getParent(),
  2287. Intrinsic::masked_compressstore,
  2288. ResultTy);
  2289. Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
  2290. } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
  2291. Name.startswith("avx512.mask.expand."))) {
  2292. auto *ResultTy = cast<FixedVectorType>(CI->getType());
  2293. Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
  2294. ResultTy->getNumElements());
  2295. bool IsCompress = Name[12] == 'c';
  2296. Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
  2297. : Intrinsic::x86_avx512_mask_expand;
  2298. Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
  2299. Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
  2300. MaskVec });
  2301. } else if (IsX86 && Name.startswith("xop.vpcom")) {
  2302. bool IsSigned;
  2303. if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
  2304. Name.endswith("uq"))
  2305. IsSigned = false;
  2306. else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
  2307. Name.endswith("q"))
  2308. IsSigned = true;
  2309. else
  2310. llvm_unreachable("Unknown suffix");
  2311. unsigned Imm;
  2312. if (CI->arg_size() == 3) {
  2313. Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2314. } else {
  2315. Name = Name.substr(9); // strip off "xop.vpcom"
  2316. if (Name.startswith("lt"))
  2317. Imm = 0;
  2318. else if (Name.startswith("le"))
  2319. Imm = 1;
  2320. else if (Name.startswith("gt"))
  2321. Imm = 2;
  2322. else if (Name.startswith("ge"))
  2323. Imm = 3;
  2324. else if (Name.startswith("eq"))
  2325. Imm = 4;
  2326. else if (Name.startswith("ne"))
  2327. Imm = 5;
  2328. else if (Name.startswith("false"))
  2329. Imm = 6;
  2330. else if (Name.startswith("true"))
  2331. Imm = 7;
  2332. else
  2333. llvm_unreachable("Unknown condition");
  2334. }
  2335. Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
  2336. } else if (IsX86 && Name.startswith("xop.vpcmov")) {
  2337. Value *Sel = CI->getArgOperand(2);
  2338. Value *NotSel = Builder.CreateNot(Sel);
  2339. Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
  2340. Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
  2341. Rep = Builder.CreateOr(Sel0, Sel1);
  2342. } else if (IsX86 && (Name.startswith("xop.vprot") ||
  2343. Name.startswith("avx512.prol") ||
  2344. Name.startswith("avx512.mask.prol"))) {
  2345. Rep = upgradeX86Rotate(Builder, *CI, false);
  2346. } else if (IsX86 && (Name.startswith("avx512.pror") ||
  2347. Name.startswith("avx512.mask.pror"))) {
  2348. Rep = upgradeX86Rotate(Builder, *CI, true);
  2349. } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
  2350. Name.startswith("avx512.mask.vpshld") ||
  2351. Name.startswith("avx512.maskz.vpshld"))) {
  2352. bool ZeroMask = Name[11] == 'z';
  2353. Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
  2354. } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
  2355. Name.startswith("avx512.mask.vpshrd") ||
  2356. Name.startswith("avx512.maskz.vpshrd"))) {
  2357. bool ZeroMask = Name[11] == 'z';
  2358. Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
  2359. } else if (IsX86 && Name == "sse42.crc32.64.8") {
  2360. Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
  2361. Intrinsic::x86_sse42_crc32_32_8);
  2362. Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
  2363. Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
  2364. Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  2365. } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
  2366. Name.startswith("avx512.vbroadcast.s"))) {
  2367. // Replace broadcasts with a series of insertelements.
  2368. auto *VecTy = cast<FixedVectorType>(CI->getType());
  2369. Type *EltTy = VecTy->getElementType();
  2370. unsigned EltNum = VecTy->getNumElements();
  2371. Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
  2372. EltTy->getPointerTo());
  2373. Value *Load = Builder.CreateLoad(EltTy, Cast);
  2374. Type *I32Ty = Type::getInt32Ty(C);
  2375. Rep = PoisonValue::get(VecTy);
  2376. for (unsigned I = 0; I < EltNum; ++I)
  2377. Rep = Builder.CreateInsertElement(Rep, Load,
  2378. ConstantInt::get(I32Ty, I));
  2379. } else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
  2380. Name.startswith("sse41.pmovzx") ||
  2381. Name.startswith("avx2.pmovsx") ||
  2382. Name.startswith("avx2.pmovzx") ||
  2383. Name.startswith("avx512.mask.pmovsx") ||
  2384. Name.startswith("avx512.mask.pmovzx"))) {
  2385. auto *DstTy = cast<FixedVectorType>(CI->getType());
  2386. unsigned NumDstElts = DstTy->getNumElements();
  2387. // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
  2388. SmallVector<int, 8> ShuffleMask(NumDstElts);
  2389. for (unsigned i = 0; i != NumDstElts; ++i)
  2390. ShuffleMask[i] = i;
  2391. Value *SV =
  2392. Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
  2393. bool DoSext = (StringRef::npos != Name.find("pmovsx"));
  2394. Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
  2395. : Builder.CreateZExt(SV, DstTy);
  2396. // If there are 3 arguments, it's a masked intrinsic so we need a select.
  2397. if (CI->arg_size() == 3)
  2398. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2399. CI->getArgOperand(1));
  2400. } else if (Name == "avx512.mask.pmov.qd.256" ||
  2401. Name == "avx512.mask.pmov.qd.512" ||
  2402. Name == "avx512.mask.pmov.wb.256" ||
  2403. Name == "avx512.mask.pmov.wb.512") {
  2404. Type *Ty = CI->getArgOperand(1)->getType();
  2405. Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
  2406. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2407. CI->getArgOperand(1));
  2408. } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
  2409. Name == "avx2.vbroadcasti128")) {
  2410. // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
  2411. Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
  2412. unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
  2413. auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
  2414. Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
  2415. PointerType::getUnqual(VT));
  2416. Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
  2417. if (NumSrcElts == 2)
  2418. Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
  2419. else
  2420. Rep = Builder.CreateShuffleVector(
  2421. Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
  2422. } else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
  2423. Name.startswith("avx512.mask.shuf.f"))) {
  2424. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2425. Type *VT = CI->getType();
  2426. unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
  2427. unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
  2428. unsigned ControlBitsMask = NumLanes - 1;
  2429. unsigned NumControlBits = NumLanes / 2;
  2430. SmallVector<int, 8> ShuffleMask(0);
  2431. for (unsigned l = 0; l != NumLanes; ++l) {
  2432. unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
  2433. // We actually need the other source.
  2434. if (l >= NumLanes / 2)
  2435. LaneMask += NumLanes;
  2436. for (unsigned i = 0; i != NumElementsInLane; ++i)
  2437. ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
  2438. }
  2439. Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
  2440. CI->getArgOperand(1), ShuffleMask);
  2441. Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
  2442. CI->getArgOperand(3));
  } else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
                       Name.startswith("avx512.mask.broadcasti"))) {
    unsigned NumSrcElts =
        cast<FixedVectorType>(CI->getArgOperand(0)->getType())
            ->getNumElements();
    unsigned NumDstElts =
        cast<FixedVectorType>(CI->getType())->getNumElements();
    SmallVector<int, 8> ShuffleMask(NumDstElts);
    for (unsigned i = 0; i != NumDstElts; ++i)
      ShuffleMask[i] = i % NumSrcElts;
    Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
                                      CI->getArgOperand(0),
                                      ShuffleMask);
    Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
                        CI->getArgOperand(1));
  2458. } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
  2459. Name.startswith("avx2.vbroadcast") ||
  2460. Name.startswith("avx512.pbroadcast") ||
  2461. Name.startswith("avx512.mask.broadcast.s"))) {
  2462. // Replace vp?broadcasts with a vector shuffle.
  2463. Value *Op = CI->getArgOperand(0);
  2464. ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
  2465. Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
  2466. SmallVector<int, 8> M;
  2467. ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
  2468. Rep = Builder.CreateShuffleVector(Op, M);
  2469. if (CI->arg_size() == 3)
  2470. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2471. CI->getArgOperand(1));
  2472. } else if (IsX86 && (Name.startswith("sse2.padds.") ||
  2473. Name.startswith("avx2.padds.") ||
  2474. Name.startswith("avx512.padds.") ||
  2475. Name.startswith("avx512.mask.padds."))) {
  2476. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
  2477. } else if (IsX86 && (Name.startswith("sse2.psubs.") ||
  2478. Name.startswith("avx2.psubs.") ||
  2479. Name.startswith("avx512.psubs.") ||
  2480. Name.startswith("avx512.mask.psubs."))) {
  2481. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
  2482. } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
  2483. Name.startswith("avx2.paddus.") ||
  2484. Name.startswith("avx512.mask.paddus."))) {
  2485. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
  2486. } else if (IsX86 && (Name.startswith("sse2.psubus.") ||
  2487. Name.startswith("avx2.psubus.") ||
  2488. Name.startswith("avx512.mask.psubus."))) {
  2489. Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
  2490. } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
  2491. Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
  2492. CI->getArgOperand(1),
  2493. CI->getArgOperand(2),
  2494. CI->getArgOperand(3),
  2495. CI->getArgOperand(4),
  2496. false);
  2497. } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
  2498. Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
  2499. CI->getArgOperand(1),
  2500. CI->getArgOperand(2),
  2501. CI->getArgOperand(3),
  2502. CI->getArgOperand(4),
  2503. true);
  2504. } else if (IsX86 && (Name == "sse2.psll.dq" ||
  2505. Name == "avx2.psll.dq")) {
  2506. // 128/256-bit shift left specified in bits.
  2507. unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2508. Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
  2509. Shift / 8); // Shift is in bits.
  2510. } else if (IsX86 && (Name == "sse2.psrl.dq" ||
  2511. Name == "avx2.psrl.dq")) {
  2512. // 128/256-bit shift right specified in bits.
  2513. unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2514. Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
  2515. Shift / 8); // Shift is in bits.
  2516. } else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
  2517. Name == "avx2.psll.dq.bs" ||
  2518. Name == "avx512.psll.dq.512")) {
  2519. // 128/256/512-bit shift left specified in bytes.
  2520. unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2521. Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  2522. } else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
  2523. Name == "avx2.psrl.dq.bs" ||
  2524. Name == "avx512.psrl.dq.512")) {
  2525. // 128/256/512-bit shift right specified in bytes.
  2526. unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2527. Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
  2528. } else if (IsX86 && (Name == "sse41.pblendw" ||
  2529. Name.startswith("sse41.blendp") ||
  2530. Name.startswith("avx.blend.p") ||
  2531. Name == "avx2.pblendw" ||
  2532. Name.startswith("avx2.pblendd."))) {
  2533. Value *Op0 = CI->getArgOperand(0);
  2534. Value *Op1 = CI->getArgOperand(1);
  2535. unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2536. auto *VecTy = cast<FixedVectorType>(CI->getType());
  2537. unsigned NumElts = VecTy->getNumElements();
  2538. SmallVector<int, 16> Idxs(NumElts);
  2539. for (unsigned i = 0; i != NumElts; ++i)
  2540. Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
  2541. Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  2542. } else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
  2543. Name == "avx2.vinserti128" ||
  2544. Name.startswith("avx512.mask.insert"))) {
  2545. Value *Op0 = CI->getArgOperand(0);
  2546. Value *Op1 = CI->getArgOperand(1);
  2547. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2548. unsigned DstNumElts =
  2549. cast<FixedVectorType>(CI->getType())->getNumElements();
  2550. unsigned SrcNumElts =
  2551. cast<FixedVectorType>(Op1->getType())->getNumElements();
  2552. unsigned Scale = DstNumElts / SrcNumElts;
  2553. // Mask off the high bits of the immediate value; hardware ignores those.
  2554. Imm = Imm % Scale;
  2555. // Extend the second operand into a vector the size of the destination.
  2556. SmallVector<int, 8> Idxs(DstNumElts);
  2557. for (unsigned i = 0; i != SrcNumElts; ++i)
  2558. Idxs[i] = i;
  2559. for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
  2560. Idxs[i] = SrcNumElts;
  2561. Rep = Builder.CreateShuffleVector(Op1, Idxs);
  2562. // Insert the second operand into the first operand.
  2563. // Note that there is no guarantee that instruction lowering will actually
  2564. // produce a vinsertf128 instruction for the created shuffles. In
  2565. // particular, the 0 immediate case involves no lane changes, so it can
  2566. // be handled as a blend.
  2567. // Example of shuffle mask for 32-bit elements:
  2568. // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  2569. // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
    // First fill with identity mask.
  2571. for (unsigned i = 0; i != DstNumElts; ++i)
  2572. Idxs[i] = i;
  2573. // Then replace the elements where we need to insert.
  2574. for (unsigned i = 0; i != SrcNumElts; ++i)
  2575. Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
  2576. Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
  2577. // If the intrinsic has a mask operand, handle that.
  2578. if (CI->arg_size() == 5)
  2579. Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
  2580. CI->getArgOperand(3));
  2581. } else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
  2582. Name == "avx2.vextracti128" ||
  2583. Name.startswith("avx512.mask.vextract"))) {
  2584. Value *Op0 = CI->getArgOperand(0);
  2585. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2586. unsigned DstNumElts =
  2587. cast<FixedVectorType>(CI->getType())->getNumElements();
  2588. unsigned SrcNumElts =
  2589. cast<FixedVectorType>(Op0->getType())->getNumElements();
  2590. unsigned Scale = SrcNumElts / DstNumElts;
  2591. // Mask off the high bits of the immediate value; hardware ignores those.
  2592. Imm = Imm % Scale;
  2593. // Get indexes for the subvector of the input vector.
  2594. SmallVector<int, 8> Idxs(DstNumElts);
  2595. for (unsigned i = 0; i != DstNumElts; ++i) {
  2596. Idxs[i] = i + (Imm * DstNumElts);
  2597. }
  2598. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2599. // If the intrinsic has a mask operand, handle that.
  2600. if (CI->arg_size() == 4)
  2601. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2602. CI->getArgOperand(2));
  2603. } else if (!IsX86 && Name == "stackprotectorcheck") {
  2604. Rep = nullptr;
  2605. } else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
  2606. Name.startswith("avx512.mask.perm.di."))) {
  2607. Value *Op0 = CI->getArgOperand(0);
  2608. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2609. auto *VecTy = cast<FixedVectorType>(CI->getType());
  2610. unsigned NumElts = VecTy->getNumElements();
  2611. SmallVector<int, 8> Idxs(NumElts);
  2612. for (unsigned i = 0; i != NumElts; ++i)
  2613. Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
  2614. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2615. if (CI->arg_size() == 4)
  2616. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2617. CI->getArgOperand(2));
  2618. } else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
  2619. Name == "avx2.vperm2i128")) {
  2620. // The immediate permute control byte looks like this:
  2621. // [1:0] - select 128 bits from sources for low half of destination
  2622. // [2] - ignore
  2623. // [3] - zero low half of destination
  2624. // [5:4] - select 128 bits from sources for high half of destination
  2625. // [6] - ignore
  2626. // [7] - zero high half of destination
  2627. uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2628. unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2629. unsigned HalfSize = NumElts / 2;
  2630. SmallVector<int, 8> ShuffleMask(NumElts);
  2631. // Determine which operand(s) are actually in use for this instruction.
  2632. Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
  2633. Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
  2634. // If needed, replace operands based on zero mask.
  2635. V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
  2636. V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
  2637. // Permute low half of result.
  2638. unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
  2639. for (unsigned i = 0; i < HalfSize; ++i)
  2640. ShuffleMask[i] = StartIndex + i;
  2641. // Permute high half of result.
  2642. StartIndex = (Imm & 0x10) ? HalfSize : 0;
  2643. for (unsigned i = 0; i < HalfSize; ++i)
  2644. ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
  2645. Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
  2646. } else if (IsX86 && (Name.startswith("avx.vpermil.") ||
  2647. Name == "sse2.pshuf.d" ||
  2648. Name.startswith("avx512.mask.vpermil.p") ||
  2649. Name.startswith("avx512.mask.pshuf.d."))) {
  2650. Value *Op0 = CI->getArgOperand(0);
  2651. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2652. auto *VecTy = cast<FixedVectorType>(CI->getType());
  2653. unsigned NumElts = VecTy->getNumElements();
  2654. // Calculate the size of each index in the immediate.
  2655. unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
  2656. unsigned IdxMask = ((1 << IdxSize) - 1);
  2657. SmallVector<int, 8> Idxs(NumElts);
  2658. // Lookup the bits for this element, wrapping around the immediate every
  2659. // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
  2660. // to offset by the first index of each group.
  2661. for (unsigned i = 0; i != NumElts; ++i)
  2662. Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
  2663. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2664. if (CI->arg_size() == 4)
  2665. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2666. CI->getArgOperand(2));
  2667. } else if (IsX86 && (Name == "sse2.pshufl.w" ||
  2668. Name.startswith("avx512.mask.pshufl.w."))) {
  2669. Value *Op0 = CI->getArgOperand(0);
  2670. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2671. unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2672. SmallVector<int, 16> Idxs(NumElts);
  2673. for (unsigned l = 0; l != NumElts; l += 8) {
  2674. for (unsigned i = 0; i != 4; ++i)
  2675. Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
  2676. for (unsigned i = 4; i != 8; ++i)
  2677. Idxs[i + l] = i + l;
  2678. }
  2679. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2680. if (CI->arg_size() == 4)
  2681. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2682. CI->getArgOperand(2));
  2683. } else if (IsX86 && (Name == "sse2.pshufh.w" ||
  2684. Name.startswith("avx512.mask.pshufh.w."))) {
  2685. Value *Op0 = CI->getArgOperand(0);
  2686. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
  2687. unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2688. SmallVector<int, 16> Idxs(NumElts);
  2689. for (unsigned l = 0; l != NumElts; l += 8) {
  2690. for (unsigned i = 0; i != 4; ++i)
  2691. Idxs[i + l] = i + l;
  2692. for (unsigned i = 0; i != 4; ++i)
  2693. Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
  2694. }
  2695. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2696. if (CI->arg_size() == 4)
  2697. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2698. CI->getArgOperand(2));
  2699. } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
  2700. Value *Op0 = CI->getArgOperand(0);
  2701. Value *Op1 = CI->getArgOperand(1);
  2702. unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
  2703. unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2704. unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
  2705. unsigned HalfLaneElts = NumLaneElts / 2;
  2706. SmallVector<int, 16> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      // Base index is the starting element of the lane.
      Idxs[i] = i - (i % NumLaneElts);
      // If we are halfway through the lane, switch to the other source.
      if ((i % NumLaneElts) >= HalfLaneElts)
        Idxs[i] += NumElts;
      // Now select the specific element by adding HalfLaneElts bits from the
      // immediate, wrapping around the immediate every 8 bits.
      Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
    }
  2717. Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  2718. Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
  2719. CI->getArgOperand(3));
  2720. } else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
  2721. Name.startswith("avx512.mask.movshdup") ||
  2722. Name.startswith("avx512.mask.movsldup"))) {
  2723. Value *Op0 = CI->getArgOperand(0);
  2724. unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2725. unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
  2726. unsigned Offset = 0;
  2727. if (Name.startswith("avx512.mask.movshdup."))
  2728. Offset = 1;
  2729. SmallVector<int, 16> Idxs(NumElts);
  2730. for (unsigned l = 0; l != NumElts; l += NumLaneElts)
  2731. for (unsigned i = 0; i != NumLaneElts; i += 2) {
  2732. Idxs[i + l + 0] = i + l + Offset;
  2733. Idxs[i + l + 1] = i + l + Offset;
  2734. }
  2735. Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
  2736. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2737. CI->getArgOperand(1));
  2738. } else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
  2739. Name.startswith("avx512.mask.unpckl."))) {
  2740. Value *Op0 = CI->getArgOperand(0);
  2741. Value *Op1 = CI->getArgOperand(1);
  2742. int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2743. int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
  2744. SmallVector<int, 64> Idxs(NumElts);
  2745. for (int l = 0; l != NumElts; l += NumLaneElts)
  2746. for (int i = 0; i != NumLaneElts; ++i)
  2747. Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
  2748. Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  2749. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2750. CI->getArgOperand(2));
  2751. } else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
  2752. Name.startswith("avx512.mask.unpckh."))) {
  2753. Value *Op0 = CI->getArgOperand(0);
  2754. Value *Op1 = CI->getArgOperand(1);
  2755. int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
  2756. int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
  2757. SmallVector<int, 64> Idxs(NumElts);
  2758. for (int l = 0; l != NumElts; l += NumLaneElts)
  2759. for (int i = 0; i != NumLaneElts; ++i)
  2760. Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
  2761. Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
  2762. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2763. CI->getArgOperand(2));
  2764. } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
  2765. Name.startswith("avx512.mask.pand."))) {
  2766. VectorType *FTy = cast<VectorType>(CI->getType());
  2767. VectorType *ITy = VectorType::getInteger(FTy);
  2768. Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
  2769. Builder.CreateBitCast(CI->getArgOperand(1), ITy));
  2770. Rep = Builder.CreateBitCast(Rep, FTy);
  2771. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2772. CI->getArgOperand(2));
  2773. } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
  2774. Name.startswith("avx512.mask.pandn."))) {
  2775. VectorType *FTy = cast<VectorType>(CI->getType());
  2776. VectorType *ITy = VectorType::getInteger(FTy);
  2777. Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
  2778. Rep = Builder.CreateAnd(Rep,
  2779. Builder.CreateBitCast(CI->getArgOperand(1), ITy));
  2780. Rep = Builder.CreateBitCast(Rep, FTy);
  2781. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2782. CI->getArgOperand(2));
  2783. } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
  2784. Name.startswith("avx512.mask.por."))) {
  2785. VectorType *FTy = cast<VectorType>(CI->getType());
  2786. VectorType *ITy = VectorType::getInteger(FTy);
  2787. Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
  2788. Builder.CreateBitCast(CI->getArgOperand(1), ITy));
  2789. Rep = Builder.CreateBitCast(Rep, FTy);
  2790. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2791. CI->getArgOperand(2));
  2792. } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
  2793. Name.startswith("avx512.mask.pxor."))) {
  2794. VectorType *FTy = cast<VectorType>(CI->getType());
  2795. VectorType *ITy = VectorType::getInteger(FTy);
  2796. Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
  2797. Builder.CreateBitCast(CI->getArgOperand(1), ITy));
  2798. Rep = Builder.CreateBitCast(Rep, FTy);
  2799. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2800. CI->getArgOperand(2));
  2801. } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
  2802. Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
  2803. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2804. CI->getArgOperand(2));
  2805. } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
  2806. Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
  2807. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2808. CI->getArgOperand(2));
  2809. } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
  2810. Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
  2811. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2812. CI->getArgOperand(2));
  2813. } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
  2814. if (Name.endswith(".512")) {
  2815. Intrinsic::ID IID;
  2816. if (Name[17] == 's')
  2817. IID = Intrinsic::x86_avx512_add_ps_512;
  2818. else
  2819. IID = Intrinsic::x86_avx512_add_pd_512;
  2820. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2821. { CI->getArgOperand(0), CI->getArgOperand(1),
  2822. CI->getArgOperand(4) });
  2823. } else {
  2824. Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
  2825. }
  2826. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2827. CI->getArgOperand(2));
  2828. } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
  2829. if (Name.endswith(".512")) {
  2830. Intrinsic::ID IID;
  2831. if (Name[17] == 's')
  2832. IID = Intrinsic::x86_avx512_div_ps_512;
  2833. else
  2834. IID = Intrinsic::x86_avx512_div_pd_512;
  2835. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2836. { CI->getArgOperand(0), CI->getArgOperand(1),
  2837. CI->getArgOperand(4) });
  2838. } else {
  2839. Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
  2840. }
  2841. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2842. CI->getArgOperand(2));
  2843. } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
  2844. if (Name.endswith(".512")) {
  2845. Intrinsic::ID IID;
  2846. if (Name[17] == 's')
  2847. IID = Intrinsic::x86_avx512_mul_ps_512;
  2848. else
  2849. IID = Intrinsic::x86_avx512_mul_pd_512;
  2850. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2851. { CI->getArgOperand(0), CI->getArgOperand(1),
  2852. CI->getArgOperand(4) });
  2853. } else {
  2854. Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
  2855. }
  2856. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2857. CI->getArgOperand(2));
  2858. } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
  2859. if (Name.endswith(".512")) {
  2860. Intrinsic::ID IID;
  2861. if (Name[17] == 's')
  2862. IID = Intrinsic::x86_avx512_sub_ps_512;
  2863. else
  2864. IID = Intrinsic::x86_avx512_sub_pd_512;
  2865. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2866. { CI->getArgOperand(0), CI->getArgOperand(1),
  2867. CI->getArgOperand(4) });
  2868. } else {
  2869. Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
  2870. }
  2871. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2872. CI->getArgOperand(2));
  2873. } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
  2874. Name.startswith("avx512.mask.min.p")) &&
  2875. Name.drop_front(18) == ".512") {
  2876. bool IsDouble = Name[17] == 'd';
  2877. bool IsMin = Name[13] == 'i';
  2878. static const Intrinsic::ID MinMaxTbl[2][2] = {
  2879. { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
  2880. { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
  2881. };
  2882. Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
  2883. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
  2884. { CI->getArgOperand(0), CI->getArgOperand(1),
  2885. CI->getArgOperand(4) });
  2886. Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
  2887. CI->getArgOperand(2));
  2888. } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
  2889. Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
  2890. Intrinsic::ctlz,
  2891. CI->getType()),
  2892. { CI->getArgOperand(0), Builder.getInt1(false) });
  2893. Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
  2894. CI->getArgOperand(1));
  2895. } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
  2896. bool IsImmediate = Name[16] == 'i' ||
  2897. (Name.size() > 18 && Name[18] == 'i');
  2898. bool IsVariable = Name[16] == 'v';
  2899. char Size = Name[16] == '.' ? Name[17] :
  2900. Name[17] == '.' ? Name[18] :
  2901. Name[18] == '.' ? Name[19] :
  2902. Name[20];
  2903. Intrinsic::ID IID;
  2904. if (IsVariable && Name[17] != '.') {
  2905. if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
  2906. IID = Intrinsic::x86_avx2_psllv_q;
  2907. else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
  2908. IID = Intrinsic::x86_avx2_psllv_q_256;
  2909. else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
  2910. IID = Intrinsic::x86_avx2_psllv_d;
  2911. else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
  2912. IID = Intrinsic::x86_avx2_psllv_d_256;
  2913. else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
  2914. IID = Intrinsic::x86_avx512_psllv_w_128;
  2915. else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
  2916. IID = Intrinsic::x86_avx512_psllv_w_256;
  2917. else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
  2918. IID = Intrinsic::x86_avx512_psllv_w_512;
  2919. else
  2920. llvm_unreachable("Unexpected size");
  2921. } else if (Name.endswith(".128")) {
  2922. if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
  2923. IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
  2924. : Intrinsic::x86_sse2_psll_d;
  2925. else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
  2926. IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
  2927. : Intrinsic::x86_sse2_psll_q;
  2928. else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
  2929. IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
  2930. : Intrinsic::x86_sse2_psll_w;
  2931. else
  2932. llvm_unreachable("Unexpected size");
  2933. } else if (Name.endswith(".256")) {
  2934. if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
  2935. IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
  2936. : Intrinsic::x86_avx2_psll_d;
  2937. else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
  2938. IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
  2939. : Intrinsic::x86_avx2_psll_q;
  2940. else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
  2941. IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
  2942. : Intrinsic::x86_avx2_psll_w;
  2943. else
  2944. llvm_unreachable("Unexpected size");
  2945. } else {
  2946. if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
  2947. IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
  2948. IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
  2949. Intrinsic::x86_avx512_psll_d_512;
  2950. else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
  2951. IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
  2952. IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
  2953. Intrinsic::x86_avx512_psll_q_512;
  2954. else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
  2955. IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
  2956. : Intrinsic::x86_avx512_psll_w_512;
  2957. else
  2958. llvm_unreachable("Unexpected size");
  2959. }
  2960. Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  2961. } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
  2962. bool IsImmediate = Name[16] == 'i' ||
  2963. (Name.size() > 18 && Name[18] == 'i');
  2964. bool IsVariable = Name[16] == 'v';
  2965. char Size = Name[16] == '.' ? Name[17] :
  2966. Name[17] == '.' ? Name[18] :
  2967. Name[18] == '.' ? Name[19] :
  2968. Name[20];
  2969. Intrinsic::ID IID;
  2970. if (IsVariable && Name[17] != '.') {
  2971. if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
  2972. IID = Intrinsic::x86_avx2_psrlv_q;
  2973. else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
  2974. IID = Intrinsic::x86_avx2_psrlv_q_256;
  2975. else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
  2976. IID = Intrinsic::x86_avx2_psrlv_d;
  2977. else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
  2978. IID = Intrinsic::x86_avx2_psrlv_d_256;
  2979. else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
  2980. IID = Intrinsic::x86_avx512_psrlv_w_128;
  2981. else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
  2982. IID = Intrinsic::x86_avx512_psrlv_w_256;
  2983. else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
  2984. IID = Intrinsic::x86_avx512_psrlv_w_512;
  2985. else
  2986. llvm_unreachable("Unexpected size");
  2987. } else if (Name.endswith(".128")) {
  2988. if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
  2989. IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
  2990. : Intrinsic::x86_sse2_psrl_d;
  2991. else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
  2992. IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
  2993. : Intrinsic::x86_sse2_psrl_q;
  2994. else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
  2995. IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
  2996. : Intrinsic::x86_sse2_psrl_w;
  2997. else
  2998. llvm_unreachable("Unexpected size");
  2999. } else if (Name.endswith(".256")) {
  3000. if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
  3001. IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
  3002. : Intrinsic::x86_avx2_psrl_d;
  3003. else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
  3004. IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
  3005. : Intrinsic::x86_avx2_psrl_q;
  3006. else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
  3007. IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
  3008. : Intrinsic::x86_avx2_psrl_w;
  3009. else
  3010. llvm_unreachable("Unexpected size");
  3011. } else {
  3012. if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
  3013. IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
  3014. IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
  3015. Intrinsic::x86_avx512_psrl_d_512;
  3016. else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
  3017. IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
  3018. IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
  3019. Intrinsic::x86_avx512_psrl_q_512;
      else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
  3021. IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
  3022. : Intrinsic::x86_avx512_psrl_w_512;
  3023. else
  3024. llvm_unreachable("Unexpected size");
  3025. }
  3026. Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  3027. } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
  3028. bool IsImmediate = Name[16] == 'i' ||
  3029. (Name.size() > 18 && Name[18] == 'i');
  3030. bool IsVariable = Name[16] == 'v';
  3031. char Size = Name[16] == '.' ? Name[17] :
  3032. Name[17] == '.' ? Name[18] :
  3033. Name[18] == '.' ? Name[19] :
  3034. Name[20];
  3035. Intrinsic::ID IID;
  3036. if (IsVariable && Name[17] != '.') {
  3037. if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
  3038. IID = Intrinsic::x86_avx2_psrav_d;
  3039. else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
  3040. IID = Intrinsic::x86_avx2_psrav_d_256;
  3041. else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
  3042. IID = Intrinsic::x86_avx512_psrav_w_128;
  3043. else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
  3044. IID = Intrinsic::x86_avx512_psrav_w_256;
  3045. else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
  3046. IID = Intrinsic::x86_avx512_psrav_w_512;
  3047. else
  3048. llvm_unreachable("Unexpected size");
  3049. } else if (Name.endswith(".128")) {
  3050. if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
  3051. IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
  3052. : Intrinsic::x86_sse2_psra_d;
  3053. else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
  3054. IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
  3055. IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
  3056. Intrinsic::x86_avx512_psra_q_128;
  3057. else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
  3058. IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
  3059. : Intrinsic::x86_sse2_psra_w;
  3060. else
  3061. llvm_unreachable("Unexpected size");
  3062. } else if (Name.endswith(".256")) {
  3063. if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
  3064. IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
  3065. : Intrinsic::x86_avx2_psra_d;
  3066. else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
  3067. IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
  3068. IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
  3069. Intrinsic::x86_avx512_psra_q_256;
  3070. else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
  3071. IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
  3072. : Intrinsic::x86_avx2_psra_w;
  3073. else
  3074. llvm_unreachable("Unexpected size");
  3075. } else {
  3076. if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
  3077. IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
  3078. IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
  3079. Intrinsic::x86_avx512_psra_d_512;
  3080. else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
  3081. IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
  3082. IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
  3083. Intrinsic::x86_avx512_psra_q_512;
  3084. else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
  3085. IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
  3086. : Intrinsic::x86_avx512_psra_w_512;
  3087. else
  3088. llvm_unreachable("Unexpected size");
  3089. }
  3090. Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
  3091. } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
  3092. Rep = upgradeMaskedMove(Builder, *CI);
  3093. } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
  3094. Rep = UpgradeMaskToInt(Builder, *CI);
  3095. } else if (IsX86 && Name.endswith(".movntdqa")) {
  3096. Module *M = F->getParent();
  3097. MDNode *Node = MDNode::get(
  3098. C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
  3099. Value *Ptr = CI->getArgOperand(0);
  3100. // Convert the type of the pointer to a pointer to the stored type.
  3101. Value *BC = Builder.CreateBitCast(
  3102. Ptr, PointerType::getUnqual(CI->getType()), "cast");
  3103. LoadInst *LI = Builder.CreateAlignedLoad(
  3104. CI->getType(), BC,
  3105. Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
  3106. LI->setMetadata(M->getMDKindID("nontemporal"), Node);
  3107. Rep = LI;
  3108. } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
  3109. Name.startswith("fma.vfmsub.") ||
  3110. Name.startswith("fma.vfnmadd.") ||
  3111. Name.startswith("fma.vfnmsub."))) {
  3112. bool NegMul = Name[6] == 'n';
  3113. bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
  3114. bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
  3115. Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
  3116. CI->getArgOperand(2) };
  3117. if (IsScalar) {
  3118. Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
  3119. Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  3120. Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  3121. }
  3122. if (NegMul && !IsScalar)
  3123. Ops[0] = Builder.CreateFNeg(Ops[0]);
  3124. if (NegMul && IsScalar)
  3125. Ops[1] = Builder.CreateFNeg(Ops[1]);
  3126. if (NegAcc)
  3127. Ops[2] = Builder.CreateFNeg(Ops[2]);
  3128. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
  3129. Intrinsic::fma,
  3130. Ops[0]->getType()),
  3131. Ops);
  3132. if (IsScalar)
  3133. Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
  3134. (uint64_t)0);
  3135. } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
  3136. Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
  3137. CI->getArgOperand(2) };
  3138. Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
  3139. Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
  3140. Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
  3141. Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
  3142. Intrinsic::fma,
  3143. Ops[0]->getType()),
  3144. Ops);
  3145. Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
  3146. Rep, (uint64_t)0);
  3147. } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
  3148. Name.startswith("avx512.maskz.vfmadd.s") ||
  3149. Name.startswith("avx512.mask3.vfmadd.s") ||
  3150. Name.startswith("avx512.mask3.vfmsub.s") ||
  3151. Name.startswith("avx512.mask3.vfnmsub.s"))) {
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);
      A = Builder.CreateExtractElement(A, (uint64_t)0);
      B = Builder.CreateExtractElement(B, (uint64_t)0);
      C = Builder.CreateExtractElement(C, (uint64_t)0);
      if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
          cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
        Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
        Intrinsic::ID IID;
        if (Name.back() == 'd')
          IID = Intrinsic::x86_avx512_vfmadd_f64;
        else
          IID = Intrinsic::x86_avx512_vfmadd_f32;
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
        Rep = Builder.CreateCall(FMA, Ops);
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }
      Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
                        IsMask3 ? C : A;
      // For Mask3 with NegAcc, we need to create a new extractelement that
      // avoids the negation above.
      if (NegAcc && IsMask3)
        PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
                                                (uint64_t)0);
      Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
                                Rep, PassThru);
      Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
                                        Rep, (uint64_t)0);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
                         Name.startswith("avx512.mask.vfnmadd.p") ||
                         Name.startswith("avx512.mask.vfnmsub.p") ||
                         Name.startswith("avx512.mask3.vfmadd.p") ||
                         Name.startswith("avx512.mask3.vfmsub.p") ||
                         Name.startswith("avx512.mask3.vfnmsub.p") ||
                         Name.startswith("avx512.maskz.vfmadd.p"))) {
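      // Packed AVX512 FMA with masking: same name decoding as the scalar
      // case above. The 512-bit forms with a non-default rounding argument
      // keep a target intrinsic; everything else becomes llvm.fma, and the
      // mask selects between the result and the pass-through operand.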
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool NegMul = Name[2] == 'n';
      bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
      Value *A = CI->getArgOperand(0);
      Value *B = CI->getArgOperand(1);
      Value *C = CI->getArgOperand(2);
      if (NegMul && (IsMask3 || IsMaskZ))
        A = Builder.CreateFNeg(A);
      if (NegMul && !(IsMask3 || IsMaskZ))
        B = Builder.CreateFNeg(B);
      if (NegAcc)
        C = Builder.CreateFNeg(C);
      if (CI->arg_size() == 5 &&
          (!isa<ConstantInt>(CI->getArgOperand(4)) ||
           cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmadd_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmadd_pd_512;
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 { A, B, C, CI->getArgOperand(4) });
      } else {
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
                                                  Intrinsic::fma,
                                                  A->getType());
        Rep = Builder.CreateCall(FMA, { A, B, C });
      }
      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_fma_vfmaddsub_ps;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_fma_vfmaddsub_pd;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
      else
        llvm_unreachable("Unexpected intrinsic");
      Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2) };
      Ops[2] = Builder.CreateFNeg(Ops[2]);
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                               Ops);
    } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmaddsub.p") ||
                         Name.startswith("avx512.maskz.vfmaddsub.p") ||
                         Name.startswith("avx512.mask3.vfmsubadd.p"))) {
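      // Masked fmaddsub/fmsubadd. The five-operand (rounding) form stays a
      // 512-bit target intrinsic (fmsubadd is expressed by negating the
      // addend). Otherwise it is emulated with two llvm.fma calls, one with
      // a negated addend, whose even and odd lanes are interleaved by a
      // shufflevector.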
      bool IsMask3 = Name[11] == '3';
      bool IsMaskZ = Name[11] == 'z';
      // Drop the "avx512.mask." to make it easier.
      Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
      bool IsSubAdd = Name[3] == 's';
      if (CI->arg_size() == 5) {
        Intrinsic::ID IID;
        // Check the character before ".512" in string.
        if (Name[Name.size()-5] == 's')
          IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
        else
          IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2), CI->getArgOperand(4) };
        if (IsSubAdd)
          Ops[2] = Builder.CreateFNeg(Ops[2]);
        Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
                                 Ops);
      } else {
        int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
        Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                         CI->getArgOperand(2) };
        Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
                                                  Ops[0]->getType());
        Value *Odd = Builder.CreateCall(FMA, Ops);
        Ops[2] = Builder.CreateFNeg(Ops[2]);
        Value *Even = Builder.CreateCall(FMA, Ops);
        if (IsSubAdd)
          std::swap(Even, Odd);
        SmallVector<int, 32> Idxs(NumElts);
        for (int i = 0; i != NumElts; ++i)
          Idxs[i] = i + (i % 2) * NumElts;
        Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
      }
      Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
                        IsMask3 ? CI->getArgOperand(2) :
                                  CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
                         Name.startswith("avx512.maskz.pternlog."))) {
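      // The masked pternlog forms become the unmasked target intrinsic
      // followed by a vector select on the mask ("maskz" selects zero for
      // the masked-off lanes, "mask" keeps operand 0).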
      bool ZeroMask = Name[11] == 'z';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      unsigned EltWidth = CI->getType()->getScalarSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_128;
      else if (VecWidth == 256 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_256;
      else if (VecWidth == 512 && EltWidth == 32)
        IID = Intrinsic::x86_avx512_pternlog_d_512;
      else if (VecWidth == 128 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_128;
      else if (VecWidth == 256 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_256;
      else if (VecWidth == 512 && EltWidth == 64)
        IID = Intrinsic::x86_avx512_pternlog_q_512;
      else
        llvm_unreachable("Unexpected intrinsic");
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2), CI->getArgOperand(3) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
                         Name.startswith("avx512.maskz.vpmadd52"))) {
      bool ZeroMask = Name[11] == 'z';
      bool High = Name[20] == 'h' || Name[21] == 'h';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
      else if (VecWidth == 256 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
      else if (VecWidth == 512 && !High)
        IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
      else if (VecWidth == 128 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
      else if (VecWidth == 256 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
      else if (VecWidth == 512 && High)
        IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
      else
        llvm_unreachable("Unexpected intrinsic");
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
                         Name.startswith("avx512.mask.vpermt2var.") ||
                         Name.startswith("avx512.maskz.vpermt2var."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IndexForm = Name[17] == 'i';
      Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
                         Name.startswith("avx512.maskz.vpdpbusd.") ||
                         Name.startswith("avx512.mask.vpdpbusds.") ||
                         Name.startswith("avx512.maskz.vpdpbusds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpbusds_512;
      else
        llvm_unreachable("Unexpected intrinsic");
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
                         Name.startswith("avx512.maskz.vpdpwssd.") ||
                         Name.startswith("avx512.mask.vpdpwssds.") ||
                         Name.startswith("avx512.maskz.vpdpwssds."))) {
      bool ZeroMask = Name[11] == 'z';
      bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
      unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
      Intrinsic::ID IID;
      if (VecWidth == 128 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_128;
      else if (VecWidth == 256 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_256;
      else if (VecWidth == 512 && !IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssd_512;
      else if (VecWidth == 128 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_128;
      else if (VecWidth == 256 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_256;
      else if (VecWidth == 512 && IsSaturating)
        IID = Intrinsic::x86_avx512_vpdpwssds_512;
      else
        llvm_unreachable("Unexpected intrinsic");
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2) };
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
                               Args);
      Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
                                 : CI->getArgOperand(0);
      Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
    } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
                         Name == "addcarry.u32" || Name == "addcarry.u64" ||
                         Name == "subborrow.u32" || Name == "subborrow.u64")) {
      Intrinsic::ID IID;
      if (Name[0] == 'a' && Name.back() == '2')
        IID = Intrinsic::x86_addcarry_32;
      else if (Name[0] == 'a' && Name.back() == '4')
        IID = Intrinsic::x86_addcarry_64;
      else if (Name[0] == 's' && Name.back() == '2')
        IID = Intrinsic::x86_subborrow_32;
      else if (Name[0] == 's' && Name.back() == '4')
        IID = Intrinsic::x86_subborrow_64;
      else
        llvm_unreachable("Unexpected intrinsic");
      // Make a call with 3 operands.
      Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
                        CI->getArgOperand(2)};
      Value *NewCall = Builder.CreateCall(
          Intrinsic::getDeclaration(CI->getModule(), IID),
          Args);
      // Extract the second result and store it.
      Value *Data = Builder.CreateExtractValue(NewCall, 1);
      // Cast the pointer to the right type.
      Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
                                 llvm::PointerType::getUnqual(Data->getType()));
      Builder.CreateAlignedStore(Data, Ptr, Align(1));
      // Replace the original call result with the first result of the new call.
      Value *CF = Builder.CreateExtractValue(NewCall, 0);
      CI->replaceAllUsesWith(CF);
      Rep = nullptr;
    } else if (IsX86 && Name.startswith("avx512.mask.") &&
               upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
      // Rep will be updated by the call in the condition.
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
                          Name.startswith("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
                          Name == "max.ui" || Name == "max.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
                          Name == "min.ui" || Name == "min.ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM && Name == "h2f") {
      Rep = Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
    } else if (IsARM) {
      Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else {
      llvm_unreachable("Unknown function for CallInst upgrade.");
    }

    if (Rep)
      CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();
    return;
  }

  const auto &DefaultCase = [&NewFn, &CI]() -> void {
    // Handle generic mangling change, but nothing else
    assert(
        (CI->getCalledFunction()->getName() != NewFn->getName()) &&
        "Unknown function for CallInst upgrade and isn't just a name change");
    CI->setCalledFunction(NewFn);
  };
  CallInst *NewCall = nullptr;
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
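    // Operands 1 and 2 changed to bfloat vectors; bitcast them from their
    // old vector type to the bfloat type the new declaration expects.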
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;
  case Intrinsic::objectsize: {
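    // Older forms of llvm.objectsize took fewer arguments; default the
    // missing "null is unknown size" and "dynamic" flags to false.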
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
    break;
  }
  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;
  case Intrinsic::dbg_value:
    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    assert(CI->arg_size() == 4 &&
           "Before LLVM 12.0 this intrinsic took four arguments");
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;
  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    assert(CI->arg_size() == 4 &&
           "Before LLVM 12.0 this intrinsic took four arguments");
    // Create a new call with an added null annotation attribute argument.
    NewCall = Builder.CreateCall(
        NewFn,
        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
         CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
    CI->eraseFromParent();
    return;
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;
  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;
    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);
    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }
  case Intrinsic::x86_rdtscp: {
    // This used to take 1 argument. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;
    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());
    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:
  case Intrinsic::masked_gather:
  case Intrinsic::masked_scatter: {
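    // Only the declaration changed; rebuild the call against the new
    // declaration and carry the metadata over from the old call.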
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    NewCall->copyMetadata(*CI);
    break;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallInst *CI = dyn_cast<CallInst>(U))
        UpgradeIntrinsicCall(CI, NewFn);
    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
    return &MD;
  auto &Context = MD.getContext();
  if (MD.getNumOperands() == 3) {
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;
  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();
    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }
  return nullptr;
}

Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;
  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();
    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }
  return nullptr;
}

/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}

/// This checks for the ObjC retain/release marker, which should be upgraded.
/// It returns true if the module is modified.
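/// The marker used to live in the named metadata
/// "clang.arc.retainAutoreleasedReturnValueMarker" with its two components
/// separated by '#'; it is rewritten with a ';' separator and moved to a
/// module flag of the same name.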
static bool UpgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}

void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);
    if (!Fn)
      return;
    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;
      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;
      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;
      bool InvalidCast = false;
      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);
        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }
      if (InvalidCast)
        continue;
      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);
      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };
  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
  // Upgrade the retain release marker. If there is no need to upgrade the
  // marker, that means either the module is already new enough to contain new
  // intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
  if (!UpgradeRetainReleaseMarker(M))
    return;
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}

bool llvm::UpgradeModuleFlags(Module &M) {
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC/PIE Module Flags. The module flag behavior for these two
    // fields was Error and is now Max.
    if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
      if (auto *Behavior =
              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
              MDString::get(M.getContext(), ID->getString()),
              Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // Upgrade the Objective-C Image Info Section. Remove the whitespace in
    // the section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }
    // The IRUpgrader turns the i32-typed "Objective-C Garbage Collection"
    // module flag into an i8 value. If the higher bits are set, it also adds
    // new module flags for the Swift info.
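    // The old i32 value packs the extra Swift info above the low GC byte:
    // bits 8-15 hold the Swift ABI version, bits 16-23 the Swift minor
    // version, and bits 24-31 the Swift major version.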
  4042. if (ID->getString() == "Objective-C Garbage Collection") {
  4043. auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
  4044. if (Md) {
  4045. assert(Md->getValue() && "Expected non-empty metadata");
  4046. auto Type = Md->getValue()->getType();
  4047. if (Type == Int8Ty)
  4048. continue;
  4049. unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
  4050. if ((Val & 0xff) != Val) {
  4051. HasSwiftVersionFlag = true;
  4052. SwiftABIVersion = (Val & 0xff00) >> 8;
  4053. SwiftMajorVersion = (Val & 0xff000000) >> 24;
  4054. SwiftMinorVersion = (Val & 0xff0000) >> 16;
  4055. }
  4056. Metadata *Ops[3] = {
  4057. ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
  4058. Op->getOperand(1),
  4059. ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
  4060. ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
  4061. Changed = true;
  4062. }
  4063. }
  4064. }
  4065. // "Objective-C Class Properties" is recently added for Objective-C. We
  4066. // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  4067. // flag of value 0, so we can correclty downgrade this flag when trying to
  4068. // link an ObjC bitcode without this module flag with an ObjC bitcode with
  4069. // this module flag.
  4070. if (HasObjCFlag && !HasClassProperties) {
  4071. M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
  4072. (uint32_t)0);
  4073. Changed = true;
  4074. }
  4075. if (HasSwiftVersionFlag) {
  4076. M.addModuleFlag(Module::Error, "Swift ABI Version",
  4077. SwiftABIVersion);
  4078. M.addModuleFlag(Module::Error, "Swift Major Version",
  4079. ConstantInt::get(Int8Ty, SwiftMajorVersion));
  4080. M.addModuleFlag(Module::Error, "Swift Minor Version",
  4081. ConstantInt::get(Int8Ty, SwiftMinorVersion));
  4082. Changed = true;
  4083. }
  4084. return Changed;
  4085. }

void llvm::UpgradeSectionAttributes(Module &M) {
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    for (auto Component : Components)
      OS << ',' << Component.trim();
    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;
    StringRef Section = GV.getSection();
    if (!Section.startswith("__DATA, __objc_catlist"))
      continue;
    // __DATA, __objc_catlist, regular, no_dead_strip
    // __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}

namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() {}

  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
    if (isa<ConstrainedFPIntrinsic>(&Call))
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};
} // namespace

void llvm::UpgradeFunctionAttributes(Function &F) {
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  if (F.getCallingConv() == CallingConv::X86_INTR &&
      !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
    Type *ByValTy = F.getArg(0)->getType()->getPointerElementType();
    Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
    F.addParamAttr(0, NewAttr);
  }

  // Remove all incompatible attributes from the function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}

static bool isOldLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return false;
  if (T->getNumOperands() < 1)
    return false;
  auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!S)
    return false;
  return S->getString().startswith("llvm.vectorizer.");
}

static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.startswith(OldPrefix) && "Expected old prefix");

  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}

static Metadata *upgradeLoopArgument(Metadata *MD) {
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().startswith("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}

MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;
  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  SmallVector<Metadata *, 8> Ops;
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}

std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
  Triple T(TT);
  // For AMDGPU we upgrade older DataLayouts to include the default globals
  // address space of 1.
  if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
  }

  std::string Res = DL.str();
  if (!T.isX86())
    return Res;

  // If the datalayout matches the expected format, add pointer size address
  // spaces to the datalayout.
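  // For example (illustrative only), an old 32-bit x86 layout such as
  //   "e-m:e-p:32:32-f64:32:64-..."
  // becomes
  //   "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-...".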
  std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
  if (!DL.contains(AddrSpaces)) {
    SmallVector<StringRef, 4> Groups;
    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
    if (R.match(DL, &Groups))
      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
  }

  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
  // Raising the alignment is safe because Clang did not produce f80 values in
  // the MSVC environment before this upgrade was added.
  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
    StringRef Ref = Res;
    auto I = Ref.find("-f80:32-");
    if (I != StringRef::npos)
      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
  }

  return Res;
}

void llvm::UpgradeAttributes(AttrBuilder &B) {
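  // "no-frame-pointer-elim"="true"/"false" becomes "frame-pointer"="all" or
  // "none", and "no-frame-pointer-elim-non-leaf" becomes
  // "frame-pointer"="non-leaf" unless the "all" setting already won.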
  StringRef FramePointer;
  Attribute A = B.getAttribute("no-frame-pointer-elim");
  if (A.isValid()) {
    // The value can be "true" or "false".
    FramePointer = A.getValueAsString() == "true" ? "all" : "none";
    B.removeAttribute("no-frame-pointer-elim");
  }
  if (B.contains("no-frame-pointer-elim-non-leaf")) {
    // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
    if (FramePointer != "all")
      FramePointer = "non-leaf";
    B.removeAttribute("no-frame-pointer-elim-non-leaf");
  }
  if (!FramePointer.empty())
    B.addAttribute("frame-pointer", FramePointer);

  A = B.getAttribute("null-pointer-is-valid");
  if (A.isValid()) {
    // The value can be "true" or "false".
    bool NullPointerIsValid = A.getValueAsString() == "true";
    B.removeAttribute("null-pointer-is-valid");
    if (NullPointerIsValid)
      B.addAttribute(Attribute::NullPointerIsValid);
  }
}