IRTranslator.cpp

  1. //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. /// This file implements the IRTranslator class.
  10. //===----------------------------------------------------------------------===//
  11. #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
  12. #include "llvm/ADT/PostOrderIterator.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/ScopeExit.h"
  15. #include "llvm/ADT/SmallSet.h"
  16. #include "llvm/ADT/SmallVector.h"
  17. #include "llvm/Analysis/AliasAnalysis.h"
  18. #include "llvm/Analysis/AssumptionCache.h"
  19. #include "llvm/Analysis/BranchProbabilityInfo.h"
  20. #include "llvm/Analysis/Loads.h"
  21. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  22. #include "llvm/Analysis/ValueTracking.h"
  23. #include "llvm/CodeGen/Analysis.h"
  24. #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
  25. #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
  26. #include "llvm/CodeGen/GlobalISel/CallLowering.h"
  27. #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
  28. #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
  29. #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  30. #include "llvm/CodeGen/LowLevelType.h"
  31. #include "llvm/CodeGen/MachineBasicBlock.h"
  32. #include "llvm/CodeGen/MachineFrameInfo.h"
  33. #include "llvm/CodeGen/MachineFunction.h"
  34. #include "llvm/CodeGen/MachineInstrBuilder.h"
  35. #include "llvm/CodeGen/MachineMemOperand.h"
  36. #include "llvm/CodeGen/MachineModuleInfo.h"
  37. #include "llvm/CodeGen/MachineOperand.h"
  38. #include "llvm/CodeGen/MachineRegisterInfo.h"
  39. #include "llvm/CodeGen/RuntimeLibcalls.h"
  40. #include "llvm/CodeGen/StackProtector.h"
  41. #include "llvm/CodeGen/SwitchLoweringUtils.h"
  42. #include "llvm/CodeGen/TargetFrameLowering.h"
  43. #include "llvm/CodeGen/TargetInstrInfo.h"
  44. #include "llvm/CodeGen/TargetLowering.h"
  45. #include "llvm/CodeGen/TargetPassConfig.h"
  46. #include "llvm/CodeGen/TargetRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48. #include "llvm/IR/BasicBlock.h"
  49. #include "llvm/IR/CFG.h"
  50. #include "llvm/IR/Constant.h"
  51. #include "llvm/IR/Constants.h"
  52. #include "llvm/IR/DataLayout.h"
  53. #include "llvm/IR/DerivedTypes.h"
  54. #include "llvm/IR/DiagnosticInfo.h"
  55. #include "llvm/IR/Function.h"
  56. #include "llvm/IR/GetElementPtrTypeIterator.h"
  57. #include "llvm/IR/InlineAsm.h"
  58. #include "llvm/IR/InstrTypes.h"
  59. #include "llvm/IR/Instructions.h"
  60. #include "llvm/IR/IntrinsicInst.h"
  61. #include "llvm/IR/Intrinsics.h"
  62. #include "llvm/IR/LLVMContext.h"
  63. #include "llvm/IR/Metadata.h"
  64. #include "llvm/IR/PatternMatch.h"
  65. #include "llvm/IR/Statepoint.h"
  66. #include "llvm/IR/Type.h"
  67. #include "llvm/IR/User.h"
  68. #include "llvm/IR/Value.h"
  69. #include "llvm/InitializePasses.h"
  70. #include "llvm/MC/MCContext.h"
  71. #include "llvm/Pass.h"
  72. #include "llvm/Support/Casting.h"
  73. #include "llvm/Support/CodeGen.h"
  74. #include "llvm/Support/Debug.h"
  75. #include "llvm/Support/ErrorHandling.h"
  76. #include "llvm/Support/LowLevelTypeImpl.h"
  77. #include "llvm/Support/MathExtras.h"
  78. #include "llvm/Support/raw_ostream.h"
  79. #include "llvm/Target/TargetIntrinsicInfo.h"
  80. #include "llvm/Target/TargetMachine.h"
  81. #include "llvm/Transforms/Utils/MemoryOpRemark.h"
  82. #include <algorithm>
  83. #include <cassert>
  84. #include <cstdint>
  85. #include <iterator>
  86. #include <optional>
  87. #include <string>
  88. #include <utility>
  89. #include <vector>
  90. #define DEBUG_TYPE "irtranslator"
  91. using namespace llvm;
  92. static cl::opt<bool>
  93. EnableCSEInIRTranslator("enable-cse-in-irtranslator",
  94. cl::desc("Should enable CSE in irtranslator"),
  95. cl::Optional, cl::init(false));
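// Illustrative usage note (not part of the original source; the input file name
// below is hypothetical): because this is a cl::opt, the flag is registered with
// the global command-line parser, so a tool such as llc that links this pass can
// enable CSE during translation directly, e.g.:
//   llc -global-isel -enable-cse-in-irtranslator input.ll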
  96. char IRTranslator::ID = 0;
  97. INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
  98. false, false)
  99. INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
  100. INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
  101. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  102. INITIALIZE_PASS_DEPENDENCY(StackProtector)
  103. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  104. INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
  105. false, false)
  106. static void reportTranslationError(MachineFunction &MF,
  107. const TargetPassConfig &TPC,
  108. OptimizationRemarkEmitter &ORE,
  109. OptimizationRemarkMissed &R) {
  110. MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
  111. // Print the function name explicitly if we don't have a debug location (which
  112. // makes the diagnostic less useful) or if we're going to emit a raw error.
  113. if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
  114. R << (" (in function: " + MF.getName() + ")").str();
  115. if (TPC.isGlobalISelAbortEnabled())
  116. report_fatal_error(Twine(R.getMsg()));
  117. else
  118. ORE.emit(R);
  119. }
  120. IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
  121. : MachineFunctionPass(ID), OptLevel(optlevel) {}
  122. #ifndef NDEBUG
  123. namespace {
  124. /// Verify that every instruction created has the same DILocation as the
  125. /// instruction being translated.
  126. class DILocationVerifier : public GISelChangeObserver {
  127. const Instruction *CurrInst = nullptr;
  128. public:
  129. DILocationVerifier() = default;
  130. ~DILocationVerifier() = default;
  131. const Instruction *getCurrentInst() const { return CurrInst; }
  132. void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
  133. void erasingInstr(MachineInstr &MI) override {}
  134. void changingInstr(MachineInstr &MI) override {}
  135. void changedInstr(MachineInstr &MI) override {}
  136. void createdInstr(MachineInstr &MI) override {
  137. assert(getCurrentInst() && "Inserted instruction without a current MI");
  138. // Only print the check message if we're actually checking it.
  139. #ifndef NDEBUG
  140. LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
  141. << " was copied to " << MI);
  142. #endif
  143. // We allow insts in the entry block to have no debug loc because
  144. // they could have originated from constants, and we don't want a jumpy
  145. // debug experience.
  146. assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
  147. (MI.getParent()->isEntryBlock() && !MI.getDebugLoc())) &&
  148. "Line info was not transferred to all instructions");
  149. }
  150. };
  151. } // namespace
  152. #endif // ifndef NDEBUG
  153. void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
  154. AU.addRequired<StackProtector>();
  155. AU.addRequired<TargetPassConfig>();
  156. AU.addRequired<GISelCSEAnalysisWrapperPass>();
  157. AU.addRequired<AssumptionCacheTracker>();
  158. if (OptLevel != CodeGenOpt::None) {
  159. AU.addRequired<BranchProbabilityInfoWrapperPass>();
  160. AU.addRequired<AAResultsWrapperPass>();
  161. }
  162. AU.addRequired<TargetLibraryInfoWrapperPass>();
  163. AU.addPreserved<TargetLibraryInfoWrapperPass>();
  164. getSelectionDAGFallbackAnalysisUsage(AU);
  165. MachineFunctionPass::getAnalysisUsage(AU);
  166. }
  167. IRTranslator::ValueToVRegInfo::VRegListT &
  168. IRTranslator::allocateVRegs(const Value &Val) {
  169. auto VRegsIt = VMap.findVRegs(Val);
  170. if (VRegsIt != VMap.vregs_end())
  171. return *VRegsIt->second;
  172. auto *Regs = VMap.getVRegs(Val);
  173. auto *Offsets = VMap.getOffsets(Val);
  174. SmallVector<LLT, 4> SplitTys;
  175. computeValueLLTs(*DL, *Val.getType(), SplitTys,
  176. Offsets->empty() ? Offsets : nullptr);
  177. for (unsigned i = 0; i < SplitTys.size(); ++i)
  178. Regs->push_back(0);
  179. return *Regs;
  180. }
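// Illustrative sketch of the split performed above (hypothetical value, assuming
// a data layout where i64 is 8-byte aligned): for a value of IR type { i64, i32 },
// computeValueLLTs is expected to produce two LLTs with bit offsets into the
// original value, so allocateVRegs reserves one vreg slot per part:
//   { i64, i32 }  ->  SplitTys = { s64, s32 },  *Offsets = { 0, 64 }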
  181. ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
  182. auto VRegsIt = VMap.findVRegs(Val);
  183. if (VRegsIt != VMap.vregs_end())
  184. return *VRegsIt->second;
  185. if (Val.getType()->isVoidTy())
  186. return *VMap.getVRegs(Val);
  187. // Create entry for this type.
  188. auto *VRegs = VMap.getVRegs(Val);
  189. auto *Offsets = VMap.getOffsets(Val);
  190. assert(Val.getType()->isSized() &&
  191. "Don't know how to create an empty vreg");
  192. SmallVector<LLT, 4> SplitTys;
  193. computeValueLLTs(*DL, *Val.getType(), SplitTys,
  194. Offsets->empty() ? Offsets : nullptr);
  195. if (!isa<Constant>(Val)) {
  196. for (auto Ty : SplitTys)
  197. VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
  198. return *VRegs;
  199. }
  200. if (Val.getType()->isAggregateType()) {
  201. // UndefValue, ConstantAggregateZero
  202. auto &C = cast<Constant>(Val);
  203. unsigned Idx = 0;
  204. while (auto Elt = C.getAggregateElement(Idx++)) {
  205. auto EltRegs = getOrCreateVRegs(*Elt);
  206. llvm::copy(EltRegs, std::back_inserter(*VRegs));
  207. }
  208. } else {
  209. assert(SplitTys.size() == 1 && "unexpectedly split LLT");
  210. VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
  211. bool Success = translate(cast<Constant>(Val), VRegs->front());
  212. if (!Success) {
  213. OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
  214. MF->getFunction().getSubprogram(),
  215. &MF->getFunction().getEntryBlock());
  216. R << "unable to translate constant: " << ore::NV("Type", Val.getType());
  217. reportTranslationError(*MF, *TPC, *ORE, R);
  218. return *VRegs;
  219. }
  220. }
  221. return *VRegs;
  222. }
  223. int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
  224. auto MapEntry = FrameIndices.find(&AI);
  225. if (MapEntry != FrameIndices.end())
  226. return MapEntry->second;
  227. uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
  228. uint64_t Size =
  229. ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
  230. // Always allocate at least one byte.
  231. Size = std::max<uint64_t>(Size, 1u);
  232. int &FI = FrameIndices[&AI];
  233. FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
  234. return FI;
  235. }
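// Worked example for getOrCreateFrameIndex above (hypothetical IR, illustrative
// only): for "alloca i32, i32 8" the allocated type is i32 (4 bytes) and the
// constant array size is 8, so Size = 32 bytes; for a zero-sized allocation the
// std::max clamp keeps the stack object at least 1 byte.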
  236. Align IRTranslator::getMemOpAlign(const Instruction &I) {
  237. if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
  238. return SI->getAlign();
  239. if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
  240. return LI->getAlign();
  241. if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I))
  242. return AI->getAlign();
  243. if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I))
  244. return AI->getAlign();
  245. OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
  246. R << "unable to translate memop: " << ore::NV("Opcode", &I);
  247. reportTranslationError(*MF, *TPC, *ORE, R);
  248. return Align(1);
  249. }
  250. MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
  251. MachineBasicBlock *&MBB = BBToMBB[&BB];
  252. assert(MBB && "BasicBlock was not encountered before");
  253. return *MBB;
  254. }
  255. void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
  256. assert(NewPred && "new predecessor must be a real MachineBasicBlock");
  257. MachinePreds[Edge].push_back(NewPred);
  258. }
  259. bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
  260. MachineIRBuilder &MIRBuilder) {
  261. // Get or create a virtual register for each value.
  262. // Unless the value is a Constant => loadimm cst?
  263. // or inline constant each time?
  264. // Creation of a virtual register needs to have a size.
  265. Register Op0 = getOrCreateVReg(*U.getOperand(0));
  266. Register Op1 = getOrCreateVReg(*U.getOperand(1));
  267. Register Res = getOrCreateVReg(U);
  268. uint16_t Flags = 0;
  269. if (isa<Instruction>(U)) {
  270. const Instruction &I = cast<Instruction>(U);
  271. Flags = MachineInstr::copyFlagsFromInstruction(I);
  272. }
  273. MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
  274. return true;
  275. }
  276. bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
  277. MachineIRBuilder &MIRBuilder) {
  278. Register Op0 = getOrCreateVReg(*U.getOperand(0));
  279. Register Res = getOrCreateVReg(U);
  280. uint16_t Flags = 0;
  281. if (isa<Instruction>(U)) {
  282. const Instruction &I = cast<Instruction>(U);
  283. Flags = MachineInstr::copyFlagsFromInstruction(I);
  284. }
  285. MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
  286. return true;
  287. }
  288. bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
  289. return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
  290. }
  291. bool IRTranslator::translateCompare(const User &U,
  292. MachineIRBuilder &MIRBuilder) {
  293. auto *CI = dyn_cast<CmpInst>(&U);
  294. Register Op0 = getOrCreateVReg(*U.getOperand(0));
  295. Register Op1 = getOrCreateVReg(*U.getOperand(1));
  296. Register Res = getOrCreateVReg(U);
  297. CmpInst::Predicate Pred =
  298. CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
  299. cast<ConstantExpr>(U).getPredicate());
  300. if (CmpInst::isIntPredicate(Pred))
  301. MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
  302. else if (Pred == CmpInst::FCMP_FALSE)
  303. MIRBuilder.buildCopy(
  304. Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
  305. else if (Pred == CmpInst::FCMP_TRUE)
  306. MIRBuilder.buildCopy(
  307. Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
  308. else {
  309. uint16_t Flags = 0;
  310. if (CI)
  311. Flags = MachineInstr::copyFlagsFromInstruction(*CI);
  312. MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
  313. }
  314. return true;
  315. }
  316. bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
  317. const ReturnInst &RI = cast<ReturnInst>(U);
  318. const Value *Ret = RI.getReturnValue();
  319. if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
  320. Ret = nullptr;
  321. ArrayRef<Register> VRegs;
  322. if (Ret)
  323. VRegs = getOrCreateVRegs(*Ret);
  324. Register SwiftErrorVReg = 0;
  325. if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
  326. SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
  327. &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
  328. }
  329. // The target may mess with the insertion point, but that does
  330. // not matter: a return is always the last instruction of the
  331. // block anyway.
  332. return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
  333. }
  334. void IRTranslator::emitBranchForMergedCondition(
  335. const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
  336. MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
  337. BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
  338. // If the leaf of the tree is a comparison, merge the condition into
  339. // the caseblock.
  340. if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
  341. CmpInst::Predicate Condition;
  342. if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
  343. Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
  344. } else {
  345. const FCmpInst *FC = cast<FCmpInst>(Cond);
  346. Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
  347. }
  348. SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
  349. BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
  350. CurBuilder->getDebugLoc(), TProb, FProb);
  351. SL->SwitchCases.push_back(CB);
  352. return;
  353. }
  354. // Create a CaseBlock record representing this branch.
  355. CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
  356. SwitchCG::CaseBlock CB(
  357. Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
  358. nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
  359. SL->SwitchCases.push_back(CB);
  360. }
  361. static bool isValInBlock(const Value *V, const BasicBlock *BB) {
  362. if (const Instruction *I = dyn_cast<Instruction>(V))
  363. return I->getParent() == BB;
  364. return true;
  365. }
  366. void IRTranslator::findMergedConditions(
  367. const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
  368. MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
  369. Instruction::BinaryOps Opc, BranchProbability TProb,
  370. BranchProbability FProb, bool InvertCond) {
  371. using namespace PatternMatch;
  372. assert((Opc == Instruction::And || Opc == Instruction::Or) &&
  373. "Expected Opc to be AND/OR");
  374. // If the condition is a single-use NOT, skip over it and remember to invert
  375. // the opcode and operands at the next level.
  376. Value *NotCond;
  377. if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
  378. isValInBlock(NotCond, CurBB->getBasicBlock())) {
  379. findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
  380. !InvertCond);
  381. return;
  382. }
  383. const Instruction *BOp = dyn_cast<Instruction>(Cond);
  384. const Value *BOpOp0, *BOpOp1;
  385. // Compute the effective opcode for Cond, taking into account whether it needs
  386. // to be inverted, e.g.
  387. // and (not (or A, B)), C
  388. // gets lowered as
  389. // and (and (not A, not B), C)
  390. Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
  391. if (BOp) {
  392. BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
  393. ? Instruction::And
  394. : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
  395. ? Instruction::Or
  396. : (Instruction::BinaryOps)0);
  397. if (InvertCond) {
  398. if (BOpc == Instruction::And)
  399. BOpc = Instruction::Or;
  400. else if (BOpc == Instruction::Or)
  401. BOpc = Instruction::And;
  402. }
  403. }
  404. // If this node is not part of the or/and tree, emit it as a branch.
  405. // Note that all nodes in the tree should have the same opcode.
  406. bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
  407. if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
  408. !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
  409. !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
  410. emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
  411. InvertCond);
  412. return;
  413. }
  414. // Create TmpBB after CurBB.
  415. MachineFunction::iterator BBI(CurBB);
  416. MachineBasicBlock *TmpBB =
  417. MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
  418. CurBB->getParent()->insert(++BBI, TmpBB);
  419. if (Opc == Instruction::Or) {
  420. // Codegen X | Y as:
  421. // BB1:
  422. // jmp_if_X TBB
  423. // jmp TmpBB
  424. // TmpBB:
  425. // jmp_if_Y TBB
  426. // jmp FBB
  427. //
  428. // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
  429. // The requirement is that
  430. // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
  431. // = TrueProb for original BB.
  432. // Assuming the original probabilities are A and B, one choice is to set
  433. // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
  434. // A/(1+B) and 2B/(1+B). This choice assumes that
  435. // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
  436. // Another choice is to assume TrueProb for BB1 equals TrueProb for
  437. // TmpBB, but the math is more complicated.
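// Worked example (illustrative numbers only): if the original block has
// TrueProb A = 3/4 and FalseProb B = 1/4, BB1 gets A/2 = 3/8 and A/2 + B = 5/8,
// while TmpBB's {A/2, B} = {3/8, 1/4} normalize to {3/5, 2/5}, i.e. A/(1+B) and
// 2B/(1+B). The requirement above holds: 3/8 + 5/8 * 3/5 = 3/4 = A.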
  438. auto NewTrueProb = TProb / 2;
  439. auto NewFalseProb = TProb / 2 + FProb;
  440. // Emit the LHS condition.
  441. findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
  442. NewFalseProb, InvertCond);
  443. // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
  444. SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
  445. BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
  446. // Emit the RHS condition into TmpBB.
  447. findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
  448. Probs[1], InvertCond);
  449. } else {
  450. assert(Opc == Instruction::And && "Unknown merge op!");
  451. // Codegen X & Y as:
  452. // BB1:
  453. // jmp_if_X TmpBB
  454. // jmp FBB
  455. // TmpBB:
  456. // jmp_if_Y TBB
  457. // jmp FBB
  458. //
  459. // This requires creation of TmpBB after CurBB.
  460. // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
  461. // The requirement is that
  462. // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
  463. // = FalseProb for original BB.
  464. // Assuming the original probabilities are A and B, one choice is to set
  465. // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
  466. // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
  467. // TrueProb for BB1 * FalseProb for TmpBB.
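// Worked example (illustrative numbers only): with A = B = 1/2, BB1 gets
// TrueProb A + B/2 = 3/4 and FalseProb B/2 = 1/4, while TmpBB's {A, B/2} =
// {1/2, 1/4} normalize to {2/3, 1/3}, i.e. 2A/(1+A) and B/(1+A). The
// requirement above holds: 1/4 + 3/4 * 1/3 = 1/2 = B.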
  468. auto NewTrueProb = TProb + FProb / 2;
  469. auto NewFalseProb = FProb / 2;
  470. // Emit the LHS condition.
  471. findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
  472. NewFalseProb, InvertCond);
  473. // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
  474. SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
  475. BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
  476. // Emit the RHS condition into TmpBB.
  477. findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
  478. Probs[1], InvertCond);
  479. }
  480. }
  481. bool IRTranslator::shouldEmitAsBranches(
  482. const std::vector<SwitchCG::CaseBlock> &Cases) {
  483. // For multiple cases, it's better to emit as branches.
  484. if (Cases.size() != 2)
  485. return true;
  486. // If this is two comparisons of the same values or'd or and'd together, they
  487. // will get folded into a single comparison, so don't emit two blocks.
  488. if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
  489. Cases[0].CmpRHS == Cases[1].CmpRHS) ||
  490. (Cases[0].CmpRHS == Cases[1].CmpLHS &&
  491. Cases[0].CmpLHS == Cases[1].CmpRHS)) {
  492. return false;
  493. }
  494. // Handle: (X != null) | (Y != null) --> (X|Y) != 0
  495. // Handle: (X == null) & (Y == null) --> (X|Y) == 0
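// Illustrative IR pattern (hypothetical, not taken from this function):
//   %a = icmp ne ptr %X, null
//   %b = icmp ne ptr %Y, null
//   %c = or i1 %a, %b
// Keeping both compares in one block lets later folds turn this into a single
// null check, so emitting two separate branch blocks here would defeat that.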
  496. if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
  497. Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
  498. isa<Constant>(Cases[0].CmpRHS) &&
  499. cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
  500. if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
  501. Cases[0].TrueBB == Cases[1].ThisBB)
  502. return false;
  503. if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
  504. Cases[0].FalseBB == Cases[1].ThisBB)
  505. return false;
  506. }
  507. return true;
  508. }
  509. bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
  510. const BranchInst &BrInst = cast<BranchInst>(U);
  511. auto &CurMBB = MIRBuilder.getMBB();
  512. auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
  513. if (BrInst.isUnconditional()) {
  514. // If the unconditional target is the layout successor, fallthrough.
  515. if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
  516. MIRBuilder.buildBr(*Succ0MBB);
  517. // Link successors.
  518. for (const BasicBlock *Succ : successors(&BrInst))
  519. CurMBB.addSuccessor(&getMBB(*Succ));
  520. return true;
  521. }
  522. // If this condition is one of the special cases we handle, do special stuff
  523. // now.
  524. const Value *CondVal = BrInst.getCondition();
  525. MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
  526. const auto &TLI = *MF->getSubtarget().getTargetLowering();
  527. // If this is a series of conditions that are or'd or and'd together, emit
  528. // this as a sequence of branches instead of setcc's with and/or operations.
  529. // As long as jumps are not expensive (exceptions for multi-use logic ops,
  530. // unpredictable branches, and vector extracts because those jumps are likely
  531. // expensive for any target), this should improve performance.
  532. // For example, instead of something like:
  533. // cmp A, B
  534. // C = seteq
  535. // cmp D, E
  536. // F = setle
  537. // or C, F
  538. // jnz foo
  539. // Emit:
  540. // cmp A, B
  541. // je foo
  542. // cmp D, E
  543. // jle foo
  544. using namespace PatternMatch;
  545. const Instruction *CondI = dyn_cast<Instruction>(CondVal);
  546. if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
  547. !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
  548. Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
  549. Value *Vec;
  550. const Value *BOp0, *BOp1;
  551. if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
  552. Opcode = Instruction::And;
  553. else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
  554. Opcode = Instruction::Or;
  555. if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
  556. match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
  557. findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
  558. getEdgeProbability(&CurMBB, Succ0MBB),
  559. getEdgeProbability(&CurMBB, Succ1MBB),
  560. /*InvertCond=*/false);
  561. assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
  562. // Allow some cases to be rejected.
  563. if (shouldEmitAsBranches(SL->SwitchCases)) {
  564. // Emit the branch for this block.
  565. emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
  566. SL->SwitchCases.erase(SL->SwitchCases.begin());
  567. return true;
  568. }
  569. // Okay, we decided not to do this, remove any inserted MBB's and clear
  570. // SwitchCases.
  571. for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
  572. MF->erase(SL->SwitchCases[I].ThisBB);
  573. SL->SwitchCases.clear();
  574. }
  575. }
  576. // Create a CaseBlock record representing this branch.
  577. SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
  578. ConstantInt::getTrue(MF->getFunction().getContext()),
  579. nullptr, Succ0MBB, Succ1MBB, &CurMBB,
  580. CurBuilder->getDebugLoc());
  581. // Use emitSwitchCase to actually insert the fast branch sequence for this
  582. // cond branch.
  583. emitSwitchCase(CB, &CurMBB, *CurBuilder);
  584. return true;
  585. }
  586. void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
  587. MachineBasicBlock *Dst,
  588. BranchProbability Prob) {
  589. if (!FuncInfo.BPI) {
  590. Src->addSuccessorWithoutProb(Dst);
  591. return;
  592. }
  593. if (Prob.isUnknown())
  594. Prob = getEdgeProbability(Src, Dst);
  595. Src->addSuccessor(Dst, Prob);
  596. }
  597. BranchProbability
  598. IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
  599. const MachineBasicBlock *Dst) const {
  600. const BasicBlock *SrcBB = Src->getBasicBlock();
  601. const BasicBlock *DstBB = Dst->getBasicBlock();
  602. if (!FuncInfo.BPI) {
  603. // If BPI is not available, set the default probability as 1 / N, where N is
  604. // the number of successors.
  605. auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
  606. return BranchProbability(1, SuccSize);
  607. }
  608. return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
  609. }
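// For illustration: without BPI, a source block with three IR successors gives
// every outgoing edge the same default probability of 1/3.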
  610. bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
  611. using namespace SwitchCG;
  612. // Extract cases from the switch.
  613. const SwitchInst &SI = cast<SwitchInst>(U);
  614. BranchProbabilityInfo *BPI = FuncInfo.BPI;
  615. CaseClusterVector Clusters;
  616. Clusters.reserve(SI.getNumCases());
  617. for (const auto &I : SI.cases()) {
  618. MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
  619. assert(Succ && "Could not find successor mbb in mapping");
  620. const ConstantInt *CaseVal = I.getCaseValue();
  621. BranchProbability Prob =
  622. BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
  623. : BranchProbability(1, SI.getNumCases() + 1);
  624. Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
  625. }
  626. MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
  627. // Cluster adjacent cases with the same destination. We do this at all
  628. // optimization levels because it's cheap to do and will make codegen faster
  629. // if there are many clusters.
  630. sortAndRangeify(Clusters);
  631. MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
  632. // If there is only the default destination, jump there directly.
  633. if (Clusters.empty()) {
  634. SwitchMBB->addSuccessor(DefaultMBB);
  635. if (DefaultMBB != SwitchMBB->getNextNode())
  636. MIB.buildBr(*DefaultMBB);
  637. return true;
  638. }
  639. SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
  640. SL->findBitTestClusters(Clusters, &SI);
  641. LLVM_DEBUG({
  642. dbgs() << "Case clusters: ";
  643. for (const CaseCluster &C : Clusters) {
  644. if (C.Kind == CC_JumpTable)
  645. dbgs() << "JT:";
  646. if (C.Kind == CC_BitTests)
  647. dbgs() << "BT:";
  648. C.Low->getValue().print(dbgs(), true);
  649. if (C.Low != C.High) {
  650. dbgs() << '-';
  651. C.High->getValue().print(dbgs(), true);
  652. }
  653. dbgs() << ' ';
  654. }
  655. dbgs() << '\n';
  656. });
  657. assert(!Clusters.empty());
  658. SwitchWorkList WorkList;
  659. CaseClusterIt First = Clusters.begin();
  660. CaseClusterIt Last = Clusters.end() - 1;
  661. auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
  662. WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
  663. // FIXME: At the moment we don't do any splitting optimizations here like
  664. // SelectionDAG does, so this worklist only has one entry.
  665. while (!WorkList.empty()) {
  666. SwitchWorkListItem W = WorkList.pop_back_val();
  667. if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
  668. return false;
  669. }
  670. return true;
  671. }
  672. void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
  673. MachineBasicBlock *MBB) {
  674. // Emit the code for the jump table
  675. assert(JT.Reg != -1U && "Should lower JT Header first!");
  676. MachineIRBuilder MIB(*MBB->getParent());
  677. MIB.setMBB(*MBB);
  678. MIB.setDebugLoc(CurBuilder->getDebugLoc());
  679. Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
  680. const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  681. auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
  682. MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
  683. }
  684. bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
  685. SwitchCG::JumpTableHeader &JTH,
  686. MachineBasicBlock *HeaderBB) {
  687. MachineIRBuilder MIB(*HeaderBB->getParent());
  688. MIB.setMBB(*HeaderBB);
  689. MIB.setDebugLoc(CurBuilder->getDebugLoc());
  690. const Value &SValue = *JTH.SValue;
  691. // Subtract the lowest switch case value from the value being switched on.
  692. const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
  693. Register SwitchOpReg = getOrCreateVReg(SValue);
  694. auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
  695. auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
  696. // This value may be smaller or larger than the target's pointer type, and
  697. // therefore may require extension or truncation.
  698. Type *PtrIRTy = SValue.getType()->getPointerTo();
  699. const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
  700. Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
  701. JT.Reg = Sub.getReg(0);
  702. if (JTH.FallthroughUnreachable) {
  703. if (JT.MBB != HeaderBB->getNextNode())
  704. MIB.buildBr(*JT.MBB);
  705. return true;
  706. }
  707. // Emit the range check for the jump table, and branch to the default block
  708. // for the switch statement if the value being switched on exceeds the
  709. // largest case in the switch.
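// Worked example (hypothetical switch, illustrative only): for case values
// 10..14, JTH.First = 10 and JTH.Last = 14, so Sub = SwitchVal - 10 and the
// compare below branches to the default block when Sub >u 4; otherwise G_BRJT
// uses Sub as the zero-based index into the jump table.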
  710. auto Cst = getOrCreateVReg(
  711. *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
  712. Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
  713. auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
  714. auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
  715. // Avoid emitting unnecessary branches to the next block.
  716. if (JT.MBB != HeaderBB->getNextNode())
  717. BrCond = MIB.buildBr(*JT.MBB);
  718. return true;
  719. }
  720. void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
  721. MachineBasicBlock *SwitchBB,
  722. MachineIRBuilder &MIB) {
  723. Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
  724. Register Cond;
  725. DebugLoc OldDbgLoc = MIB.getDebugLoc();
  726. MIB.setDebugLoc(CB.DbgLoc);
  727. MIB.setMBB(*CB.ThisBB);
  728. if (CB.PredInfo.NoCmp) {
  729. // Branch or fall through to TrueBB.
  730. addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
  731. addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
  732. CB.ThisBB);
  733. CB.ThisBB->normalizeSuccProbs();
  734. if (CB.TrueBB != CB.ThisBB->getNextNode())
  735. MIB.buildBr(*CB.TrueBB);
  736. MIB.setDebugLoc(OldDbgLoc);
  737. return;
  738. }
  739. const LLT i1Ty = LLT::scalar(1);
  740. // Build the compare.
  741. if (!CB.CmpMHS) {
  742. const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
  743. // For conditional branch lowering, we might try to do something silly like
  744. // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
  745. // just re-use the existing condition vreg.
  746. if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
  747. CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
  748. Cond = CondLHS;
  749. } else {
  750. Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
  751. if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
  752. Cond =
  753. MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
  754. else
  755. Cond =
  756. MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
  757. }
  758. } else {
  759. assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
  760. "Can only handle SLE ranges");
  761. const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
  762. const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
  763. Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
  764. if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
  765. Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
  766. Cond =
  767. MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
  768. } else {
  769. const LLT CmpTy = MRI->getType(CmpOpReg);
  770. auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
  771. auto Diff = MIB.buildConstant(CmpTy, High - Low);
  772. Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
  773. }
  774. }
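// Worked example (illustrative values): for the range check Low = 10 <= Cond
// <= High = 14, the else branch above computes Sub = Cond - 10 and tests
// Sub <=u 4, turning the two-sided range test into a single unsigned compare.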
  775. // Update successor info
  776. addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
  777. addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
  778. CB.ThisBB);
  779. // TrueBB and FalseBB are always different unless the incoming IR is
  780. // degenerate. This only happens when running llc on weird IR.
  781. if (CB.TrueBB != CB.FalseBB)
  782. addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
  783. CB.ThisBB->normalizeSuccProbs();
  784. addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
  785. CB.ThisBB);
  786. MIB.buildBrCond(Cond, *CB.TrueBB);
  787. MIB.buildBr(*CB.FalseBB);
  788. MIB.setDebugLoc(OldDbgLoc);
  789. }
  790. bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
  791. MachineBasicBlock *SwitchMBB,
  792. MachineBasicBlock *CurMBB,
  793. MachineBasicBlock *DefaultMBB,
  794. MachineIRBuilder &MIB,
  795. MachineFunction::iterator BBI,
  796. BranchProbability UnhandledProbs,
  797. SwitchCG::CaseClusterIt I,
  798. MachineBasicBlock *Fallthrough,
  799. bool FallthroughUnreachable) {
  800. using namespace SwitchCG;
  801. MachineFunction *CurMF = SwitchMBB->getParent();
  802. // FIXME: Optimize away range check based on pivot comparisons.
  803. JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
  804. SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
  805. BranchProbability DefaultProb = W.DefaultProb;
  806. // The jump block hasn't been inserted yet; insert it here.
  807. MachineBasicBlock *JumpMBB = JT->MBB;
  808. CurMF->insert(BBI, JumpMBB);
  809. // Since the jump table block is separate from the switch block, we need
  810. // to keep track of it as a machine predecessor to the default block,
  811. // otherwise we lose the phi edges.
  812. addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
  813. CurMBB);
  814. addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
  815. JumpMBB);
  816. auto JumpProb = I->Prob;
  817. auto FallthroughProb = UnhandledProbs;
  818. // If the default statement is a target of the jump table, we evenly
  819. // distribute the default probability to successors of CurMBB. Also
  820. // update the probability on the edge from JumpMBB to Fallthrough.
  821. for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
  822. SE = JumpMBB->succ_end();
  823. SI != SE; ++SI) {
  824. if (*SI == DefaultMBB) {
  825. JumpProb += DefaultProb / 2;
  826. FallthroughProb -= DefaultProb / 2;
  827. JumpMBB->setSuccProbability(SI, DefaultProb / 2);
  828. JumpMBB->normalizeSuccProbs();
  829. } else {
  830. // Also record edges from the jump table block to its successors.
  831. addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
  832. JumpMBB);
  833. }
  834. }
  835. if (FallthroughUnreachable)
  836. JTH->FallthroughUnreachable = true;
  837. if (!JTH->FallthroughUnreachable)
  838. addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
  839. addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
  840. CurMBB->normalizeSuccProbs();
  841. // The jump table header will be inserted into our current block; it does the
  842. // range check and falls through to our fallthrough block.
  843. JTH->HeaderBB = CurMBB;
  844. JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
  845. // If we're in the right place, emit the jump table header right now.
  846. if (CurMBB == SwitchMBB) {
  847. if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
  848. return false;
  849. JTH->Emitted = true;
  850. }
  851. return true;
  852. }
  853. bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
  854. Value *Cond,
  855. MachineBasicBlock *Fallthrough,
  856. bool FallthroughUnreachable,
  857. BranchProbability UnhandledProbs,
  858. MachineBasicBlock *CurMBB,
  859. MachineIRBuilder &MIB,
  860. MachineBasicBlock *SwitchMBB) {
  861. using namespace SwitchCG;
  862. const Value *RHS, *LHS, *MHS;
  863. CmpInst::Predicate Pred;
  864. if (I->Low == I->High) {
  865. // Check Cond == I->Low.
  866. Pred = CmpInst::ICMP_EQ;
  867. LHS = Cond;
  868. RHS = I->Low;
  869. MHS = nullptr;
  870. } else {
  871. // Check I->Low <= Cond <= I->High.
  872. Pred = CmpInst::ICMP_SLE;
  873. LHS = I->Low;
  874. MHS = Cond;
  875. RHS = I->High;
  876. }
  877. // If Fallthrough is unreachable, fold away the comparison.
  878. // The false probability is the sum of all unhandled cases.
  879. CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
  880. CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
  881. emitSwitchCase(CB, SwitchMBB, MIB);
  882. return true;
  883. }
  884. void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
  885. MachineBasicBlock *SwitchBB) {
  886. MachineIRBuilder &MIB = *CurBuilder;
  887. MIB.setMBB(*SwitchBB);
  888. // Subtract the minimum value.
  889. Register SwitchOpReg = getOrCreateVReg(*B.SValue);
  890. LLT SwitchOpTy = MRI->getType(SwitchOpReg);
  891. Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
  892. auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
  893. Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
  894. const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  895. LLT MaskTy = SwitchOpTy;
  896. if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
  897. !isPowerOf2_32(MaskTy.getSizeInBits()))
  898. MaskTy = LLT::scalar(PtrTy.getSizeInBits());
  899. else {
  900. // Ensure that the type will fit the mask value.
  901. for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
  902. if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
  903. // Switch table case ranges are encoded into a series of masks.
  904. // Just use the pointer type; it's guaranteed to fit.
  905. MaskTy = LLT::scalar(PtrTy.getSizeInBits());
  906. break;
  907. }
  908. }
  909. }
  910. Register SubReg = RangeSub.getReg(0);
  911. if (SwitchOpTy != MaskTy)
  912. SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
  913. B.RegVT = getMVTForLLT(MaskTy);
  914. B.Reg = SubReg;
  915. MachineBasicBlock *MBB = B.Cases[0].ThisBB;
  916. if (!B.FallthroughUnreachable)
  917. addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
  918. addSuccessorWithProb(SwitchBB, MBB, B.Prob);
  919. SwitchBB->normalizeSuccProbs();
  920. if (!B.FallthroughUnreachable) {
  921. // Conditional branch to the default block.
  922. auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
  923. auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
  924. RangeSub, RangeCst);
  925. MIB.buildBrCond(RangeCmp, *B.Default);
  926. }
  927. // Avoid emitting unnecessary branches to the next block.
  928. if (MBB != SwitchBB->getNextNode())
  929. MIB.buildBr(*MBB);
  930. }
  931. void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
  932. MachineBasicBlock *NextMBB,
  933. BranchProbability BranchProbToNext,
  934. Register Reg, SwitchCG::BitTestCase &B,
  935. MachineBasicBlock *SwitchBB) {
  936. MachineIRBuilder &MIB = *CurBuilder;
  937. MIB.setMBB(*SwitchBB);
  938. LLT SwitchTy = getLLTForMVT(BB.RegVT);
  939. Register Cmp;
  940. unsigned PopCount = llvm::popcount(B.Mask);
  941. if (PopCount == 1) {
  942. // Testing for a single bit; just compare the shift count with what it
  943. // would need to be to shift a 1 bit in that position.
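// Worked example (illustrative mask): for B.Mask = 0b100000 the single set bit
// is at position 5, so countTrailingZeros yields 5 and the compare below
// reduces the bit test to a plain check of Reg == 5.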
  944. auto MaskTrailingZeros =
  945. MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
  946. Cmp =
  947. MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
  948. .getReg(0);
  949. } else if (PopCount == BB.Range) {
  950. // There is only one zero bit in the range, test for it directly.
  951. auto MaskTrailingOnes =
  952. MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
  953. Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
  954. .getReg(0);
  955. } else {
  956. // Make desired shift.
  957. auto CstOne = MIB.buildConstant(SwitchTy, 1);
  958. auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
  959. // Emit bit tests and jumps.
  960. auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
  961. auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
  962. auto CstZero = MIB.buildConstant(SwitchTy, 0);
  963. Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
  964. .getReg(0);
  965. }
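// Worked example (illustrative values): with B.Mask = 0b0101 and Reg = 2, the
// shift above produces 0b0100, the AND with the mask is non-zero, and the
// conditional branch to B.TargetBB emitted below is taken.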
  966. // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
  967. addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
  968. // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
  969. addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
  970. // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
  971. // one as they are relative probabilities (and thus work more like weights),
  972. // and hence we need to normalize them to let the sum of them become one.
  973. SwitchBB->normalizeSuccProbs();
  // Record the fact that the IR edge from the header to the bit test target
  // will go through our new block. Needed for PHIs to have nodes added.
  addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
                    SwitchBB);
  978. MIB.buildBrCond(Cmp, *B.TargetBB);
  979. // Avoid emitting unnecessary branches to the next block.
  980. if (NextMBB != SwitchBB->getNextNode())
  981. MIB.buildBr(*NextMBB);
  982. }
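// Lower one bit-test cluster from the switch work list: insert the bit-test
// blocks, fill in the BitTestBlock fields, and emit the header immediately if
// we are already positioned at the switch block.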
  983. bool IRTranslator::lowerBitTestWorkItem(
  984. SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
  985. MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
  986. MachineIRBuilder &MIB, MachineFunction::iterator BBI,
  987. BranchProbability DefaultProb, BranchProbability UnhandledProbs,
  988. SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
  989. bool FallthroughUnreachable) {
  990. using namespace SwitchCG;
  991. MachineFunction *CurMF = SwitchMBB->getParent();
  992. // FIXME: Optimize away range check based on pivot comparisons.
  993. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
  994. // The bit test blocks haven't been inserted yet; insert them here.
  995. for (BitTestCase &BTC : BTB->Cases)
  996. CurMF->insert(BBI, BTC.ThisBB);
  997. // Fill in fields of the BitTestBlock.
  998. BTB->Parent = CurMBB;
  999. BTB->Default = Fallthrough;
  1000. BTB->DefaultProb = UnhandledProbs;
  // If the cases in the bit test don't form a contiguous range, we evenly
  // distribute the probability of the edge to Fallthrough between the two
  // successors of CurMBB.
  if (!BTB->ContiguousRange) {
    BTB->Prob += DefaultProb / 2;
    BTB->DefaultProb -= DefaultProb / 2;
  }
  1008. if (FallthroughUnreachable)
  1009. BTB->FallthroughUnreachable = true;
  1010. // If we're in the right place, emit the bit test header right now.
  1011. if (CurMBB == SwitchMBB) {
  1012. emitBitTestHeader(*BTB, SwitchMBB);
  1013. BTB->Emitted = true;
  1014. }
  1015. return true;
  1016. }
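// Lower the clusters of one switch work-list item: optionally sort them by
// probability, then dispatch each cluster to the bit-test, jump-table, or
// range lowering, chaining fallthrough blocks between them.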
  1017. bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
  1018. Value *Cond,
  1019. MachineBasicBlock *SwitchMBB,
  1020. MachineBasicBlock *DefaultMBB,
  1021. MachineIRBuilder &MIB) {
  1022. using namespace SwitchCG;
  1023. MachineFunction *CurMF = FuncInfo.MF;
  1024. MachineBasicBlock *NextMBB = nullptr;
  1025. MachineFunction::iterator BBI(W.MBB);
  1026. if (++BBI != FuncInfo.MF->end())
  1027. NextMBB = &*BBI;
  1028. if (EnableOpts) {
  1029. // Here, we order cases by probability so the most likely case will be
  1030. // checked first. However, two clusters can have the same probability in
  1031. // which case their relative ordering is non-deterministic. So we use Low
  1032. // as a tie-breaker as clusters are guaranteed to never overlap.
  1033. llvm::sort(W.FirstCluster, W.LastCluster + 1,
  1034. [](const CaseCluster &a, const CaseCluster &b) {
  1035. return a.Prob != b.Prob
  1036. ? a.Prob > b.Prob
  1037. : a.Low->getValue().slt(b.Low->getValue());
  1038. });
  1039. // Rearrange the case blocks so that the last one falls through if possible
  1040. // without changing the order of probabilities.
  1041. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
  1042. --I;
  1043. if (I->Prob > W.LastCluster->Prob)
  1044. break;
  1045. if (I->Kind == CC_Range && I->MBB == NextMBB) {
  1046. std::swap(*I, *W.LastCluster);
  1047. break;
  1048. }
  1049. }
  1050. }
  1051. // Compute total probability.
  1052. BranchProbability DefaultProb = W.DefaultProb;
  1053. BranchProbability UnhandledProbs = DefaultProb;
  1054. for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
  1055. UnhandledProbs += I->Prob;
  1056. MachineBasicBlock *CurMBB = W.MBB;
  1057. for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
  1058. bool FallthroughUnreachable = false;
  1059. MachineBasicBlock *Fallthrough;
  1060. if (I == W.LastCluster) {
  1061. // For the last cluster, fall through to the default destination.
  1062. Fallthrough = DefaultMBB;
  1063. FallthroughUnreachable = isa<UnreachableInst>(
  1064. DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
  1065. } else {
  1066. Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
  1067. CurMF->insert(BBI, Fallthrough);
  1068. }
  1069. UnhandledProbs -= I->Prob;
  1070. switch (I->Kind) {
  1071. case CC_BitTests: {
  1072. if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
  1073. DefaultProb, UnhandledProbs, I, Fallthrough,
  1074. FallthroughUnreachable)) {
  1075. LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
  1076. return false;
  1077. }
  1078. break;
  1079. }
  1080. case CC_JumpTable: {
  1081. if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
  1082. UnhandledProbs, I, Fallthrough,
  1083. FallthroughUnreachable)) {
  1084. LLVM_DEBUG(dbgs() << "Failed to lower jump table");
  1085. return false;
  1086. }
  1087. break;
  1088. }
  1089. case CC_Range: {
  1090. if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
  1091. FallthroughUnreachable, UnhandledProbs,
  1092. CurMBB, MIB, SwitchMBB)) {
  1093. LLVM_DEBUG(dbgs() << "Failed to lower switch range");
  1094. return false;
  1095. }
  1096. break;
  1097. }
  1098. }
  1099. CurMBB = Fallthrough;
  1100. }
  1101. return true;
  1102. }
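// Translate an IR indirectbr into G_BRINDIRECT plus CFG successor edges,
// skipping duplicate destinations (legal in IR, but not in MIR).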
  1103. bool IRTranslator::translateIndirectBr(const User &U,
  1104. MachineIRBuilder &MIRBuilder) {
  1105. const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
  1106. const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
  1107. MIRBuilder.buildBrIndirect(Tgt);
  1108. // Link successors.
  1109. SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
  1110. MachineBasicBlock &CurBB = MIRBuilder.getMBB();
  1111. for (const BasicBlock *Succ : successors(&BrInst)) {
  1112. // It's legal for indirectbr instructions to have duplicate blocks in the
  1113. // destination list. We don't allow this in MIR. Skip anything that's
  1114. // already a successor.
  1115. if (!AddedSuccessors.insert(Succ).second)
  1116. continue;
  1117. CurBB.addSuccessor(&getMBB(*Succ));
  1118. }
  1119. return true;
  1120. }
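// Return true if V is a swifterror argument or a swifterror alloca.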
static bool isSwiftError(const Value *V) {
  if (auto Arg = dyn_cast<Argument>(V))
    return Arg->hasSwiftErrorAttr();
  if (auto AI = dyn_cast<AllocaInst>(V))
    return AI->isSwiftError();
  return false;
}
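// Translate an IR load into one G_LOAD per split value component, or into a
// copy from the dedicated swifterror vreg when the target supports swifterror
// and the pointer operand is a swifterror value.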
  1128. bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
  1129. const LoadInst &LI = cast<LoadInst>(U);
  1130. unsigned StoreSize = DL->getTypeStoreSize(LI.getType());
  1131. if (StoreSize == 0)
  1132. return true;
  1133. ArrayRef<Register> Regs = getOrCreateVRegs(LI);
  1134. ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
  1135. Register Base = getOrCreateVReg(*LI.getPointerOperand());
  1136. AAMDNodes AAInfo = LI.getAAMetadata();
  1137. const Value *Ptr = LI.getPointerOperand();
  1138. Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType());
  1139. LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  1140. if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
  1141. assert(Regs.size() == 1 && "swifterror should be single pointer");
  1142. Register VReg =
  1143. SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(), Ptr);
  1144. MIRBuilder.buildCopy(Regs[0], VReg);
  1145. return true;
  1146. }
  1147. auto &TLI = *MF->getSubtarget().getTargetLowering();
  1148. MachineMemOperand::Flags Flags =
  1149. TLI.getLoadMemOperandFlags(LI, *DL, AC, LibInfo);
  1150. if (AA && !(Flags & MachineMemOperand::MOInvariant)) {
  1151. if (AA->pointsToConstantMemory(
  1152. MemoryLocation(Ptr, LocationSize::precise(StoreSize), AAInfo))) {
  1153. Flags |= MachineMemOperand::MOInvariant;
  1154. }
  1155. }
  1156. const MDNode *Ranges =
  1157. Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
  1158. for (unsigned i = 0; i < Regs.size(); ++i) {
  1159. Register Addr;
  1160. MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
  1161. MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
  1162. Align BaseAlign = getMemOpAlign(LI);
  1163. auto MMO = MF->getMachineMemOperand(
  1164. Ptr, Flags, MRI->getType(Regs[i]),
  1165. commonAlignment(BaseAlign, Offsets[i] / 8), AAInfo, Ranges,
  1166. LI.getSyncScopeID(), LI.getOrdering());
  1167. MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
  1168. }
  1169. return true;
  1170. }
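// Translate an IR store into one G_STORE per split value component, or into a
// copy to the dedicated swifterror vreg when the target supports swifterror
// and the pointer operand is a swifterror value.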
  1171. bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
  1172. const StoreInst &SI = cast<StoreInst>(U);
  1173. if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
  1174. return true;
  1175. ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
  1176. ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
  1177. Register Base = getOrCreateVReg(*SI.getPointerOperand());
  1178. Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
  1179. LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  1180. if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
  1181. assert(Vals.size() == 1 && "swifterror should be single pointer");
  1182. Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
  1183. SI.getPointerOperand());
  1184. MIRBuilder.buildCopy(VReg, Vals[0]);
  1185. return true;
  1186. }
  1187. auto &TLI = *MF->getSubtarget().getTargetLowering();
  1188. MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
  1189. for (unsigned i = 0; i < Vals.size(); ++i) {
  1190. Register Addr;
  1191. MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
  1192. MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
  1193. Align BaseAlign = getMemOpAlign(SI);
  1194. auto MMO = MF->getMachineMemOperand(
  1195. Ptr, Flags, MRI->getType(Vals[i]),
  1196. commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
  1197. SI.getSyncScopeID(), SI.getOrdering());
  1198. MIRBuilder.buildStore(Vals[i], Addr, *MMO);
  1199. }
  1200. return true;
  1201. }
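// Compute the bit offset selected by the indices of an extractvalue or
// insertvalue (or a generic operand list) into the source aggregate type.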
  1202. static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
  1203. const Value *Src = U.getOperand(0);
  1204. Type *Int32Ty = Type::getInt32Ty(U.getContext());
  1205. // getIndexedOffsetInType is designed for GEPs, so the first index is the
  1206. // usual array element rather than looking into the actual aggregate.
  1207. SmallVector<Value *, 1> Indices;
  1208. Indices.push_back(ConstantInt::get(Int32Ty, 0));
  1209. if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
  1210. for (auto Idx : EVI->indices())
  1211. Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  1212. } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
  1213. for (auto Idx : IVI->indices())
  1214. Indices.push_back(ConstantInt::get(Int32Ty, Idx));
  1215. } else {
  1216. for (unsigned i = 1; i < U.getNumOperands(); ++i)
  1217. Indices.push_back(U.getOperand(i));
  1218. }
  1219. return 8 * static_cast<uint64_t>(
  1220. DL.getIndexedOffsetInType(Src->getType(), Indices));
  1221. }
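// extractvalue needs no new instructions: the destination vregs simply reuse
// the source vregs starting at the extracted offset.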
  1222. bool IRTranslator::translateExtractValue(const User &U,
  1223. MachineIRBuilder &MIRBuilder) {
  1224. const Value *Src = U.getOperand(0);
  1225. uint64_t Offset = getOffsetFromIndices(U, *DL);
  1226. ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  1227. ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
  1228. unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
  1229. auto &DstRegs = allocateVRegs(U);
  1230. for (unsigned i = 0; i < DstRegs.size(); ++i)
  1231. DstRegs[i] = SrcRegs[Idx++];
  1232. return true;
  1233. }
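// insertvalue is likewise lowered by remapping vregs: components at or past
// the insertion offset come from the inserted value, the rest from the source
// aggregate.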
  1234. bool IRTranslator::translateInsertValue(const User &U,
  1235. MachineIRBuilder &MIRBuilder) {
  1236. const Value *Src = U.getOperand(0);
  1237. uint64_t Offset = getOffsetFromIndices(U, *DL);
  1238. auto &DstRegs = allocateVRegs(U);
  1239. ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
  1240. ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
  1241. ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
  1242. auto *InsertedIt = InsertedRegs.begin();
  1243. for (unsigned i = 0; i < DstRegs.size(); ++i) {
  1244. if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
  1245. DstRegs[i] = *InsertedIt++;
  1246. else
  1247. DstRegs[i] = SrcRegs[i];
  1248. }
  1249. return true;
  1250. }
  1251. bool IRTranslator::translateSelect(const User &U,
  1252. MachineIRBuilder &MIRBuilder) {
  1253. Register Tst = getOrCreateVReg(*U.getOperand(0));
  1254. ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
  1255. ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
  1256. ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
  1257. uint16_t Flags = 0;
  1258. if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
  1259. Flags = MachineInstr::copyFlagsFromInstruction(*SI);
  1260. for (unsigned i = 0; i < ResRegs.size(); ++i) {
  1261. MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
  1262. }
  1263. return true;
  1264. }
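// Forward V's vreg as U's value, emitting a COPY only if U already has vregs
// assigned that must keep their numbers.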
  1265. bool IRTranslator::translateCopy(const User &U, const Value &V,
  1266. MachineIRBuilder &MIRBuilder) {
  1267. Register Src = getOrCreateVReg(V);
  1268. auto &Regs = *VMap.getVRegs(U);
  1269. if (Regs.empty()) {
  1270. Regs.push_back(Src);
  1271. VMap.getOffsets(U)->push_back(0);
  1272. } else {
  1273. // If we already assigned a vreg for this instruction, we can't change that.
  1274. // Emit a copy to satisfy the users we already emitted.
  1275. MIRBuilder.buildCopy(Regs[0], Src);
  1276. }
  1277. return true;
  1278. }
bool IRTranslator::translateBitCast(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
  // If we're bitcasting to the source type, we can reuse the source vreg.
  if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
      getLLTForType(*U.getType(), *DL))
    return translateCopy(U, *U.getOperand(0), MIRBuilder);

  return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}

bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                 MachineIRBuilder &MIRBuilder) {
  Register Op = getOrCreateVReg(*U.getOperand(0));
  Register Res = getOrCreateVReg(U);
  MIRBuilder.buildInstr(Opcode, {Res}, {Op});
  return true;
}
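// Translate a GEP by accumulating constant offsets and emitting G_PTR_ADD
// (plus G_MUL for scaled indices); scalar operands are splatted when the GEP
// produces a vector of pointers.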
  1294. bool IRTranslator::translateGetElementPtr(const User &U,
  1295. MachineIRBuilder &MIRBuilder) {
  1296. Value &Op0 = *U.getOperand(0);
  1297. Register BaseReg = getOrCreateVReg(Op0);
  1298. Type *PtrIRTy = Op0.getType();
  1299. LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  1300. Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
  1301. LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  1302. // Normalize Vector GEP - all scalar operands should be converted to the
  1303. // splat vector.
  1304. unsigned VectorWidth = 0;
  1305. // True if we should use a splat vector; using VectorWidth alone is not
  1306. // sufficient.
  1307. bool WantSplatVector = false;
  1308. if (auto *VT = dyn_cast<VectorType>(U.getType())) {
  1309. VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
  1310. // We don't produce 1 x N vectors; those are treated as scalars.
  1311. WantSplatVector = VectorWidth > 1;
  1312. }
  1313. // We might need to splat the base pointer into a vector if the offsets
  1314. // are vectors.
  1315. if (WantSplatVector && !PtrTy.isVector()) {
  1316. BaseReg =
  1317. MIRBuilder
  1318. .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
  1319. .getReg(0);
  1320. PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
  1321. PtrTy = getLLTForType(*PtrIRTy, *DL);
  1322. OffsetIRTy = DL->getIntPtrType(PtrIRTy);
  1323. OffsetTy = getLLTForType(*OffsetIRTy, *DL);
  1324. }
  1325. int64_t Offset = 0;
  1326. for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
  1327. GTI != E; ++GTI) {
  1328. const Value *Idx = GTI.getOperand();
  1329. if (StructType *StTy = GTI.getStructTypeOrNull()) {
  1330. unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
  1331. Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
  1332. continue;
  1333. } else {
  1334. uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
  1335. // If this is a scalar constant or a splat vector of constants,
  1336. // handle it quickly.
  1337. if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
  1338. Offset += ElementSize * CI->getSExtValue();
  1339. continue;
  1340. }
  1341. if (Offset != 0) {
  1342. auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
  1343. BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
  1344. .getReg(0);
  1345. Offset = 0;
  1346. }
  1347. Register IdxReg = getOrCreateVReg(*Idx);
  1348. LLT IdxTy = MRI->getType(IdxReg);
  1349. if (IdxTy != OffsetTy) {
  1350. if (!IdxTy.isVector() && WantSplatVector) {
  1351. IdxReg = MIRBuilder.buildSplatVector(
  1352. OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
  1353. }
  1354. IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
  1355. }
  1356. // N = N + Idx * ElementSize;
  1357. // Avoid doing it for ElementSize of 1.
  1358. Register GepOffsetReg;
  1359. if (ElementSize != 1) {
  1360. auto ElementSizeMIB = MIRBuilder.buildConstant(
  1361. getLLTForType(*OffsetIRTy, *DL), ElementSize);
  1362. GepOffsetReg =
  1363. MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
  1364. } else
  1365. GepOffsetReg = IdxReg;
  1366. BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
  1367. }
  1368. }
  1369. if (Offset != 0) {
  1370. auto OffsetMIB =
  1371. MIRBuilder.buildConstant(OffsetTy, Offset);
  1372. MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
  1373. return true;
  1374. }
  1375. MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
  1376. return true;
  1377. }
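// Common lowering for the memcpy/memmove/memset family into the corresponding
// generic opcode: the size operand is normalized to the narrowest pointer
// width, the tail-call flag is forwarded (except for G_MEMCPY_INLINE), and
// store/load memory operands carrying alignment and volatility are attached.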
  1378. bool IRTranslator::translateMemFunc(const CallInst &CI,
  1379. MachineIRBuilder &MIRBuilder,
  1380. unsigned Opcode) {
  1381. const Value *SrcPtr = CI.getArgOperand(1);
  1382. // If the source is undef, then just emit a nop.
  1383. if (isa<UndefValue>(SrcPtr))
  1384. return true;
  1385. SmallVector<Register, 3> SrcRegs;
  1386. unsigned MinPtrSize = UINT_MAX;
  1387. for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
  1388. Register SrcReg = getOrCreateVReg(**AI);
  1389. LLT SrcTy = MRI->getType(SrcReg);
  1390. if (SrcTy.isPointer())
  1391. MinPtrSize = std::min<unsigned>(SrcTy.getSizeInBits(), MinPtrSize);
  1392. SrcRegs.push_back(SrcReg);
  1393. }
  1394. LLT SizeTy = LLT::scalar(MinPtrSize);
  1395. // The size operand should be the minimum of the pointer sizes.
  1396. Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
  1397. if (MRI->getType(SizeOpReg) != SizeTy)
  1398. SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
  1399. auto ICall = MIRBuilder.buildInstr(Opcode);
  1400. for (Register SrcReg : SrcRegs)
  1401. ICall.addUse(SrcReg);
  1402. Align DstAlign;
  1403. Align SrcAlign;
  1404. unsigned IsVol =
  1405. cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
  1406. ConstantInt *CopySize = nullptr;
  1407. if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
  1408. DstAlign = MCI->getDestAlign().valueOrOne();
  1409. SrcAlign = MCI->getSourceAlign().valueOrOne();
  1410. CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
  1411. } else if (auto *MCI = dyn_cast<MemCpyInlineInst>(&CI)) {
  1412. DstAlign = MCI->getDestAlign().valueOrOne();
  1413. SrcAlign = MCI->getSourceAlign().valueOrOne();
  1414. CopySize = dyn_cast<ConstantInt>(MCI->getArgOperand(2));
  1415. } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
  1416. DstAlign = MMI->getDestAlign().valueOrOne();
  1417. SrcAlign = MMI->getSourceAlign().valueOrOne();
  1418. CopySize = dyn_cast<ConstantInt>(MMI->getArgOperand(2));
  1419. } else {
  1420. auto *MSI = cast<MemSetInst>(&CI);
  1421. DstAlign = MSI->getDestAlign().valueOrOne();
  1422. }
  1423. if (Opcode != TargetOpcode::G_MEMCPY_INLINE) {
  1424. // We need to propagate the tail call flag from the IR inst as an argument.
  1425. // Otherwise, we have to pessimize and assume later that we cannot tail call
  1426. // any memory intrinsics.
  1427. ICall.addImm(CI.isTailCall() ? 1 : 0);
  1428. }
  1429. // Create mem operands to store the alignment and volatile info.
  1430. MachineMemOperand::Flags LoadFlags = MachineMemOperand::MOLoad;
  1431. MachineMemOperand::Flags StoreFlags = MachineMemOperand::MOStore;
  1432. if (IsVol) {
  1433. LoadFlags |= MachineMemOperand::MOVolatile;
  1434. StoreFlags |= MachineMemOperand::MOVolatile;
  1435. }
  1436. AAMDNodes AAInfo = CI.getAAMetadata();
  1437. if (AA && CopySize &&
  1438. AA->pointsToConstantMemory(MemoryLocation(
  1439. SrcPtr, LocationSize::precise(CopySize->getZExtValue()), AAInfo))) {
  1440. LoadFlags |= MachineMemOperand::MOInvariant;
  1441. // FIXME: pointsToConstantMemory probably does not imply dereferenceable,
  1442. // but the previous usage implied it did. Probably should check
  1443. // isDereferenceableAndAlignedPointer.
  1444. LoadFlags |= MachineMemOperand::MODereferenceable;
  1445. }
  1446. ICall.addMemOperand(
  1447. MF->getMachineMemOperand(MachinePointerInfo(CI.getArgOperand(0)),
  1448. StoreFlags, 1, DstAlign, AAInfo));
  1449. if (Opcode != TargetOpcode::G_MEMSET)
  1450. ICall.addMemOperand(MF->getMachineMemOperand(
  1451. MachinePointerInfo(SrcPtr), LoadFlags, 1, SrcAlign, AAInfo));
  1452. return true;
  1453. }
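// Load the stack guard value into DstReg via LOAD_STACK_GUARD, attaching an
// invariant, dereferenceable memory operand for the guard variable when the
// target exposes one.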
  1454. void IRTranslator::getStackGuard(Register DstReg,
  1455. MachineIRBuilder &MIRBuilder) {
  1456. const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  1457. MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
  1458. auto MIB =
  1459. MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
  1460. auto &TLI = *MF->getSubtarget().getTargetLowering();
  1461. Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
  1462. if (!Global)
  1463. return;
  1464. unsigned AddrSpace = Global->getType()->getPointerAddressSpace();
  1465. LLT PtrTy = LLT::pointer(AddrSpace, DL->getPointerSizeInBits(AddrSpace));
  1466. MachinePointerInfo MPInfo(Global);
  1467. auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
  1468. MachineMemOperand::MODereferenceable;
  1469. MachineMemOperand *MemRef = MF->getMachineMemOperand(
  1470. MPInfo, Flags, PtrTy, DL->getPointerABIAlignment(AddrSpace));
  1471. MIB.setMemRefs({MemRef});
  1472. }
  1473. bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
  1474. MachineIRBuilder &MIRBuilder) {
  1475. ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
  1476. MIRBuilder.buildInstr(
  1477. Op, {ResRegs[0], ResRegs[1]},
  1478. {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
  1479. return true;
  1480. }
  1481. bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
  1482. MachineIRBuilder &MIRBuilder) {
  1483. Register Dst = getOrCreateVReg(CI);
  1484. Register Src0 = getOrCreateVReg(*CI.getOperand(0));
  1485. Register Src1 = getOrCreateVReg(*CI.getOperand(1));
  1486. uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
  1487. MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
  1488. return true;
  1489. }
  1490. unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
  1491. switch (ID) {
  1492. default:
  1493. break;
  1494. case Intrinsic::bswap:
  1495. return TargetOpcode::G_BSWAP;
  1496. case Intrinsic::bitreverse:
  1497. return TargetOpcode::G_BITREVERSE;
  1498. case Intrinsic::fshl:
  1499. return TargetOpcode::G_FSHL;
  1500. case Intrinsic::fshr:
  1501. return TargetOpcode::G_FSHR;
  1502. case Intrinsic::ceil:
  1503. return TargetOpcode::G_FCEIL;
  1504. case Intrinsic::cos:
  1505. return TargetOpcode::G_FCOS;
  1506. case Intrinsic::ctpop:
  1507. return TargetOpcode::G_CTPOP;
  1508. case Intrinsic::exp:
  1509. return TargetOpcode::G_FEXP;
  1510. case Intrinsic::exp2:
  1511. return TargetOpcode::G_FEXP2;
  1512. case Intrinsic::fabs:
  1513. return TargetOpcode::G_FABS;
  1514. case Intrinsic::copysign:
  1515. return TargetOpcode::G_FCOPYSIGN;
  1516. case Intrinsic::minnum:
  1517. return TargetOpcode::G_FMINNUM;
  1518. case Intrinsic::maxnum:
  1519. return TargetOpcode::G_FMAXNUM;
  1520. case Intrinsic::minimum:
  1521. return TargetOpcode::G_FMINIMUM;
  1522. case Intrinsic::maximum:
  1523. return TargetOpcode::G_FMAXIMUM;
  1524. case Intrinsic::canonicalize:
  1525. return TargetOpcode::G_FCANONICALIZE;
  1526. case Intrinsic::floor:
  1527. return TargetOpcode::G_FFLOOR;
  1528. case Intrinsic::fma:
  1529. return TargetOpcode::G_FMA;
  1530. case Intrinsic::log:
  1531. return TargetOpcode::G_FLOG;
  1532. case Intrinsic::log2:
  1533. return TargetOpcode::G_FLOG2;
  1534. case Intrinsic::log10:
  1535. return TargetOpcode::G_FLOG10;
  1536. case Intrinsic::nearbyint:
  1537. return TargetOpcode::G_FNEARBYINT;
  1538. case Intrinsic::pow:
  1539. return TargetOpcode::G_FPOW;
  1540. case Intrinsic::powi:
  1541. return TargetOpcode::G_FPOWI;
  1542. case Intrinsic::rint:
  1543. return TargetOpcode::G_FRINT;
  1544. case Intrinsic::round:
  1545. return TargetOpcode::G_INTRINSIC_ROUND;
  1546. case Intrinsic::roundeven:
  1547. return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
  1548. case Intrinsic::sin:
  1549. return TargetOpcode::G_FSIN;
  1550. case Intrinsic::sqrt:
  1551. return TargetOpcode::G_FSQRT;
  1552. case Intrinsic::trunc:
  1553. return TargetOpcode::G_INTRINSIC_TRUNC;
  1554. case Intrinsic::readcyclecounter:
  1555. return TargetOpcode::G_READCYCLECOUNTER;
  1556. case Intrinsic::ptrmask:
  1557. return TargetOpcode::G_PTRMASK;
  1558. case Intrinsic::lrint:
  1559. return TargetOpcode::G_INTRINSIC_LRINT;
  1560. // FADD/FMUL require checking the FMF, so are handled elsewhere.
  1561. case Intrinsic::vector_reduce_fmin:
  1562. return TargetOpcode::G_VECREDUCE_FMIN;
  1563. case Intrinsic::vector_reduce_fmax:
  1564. return TargetOpcode::G_VECREDUCE_FMAX;
  1565. case Intrinsic::vector_reduce_add:
  1566. return TargetOpcode::G_VECREDUCE_ADD;
  1567. case Intrinsic::vector_reduce_mul:
  1568. return TargetOpcode::G_VECREDUCE_MUL;
  1569. case Intrinsic::vector_reduce_and:
  1570. return TargetOpcode::G_VECREDUCE_AND;
  1571. case Intrinsic::vector_reduce_or:
  1572. return TargetOpcode::G_VECREDUCE_OR;
  1573. case Intrinsic::vector_reduce_xor:
  1574. return TargetOpcode::G_VECREDUCE_XOR;
  1575. case Intrinsic::vector_reduce_smax:
  1576. return TargetOpcode::G_VECREDUCE_SMAX;
  1577. case Intrinsic::vector_reduce_smin:
  1578. return TargetOpcode::G_VECREDUCE_SMIN;
  1579. case Intrinsic::vector_reduce_umax:
  1580. return TargetOpcode::G_VECREDUCE_UMAX;
  1581. case Intrinsic::vector_reduce_umin:
  1582. return TargetOpcode::G_VECREDUCE_UMIN;
  1583. case Intrinsic::lround:
  1584. return TargetOpcode::G_LROUND;
  1585. case Intrinsic::llround:
  1586. return TargetOpcode::G_LLROUND;
  1587. }
  1588. return Intrinsic::not_intrinsic;
  1589. }
  1590. bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
  1591. Intrinsic::ID ID,
  1592. MachineIRBuilder &MIRBuilder) {
  1593. unsigned Op = getSimpleIntrinsicOpcode(ID);
  1594. // Is this a simple intrinsic?
  1595. if (Op == Intrinsic::not_intrinsic)
  1596. return false;
  1597. // Yes. Let's translate it.
  1598. SmallVector<llvm::SrcOp, 4> VRegs;
  1599. for (const auto &Arg : CI.args())
  1600. VRegs.push_back(getOrCreateVReg(*Arg));
  1601. MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
  1602. MachineInstr::copyFlagsFromInstruction(CI));
  1603. return true;
  1604. }
// TODO: Include ConstrainedOps.def when all strict instructions are defined.
  1606. static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
  1607. switch (ID) {
  1608. case Intrinsic::experimental_constrained_fadd:
  1609. return TargetOpcode::G_STRICT_FADD;
  1610. case Intrinsic::experimental_constrained_fsub:
  1611. return TargetOpcode::G_STRICT_FSUB;
  1612. case Intrinsic::experimental_constrained_fmul:
  1613. return TargetOpcode::G_STRICT_FMUL;
  1614. case Intrinsic::experimental_constrained_fdiv:
  1615. return TargetOpcode::G_STRICT_FDIV;
  1616. case Intrinsic::experimental_constrained_frem:
  1617. return TargetOpcode::G_STRICT_FREM;
  1618. case Intrinsic::experimental_constrained_fma:
  1619. return TargetOpcode::G_STRICT_FMA;
  1620. case Intrinsic::experimental_constrained_sqrt:
  1621. return TargetOpcode::G_STRICT_FSQRT;
  1622. default:
  1623. return 0;
  1624. }
  1625. }
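// Lower a constrained FP intrinsic to its G_STRICT_* opcode, setting the
// NoFPExcept flag when the exception behavior is fp::ebIgnore.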
  1626. bool IRTranslator::translateConstrainedFPIntrinsic(
  1627. const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
  1628. fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
  1629. unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
  1630. if (!Opcode)
  1631. return false;
  1632. unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
  1633. if (EB == fp::ExceptionBehavior::ebIgnore)
  1634. Flags |= MachineInstr::NoFPExcept;
  1635. SmallVector<llvm::SrcOp, 4> VRegs;
  1636. VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
  1637. if (!FPI.isUnaryOp())
  1638. VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
  1639. if (FPI.isTernaryOp())
  1640. VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
  1641. MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
  1642. return true;
  1643. }
  1644. bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
  1645. MachineIRBuilder &MIRBuilder) {
  1646. if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
  1647. if (ORE->enabled()) {
  1648. if (MemoryOpRemark::canHandle(MI, *LibInfo)) {
  1649. MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
  1650. R.visit(MI);
  1651. }
  1652. }
  1653. }
  // If this is a simple intrinsic (that is, we just need to add a def of
  // a vreg and uses for each arg operand), then translate it.
  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
    return true;
  1658. switch (ID) {
  1659. default:
  1660. break;
  1661. case Intrinsic::lifetime_start:
  1662. case Intrinsic::lifetime_end: {
  1663. // No stack colouring in O0, discard region information.
  1664. if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
  1665. return true;
  1666. unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
  1667. : TargetOpcode::LIFETIME_END;
  1668. // Get the underlying objects for the location passed on the lifetime
  1669. // marker.
  1670. SmallVector<const Value *, 4> Allocas;
  1671. getUnderlyingObjects(CI.getArgOperand(1), Allocas);
  1672. // Iterate over each underlying object, creating lifetime markers for each
  1673. // static alloca. Quit if we find a non-static alloca.
  1674. for (const Value *V : Allocas) {
  1675. const AllocaInst *AI = dyn_cast<AllocaInst>(V);
  1676. if (!AI)
  1677. continue;
  1678. if (!AI->isStaticAlloca())
  1679. return true;
  1680. MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
  1681. }
  1682. return true;
  1683. }
  1684. case Intrinsic::dbg_declare: {
  1685. const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
  1686. assert(DI.getVariable() && "Missing variable");
  1687. const Value *Address = DI.getAddress();
  1688. if (!Address || isa<UndefValue>(Address)) {
  1689. LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
  1690. return true;
  1691. }
  1692. assert(DI.getVariable()->isValidLocationForIntrinsic(
  1693. MIRBuilder.getDebugLoc()) &&
  1694. "Expected inlined-at fields to agree");
  1695. auto AI = dyn_cast<AllocaInst>(Address);
  1696. if (AI && AI->isStaticAlloca()) {
  1697. // Static allocas are tracked at the MF level, no need for DBG_VALUE
  1698. // instructions (in fact, they get ignored if they *do* exist).
  1699. MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
  1700. getOrCreateFrameIndex(*AI), DI.getDebugLoc());
  1701. } else {
  1702. // A dbg.declare describes the address of a source variable, so lower it
  1703. // into an indirect DBG_VALUE.
  1704. MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
  1705. DI.getVariable(), DI.getExpression());
  1706. }
  1707. return true;
  1708. }
  1709. case Intrinsic::dbg_label: {
  1710. const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
  1711. assert(DI.getLabel() && "Missing label");
  1712. assert(DI.getLabel()->isValidLocationForIntrinsic(
  1713. MIRBuilder.getDebugLoc()) &&
  1714. "Expected inlined-at fields to agree");
  1715. MIRBuilder.buildDbgLabel(DI.getLabel());
  1716. return true;
  1717. }
  1718. case Intrinsic::vaend:
  1719. // No target I know of cares about va_end. Certainly no in-tree target
  1720. // does. Simplest intrinsic ever!
  1721. return true;
  1722. case Intrinsic::vastart: {
  1723. auto &TLI = *MF->getSubtarget().getTargetLowering();
  1724. Value *Ptr = CI.getArgOperand(0);
  1725. unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
  1726. // FIXME: Get alignment
  1727. MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
  1728. .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
  1729. MachineMemOperand::MOStore,
  1730. ListSize, Align(1)));
  1731. return true;
  1732. }
  1733. case Intrinsic::dbg_value: {
  1734. // This form of DBG_VALUE is target-independent.
  1735. const DbgValueInst &DI = cast<DbgValueInst>(CI);
  1736. const Value *V = DI.getValue();
  1737. assert(DI.getVariable()->isValidLocationForIntrinsic(
  1738. MIRBuilder.getDebugLoc()) &&
  1739. "Expected inlined-at fields to agree");
  1740. if (!V || DI.hasArgList()) {
  1741. // DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
  1742. // terminate any prior location.
  1743. MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
  1744. } else if (const auto *CI = dyn_cast<Constant>(V)) {
  1745. MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
  1746. } else {
  1747. for (Register Reg : getOrCreateVRegs(*V)) {
  1748. // FIXME: This does not handle register-indirect values at offset 0. The
  1749. // direct/indirect thing shouldn't really be handled by something as
  1750. // implicit as reg+noreg vs reg+imm in the first place, but it seems
  1751. // pretty baked in right now.
  1752. MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
  1753. }
  1754. }
  1755. return true;
  1756. }
  1757. case Intrinsic::uadd_with_overflow:
  1758. return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
  1759. case Intrinsic::sadd_with_overflow:
  1760. return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
  1761. case Intrinsic::usub_with_overflow:
  1762. return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
  1763. case Intrinsic::ssub_with_overflow:
  1764. return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
  1765. case Intrinsic::umul_with_overflow:
  1766. return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
  1767. case Intrinsic::smul_with_overflow:
  1768. return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
  1769. case Intrinsic::uadd_sat:
  1770. return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
  1771. case Intrinsic::sadd_sat:
  1772. return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
  1773. case Intrinsic::usub_sat:
  1774. return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
  1775. case Intrinsic::ssub_sat:
  1776. return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
  1777. case Intrinsic::ushl_sat:
  1778. return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
  1779. case Intrinsic::sshl_sat:
  1780. return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
  1781. case Intrinsic::umin:
  1782. return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
  1783. case Intrinsic::umax:
  1784. return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
  1785. case Intrinsic::smin:
  1786. return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
  1787. case Intrinsic::smax:
  1788. return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
  1789. case Intrinsic::abs:
  1790. // TODO: Preserve "int min is poison" arg in GMIR?
  1791. return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
  1792. case Intrinsic::smul_fix:
  1793. return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
  1794. case Intrinsic::umul_fix:
  1795. return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
  1796. case Intrinsic::smul_fix_sat:
  1797. return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
  1798. case Intrinsic::umul_fix_sat:
  1799. return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
  1800. case Intrinsic::sdiv_fix:
  1801. return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
  1802. case Intrinsic::udiv_fix:
  1803. return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
  1804. case Intrinsic::sdiv_fix_sat:
  1805. return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
  1806. case Intrinsic::udiv_fix_sat:
  1807. return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
  1808. case Intrinsic::fmuladd: {
  1809. const TargetMachine &TM = MF->getTarget();
  1810. const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  1811. Register Dst = getOrCreateVReg(CI);
  1812. Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
  1813. Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
  1814. Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
  1815. if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
  1816. TLI.isFMAFasterThanFMulAndFAdd(*MF,
  1817. TLI.getValueType(*DL, CI.getType()))) {
  1818. // TODO: Revisit this to see if we should move this part of the
  1819. // lowering to the combiner.
  1820. MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
  1821. MachineInstr::copyFlagsFromInstruction(CI));
  1822. } else {
  1823. LLT Ty = getLLTForType(*CI.getType(), *DL);
  1824. auto FMul = MIRBuilder.buildFMul(
  1825. Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
  1826. MIRBuilder.buildFAdd(Dst, FMul, Op2,
  1827. MachineInstr::copyFlagsFromInstruction(CI));
  1828. }
  1829. return true;
  1830. }
  1831. case Intrinsic::convert_from_fp16:
  1832. // FIXME: This intrinsic should probably be removed from the IR.
  1833. MIRBuilder.buildFPExt(getOrCreateVReg(CI),
  1834. getOrCreateVReg(*CI.getArgOperand(0)),
  1835. MachineInstr::copyFlagsFromInstruction(CI));
  1836. return true;
  1837. case Intrinsic::convert_to_fp16:
  1838. // FIXME: This intrinsic should probably be removed from the IR.
  1839. MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
  1840. getOrCreateVReg(*CI.getArgOperand(0)),
  1841. MachineInstr::copyFlagsFromInstruction(CI));
  1842. return true;
  1843. case Intrinsic::memcpy_inline:
  1844. return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
  1845. case Intrinsic::memcpy:
  1846. return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
  1847. case Intrinsic::memmove:
  1848. return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
  1849. case Intrinsic::memset:
  1850. return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
  1851. case Intrinsic::eh_typeid_for: {
  1852. GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
  1853. Register Reg = getOrCreateVReg(CI);
  1854. unsigned TypeID = MF->getTypeIDFor(GV);
  1855. MIRBuilder.buildConstant(Reg, TypeID);
  1856. return true;
  1857. }
  1858. case Intrinsic::objectsize:
  1859. llvm_unreachable("llvm.objectsize.* should have been lowered already");
  1860. case Intrinsic::is_constant:
  1861. llvm_unreachable("llvm.is.constant.* should have been lowered already");
  1862. case Intrinsic::stackguard:
  1863. getStackGuard(getOrCreateVReg(CI), MIRBuilder);
  1864. return true;
  1865. case Intrinsic::stackprotector: {
  1866. const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  1867. LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
  1868. Register GuardVal;
  1869. if (TLI.useLoadStackGuardNode()) {
  1870. GuardVal = MRI->createGenericVirtualRegister(PtrTy);
  1871. getStackGuard(GuardVal, MIRBuilder);
  1872. } else
  1873. GuardVal = getOrCreateVReg(*CI.getArgOperand(0)); // The guard's value.
  1874. AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
  1875. int FI = getOrCreateFrameIndex(*Slot);
  1876. MF->getFrameInfo().setStackProtectorIndex(FI);
  1877. MIRBuilder.buildStore(
  1878. GuardVal, getOrCreateVReg(*Slot),
  1879. *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
  1880. MachineMemOperand::MOStore |
  1881. MachineMemOperand::MOVolatile,
  1882. PtrTy, Align(8)));
  1883. return true;
  1884. }
  1885. case Intrinsic::stacksave: {
  1886. // Save the stack pointer to the location provided by the intrinsic.
  1887. Register Reg = getOrCreateVReg(CI);
  1888. Register StackPtr = MF->getSubtarget()
  1889. .getTargetLowering()
  1890. ->getStackPointerRegisterToSaveRestore();
  1891. // If the target doesn't specify a stack pointer, then fall back.
  1892. if (!StackPtr)
  1893. return false;
  1894. MIRBuilder.buildCopy(Reg, StackPtr);
  1895. return true;
  1896. }
  1897. case Intrinsic::stackrestore: {
  1898. // Restore the stack pointer from the location provided by the intrinsic.
  1899. Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
  1900. Register StackPtr = MF->getSubtarget()
  1901. .getTargetLowering()
  1902. ->getStackPointerRegisterToSaveRestore();
  1903. // If the target doesn't specify a stack pointer, then fall back.
  1904. if (!StackPtr)
  1905. return false;
  1906. MIRBuilder.buildCopy(StackPtr, Reg);
  1907. return true;
  1908. }
  1909. case Intrinsic::cttz:
  1910. case Intrinsic::ctlz: {
  1911. ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
  1912. bool isTrailing = ID == Intrinsic::cttz;
  1913. unsigned Opcode = isTrailing
  1914. ? Cst->isZero() ? TargetOpcode::G_CTTZ
  1915. : TargetOpcode::G_CTTZ_ZERO_UNDEF
  1916. : Cst->isZero() ? TargetOpcode::G_CTLZ
  1917. : TargetOpcode::G_CTLZ_ZERO_UNDEF;
  1918. MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
  1919. {getOrCreateVReg(*CI.getArgOperand(0))});
  1920. return true;
  1921. }
  1922. case Intrinsic::invariant_start: {
  1923. LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
  1924. Register Undef = MRI->createGenericVirtualRegister(PtrTy);
  1925. MIRBuilder.buildUndef(Undef);
  1926. return true;
  1927. }
  1928. case Intrinsic::invariant_end:
  1929. return true;
  1930. case Intrinsic::expect:
  1931. case Intrinsic::annotation:
  1932. case Intrinsic::ptr_annotation:
  1933. case Intrinsic::launder_invariant_group:
  1934. case Intrinsic::strip_invariant_group: {
  1935. // Drop the intrinsic, but forward the value.
  1936. MIRBuilder.buildCopy(getOrCreateVReg(CI),
  1937. getOrCreateVReg(*CI.getArgOperand(0)));
  1938. return true;
  1939. }
  1940. case Intrinsic::assume:
  1941. case Intrinsic::experimental_noalias_scope_decl:
  1942. case Intrinsic::var_annotation:
  1943. case Intrinsic::sideeffect:
  1944. // Discard annotate attributes, assumptions, and artificial side-effects.
  1945. return true;
  1946. case Intrinsic::read_volatile_register:
  1947. case Intrinsic::read_register: {
  1948. Value *Arg = CI.getArgOperand(0);
  1949. MIRBuilder
  1950. .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
  1951. .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
  1952. return true;
  1953. }
  1954. case Intrinsic::write_register: {
  1955. Value *Arg = CI.getArgOperand(0);
  1956. MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
  1957. .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
  1958. .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
  1959. return true;
  1960. }
  1961. case Intrinsic::localescape: {
  1962. MachineBasicBlock &EntryMBB = MF->front();
  1963. StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
  1964. // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
  1965. // is the same on all targets.
  1966. for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
  1967. Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
  1968. if (isa<ConstantPointerNull>(Arg))
  1969. continue; // Skip null pointers. They represent a hole in index space.
  1970. int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
  1971. MCSymbol *FrameAllocSym =
  1972. MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
  1973. Idx);
  1974. // This should be inserted at the start of the entry block.
  1975. auto LocalEscape =
  1976. MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
  1977. .addSym(FrameAllocSym)
  1978. .addFrameIndex(FI);
  1979. EntryMBB.insert(EntryMBB.begin(), LocalEscape);
  1980. }
  1981. return true;
  1982. }
  1983. case Intrinsic::vector_reduce_fadd:
  1984. case Intrinsic::vector_reduce_fmul: {
  1985. // Need to check for the reassoc flag to decide whether we want a
  1986. // sequential reduction opcode or not.
  1987. Register Dst = getOrCreateVReg(CI);
  1988. Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
  1989. Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
  1990. unsigned Opc = 0;
  1991. if (!CI.hasAllowReassoc()) {
  1992. // The sequential ordering case.
  1993. Opc = ID == Intrinsic::vector_reduce_fadd
  1994. ? TargetOpcode::G_VECREDUCE_SEQ_FADD
  1995. : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
  1996. MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
  1997. MachineInstr::copyFlagsFromInstruction(CI));
  1998. return true;
  1999. }
  2000. // We split the operation into a separate G_FADD/G_FMUL + the reduce,
  2001. // since the associativity doesn't matter.
  2002. unsigned ScalarOpc;
  2003. if (ID == Intrinsic::vector_reduce_fadd) {
  2004. Opc = TargetOpcode::G_VECREDUCE_FADD;
  2005. ScalarOpc = TargetOpcode::G_FADD;
  2006. } else {
  2007. Opc = TargetOpcode::G_VECREDUCE_FMUL;
  2008. ScalarOpc = TargetOpcode::G_FMUL;
  2009. }
  2010. LLT DstTy = MRI->getType(Dst);
  2011. auto Rdx = MIRBuilder.buildInstr(
  2012. Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
  2013. MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
  2014. MachineInstr::copyFlagsFromInstruction(CI));
  2015. return true;
  2016. }
  2017. case Intrinsic::trap:
  2018. case Intrinsic::debugtrap:
  2019. case Intrinsic::ubsantrap: {
  2020. StringRef TrapFuncName =
  2021. CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
  2022. if (TrapFuncName.empty())
  2023. break; // Use the default handling.
  2024. CallLowering::CallLoweringInfo Info;
  2025. if (ID == Intrinsic::ubsantrap) {
  2026. Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
  2027. CI.getArgOperand(0)->getType(), 0});
  2028. }
  2029. Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
  2030. Info.CB = &CI;
  2031. Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
  2032. return CLI->lowerCall(MIRBuilder, Info);
  2033. }
  2034. case Intrinsic::fptrunc_round: {
  2035. unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
  2036. // Convert the metadata argument to a constant integer
  2037. Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
  2038. std::optional<RoundingMode> RoundMode =
  2039. convertStrToRoundingMode(cast<MDString>(MD)->getString());
  2040. // Add the Rounding mode as an integer
  2041. MIRBuilder
  2042. .buildInstr(TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND,
  2043. {getOrCreateVReg(CI)},
  2044. {getOrCreateVReg(*CI.getArgOperand(0))}, Flags)
  2045. .addImm((int)*RoundMode);
  2046. return true;
  2047. }
  2048. case Intrinsic::is_fpclass: {
  2049. Value *FpValue = CI.getOperand(0);
  2050. ConstantInt *TestMaskValue = cast<ConstantInt>(CI.getOperand(1));
  2051. MIRBuilder
  2052. .buildInstr(TargetOpcode::G_IS_FPCLASS, {getOrCreateVReg(CI)},
  2053. {getOrCreateVReg(*FpValue)})
  2054. .addImm(TestMaskValue->getZExtValue());
  2055. return true;
  2056. }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
  case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
    return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
                                           MIRBuilder);
  }
  return false;
}
  2065. bool IRTranslator::translateInlineAsm(const CallBase &CB,
  2066. MachineIRBuilder &MIRBuilder) {
  2067. const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
  2068. if (!ALI) {
  2069. LLVM_DEBUG(
  2070. dbgs() << "Inline asm lowering is not supported for this target yet\n");
  2071. return false;
  2072. }
  2073. return ALI->lowerInlineAsm(
  2074. MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
  2075. }
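// Common call lowering for calls and invokes: gather the argument vregs
// (routing swifterror values through their dedicated vregs), hand off to
// CallLowering, and record whether a tail call was emitted.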
  2076. bool IRTranslator::translateCallBase(const CallBase &CB,
  2077. MachineIRBuilder &MIRBuilder) {
  2078. ArrayRef<Register> Res = getOrCreateVRegs(CB);
  2079. SmallVector<ArrayRef<Register>, 8> Args;
  2080. Register SwiftInVReg = 0;
  2081. Register SwiftErrorVReg = 0;
  2082. for (const auto &Arg : CB.args()) {
  2083. if (CLI->supportSwiftError() && isSwiftError(Arg)) {
  2084. assert(SwiftInVReg == 0 && "Expected only one swift error argument");
  2085. LLT Ty = getLLTForType(*Arg->getType(), *DL);
  2086. SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
  2087. MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
  2088. &CB, &MIRBuilder.getMBB(), Arg));
  2089. Args.emplace_back(ArrayRef(SwiftInVReg));
  2090. SwiftErrorVReg =
  2091. SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
  2092. continue;
  2093. }
  2094. Args.push_back(getOrCreateVRegs(*Arg));
  2095. }
  2096. if (auto *CI = dyn_cast<CallInst>(&CB)) {
  2097. if (ORE->enabled()) {
  2098. if (MemoryOpRemark::canHandle(CI, *LibInfo)) {
  2099. MemoryOpRemark R(*ORE, "gisel-irtranslator-memsize", *DL, *LibInfo);
  2100. R.visit(CI);
  2101. }
  2102. }
  2103. }
  2104. // We don't set HasCalls on MFI here yet because call lowering may decide to
  2105. // optimize into tail calls. Instead, we defer that to selection where a final
  2106. // scan is done to check if any instructions are calls.
  2107. bool Success =
  2108. CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
  2109. [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
  2110. // Check if we just inserted a tail call.
  2111. if (Success) {
  2112. assert(!HasTailCall && "Can't tail call return twice from block?");
  2113. const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  2114. HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
  2115. }
  2116. return Success;
  2117. }
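// Translate a call instruction: after bailing out on unsupported cases
// (dllimport, control flow guard targets, statepoints), inline asm and known
// intrinsics get dedicated lowering, remaining intrinsics become generic
// intrinsic instructions, and ordinary calls go through translateCallBase.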
  2118. bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
  2119. const CallInst &CI = cast<CallInst>(U);
  2120. auto TII = MF->getTarget().getIntrinsicInfo();
  2121. const Function *F = CI.getCalledFunction();
  2122. // FIXME: support Windows dllimport function calls.
  2123. if (F && (F->hasDLLImportStorageClass() ||
  2124. (MF->getTarget().getTargetTriple().isOSWindows() &&
  2125. F->hasExternalWeakLinkage())))
  2126. return false;
  2127. // FIXME: support control flow guard targets.
  2128. if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
  2129. return false;
  2130. // FIXME: support statepoints and related.
  2131. if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
  2132. return false;
  2133. if (CI.isInlineAsm())
  2134. return translateInlineAsm(CI, MIRBuilder);
  2135. diagnoseDontCall(CI);
  2136. Intrinsic::ID ID = Intrinsic::not_intrinsic;
  2137. if (F && F->isIntrinsic()) {
  2138. ID = F->getIntrinsicID();
  2139. if (TII && ID == Intrinsic::not_intrinsic)
  2140. ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
  2141. }
  2142. if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
  2143. return translateCallBase(CI, MIRBuilder);
  2144. assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
  2145. if (translateKnownIntrinsic(CI, ID, MIRBuilder))
  2146. return true;
  2147. ArrayRef<Register> ResultRegs;
  2148. if (!CI.getType()->isVoidTy())
  2149. ResultRegs = getOrCreateVRegs(CI);
  2150. // Ignore the callsite attributes. Backend code is most likely not expecting
  2151. // an intrinsic to sometimes have side effects and sometimes not.
  2152. MachineInstrBuilder MIB =
  2153. MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
  2154. if (isa<FPMathOperator>(CI))
  2155. MIB->copyIRFlags(CI);
  2156. for (const auto &Arg : enumerate(CI.args())) {
  2157. // If this is required to be an immediate, don't materialize it in a
  2158. // register.
  2159. if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
  2160. if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
  2161. // imm arguments are more convenient than cimm (and realistically
  2162. // probably sufficient), so use them.
  2163. assert(CI->getBitWidth() <= 64 &&
  2164. "large intrinsic immediates not handled");
  2165. MIB.addImm(CI->getSExtValue());
  2166. } else {
  2167. MIB.addFPImm(cast<ConstantFP>(Arg.value()));
  2168. }
  2169. } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) {
  2170. auto *MD = MDVal->getMetadata();
  2171. auto *MDN = dyn_cast<MDNode>(MD);
  2172. if (!MDN) {
  2173. if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD))
  2174. MDN = MDNode::get(MF->getFunction().getContext(), ConstMD);
  2175. else // This was probably an MDString.
  2176. return false;
  2177. }
  2178. MIB.addMetadata(MDN);
  2179. } else {
  2180. ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
  2181. if (VRegs.size() > 1)
  2182. return false;
  2183. MIB.addUse(VRegs[0]);
  2184. }
  2185. }
  2186. // Add a MachineMemOperand if it is a target mem intrinsic.
  2187. const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  2188. TargetLowering::IntrinsicInfo Info;
  2189. // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
  2190. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
  2191. Align Alignment = Info.align.value_or(
  2192. DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
  2193. LLT MemTy = Info.memVT.isSimple()
  2194. ? getLLTForMVT(Info.memVT.getSimpleVT())
  2195. : LLT::scalar(Info.memVT.getStoreSizeInBits());
    // TODO: We currently just fall back to address space 0 if getTgtMemIntrinsic
    // didn't yield anything useful.
    MachinePointerInfo MPI;
  2199. if (Info.ptrVal)
  2200. MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
  2201. else if (Info.fallbackAddressSpace)
  2202. MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
  2203. MIB.addMemOperand(
  2204. MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
  2205. }
  2206. return true;
  2207. }
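// Walk the EH pad chain starting at EHPadBB and collect the machine basic
// blocks a throwing call may unwind to, together with their probabilities.
// Returns false for personalities we do not handle yet (currently Wasm EH).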
bool IRTranslator::findUnwindDestinations(
    const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality = classifyEHPersonality(
      EHPadBB->getParent()->getFunction().getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
  bool IsSEH = isAsynchronousEHPersonality(Personality);

  if (IsWasmCXX) {
    // Ignore this for now.
    return false;
  }

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
      break;
    }
    if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
      UnwindDests.back().first->setIsEHScopeEntry();
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    }
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
        if (!IsSEH)
          UnwindDests.back().first->setIsEHScopeEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
  return true;
}
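
// Lower an invoke as a call bracketed by EH_LABELs (so the MF knows the region
// covered by the try), then record the normal and unwind successors of the
// invoking block.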
bool IRTranslator::translateInvoke(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const InvokeInst &I = cast<InvokeInst>(U);
  MCContext &Context = MF->getContext();

  const BasicBlock *ReturnBB = I.getSuccessor(0);
  const BasicBlock *EHPadBB = I.getSuccessor(1);

  const Function *Fn = I.getCalledFunction();

  // FIXME: support invoking patchpoint and statepoint intrinsics.
  if (Fn && Fn->isIntrinsic())
    return false;

  // FIXME: support whatever these are.
  if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
    return false;

  // FIXME: support control flow guard targets.
  if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
    return false;

  // FIXME: support Windows exception handling.
  if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
    return false;

  bool LowerInlineAsm = I.isInlineAsm();
  bool NeedEHLabel = true;

  // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
  // the region covered by the try.
  MCSymbol *BeginSymbol = nullptr;
  if (NeedEHLabel) {
    MIRBuilder.buildInstr(TargetOpcode::G_INVOKE_REGION_START);
    BeginSymbol = Context.createTempSymbol();
    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
  }

  if (LowerInlineAsm) {
    if (!translateInlineAsm(I, MIRBuilder))
      return false;
  } else if (!translateCallBase(I, MIRBuilder))
    return false;

  MCSymbol *EndSymbol = nullptr;
  if (NeedEHLabel) {
    EndSymbol = Context.createTempSymbol();
    MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
  }

  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
  BranchProbability EHPadBBProb =
      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();

  if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
    return false;

  MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
                    &ReturnMBB = getMBB(*ReturnBB);
  // Update successor info.
  addSuccessorWithProb(InvokeMBB, &ReturnMBB);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
  }
  InvokeMBB->normalizeSuccProbs();

  if (NeedEHLabel) {
    assert(BeginSymbol && "Expected a begin symbol!");
    assert(EndSymbol && "Expected an end symbol!");
    MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
  }

  MIRBuilder.buildBr(ReturnMBB);
  return true;
}

bool IRTranslator::translateCallBr(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  // FIXME: Implement this.
  return false;
}
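
// Mark the block as an EH pad and copy the exception pointer and selector out
// of the physical registers nominated by the personality into the
// landingpad's virtual registers.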
bool IRTranslator::translateLandingPad(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
  const LandingPadInst &LP = cast<LandingPadInst>(U);

  MachineBasicBlock &MBB = MIRBuilder.getMBB();

  MBB.setIsEHPad();

  // If there aren't registers to copy the values into (e.g., during SjLj
  // exceptions), then don't bother.
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
      TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
    return true;

  // If landingpad's return type is token type, we don't create DAG nodes
  // for its exception pointer and selector value. The extraction of exception
  // pointer or selector value from token type landingpads is not currently
  // supported.
  if (LP.getType()->isTokenTy())
    return true;

  // Add a label to mark the beginning of the landing pad. Deletion of the
  // landing pad can thus be detected via the MachineModuleInfo.
  MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(MF->addLandingPad(&MBB));

  // If the unwinder does not preserve all registers, ensure that the
  // function marks the clobbered registers as used.
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);

  LLT Ty = getLLTForType(*LP.getType(), *DL);
  Register Undef = MRI->createGenericVirtualRegister(Ty);
  MIRBuilder.buildUndef(Undef);

  SmallVector<LLT, 2> Tys;
  for (Type *Ty : cast<StructType>(LP.getType())->elements())
    Tys.push_back(getLLTForType(*Ty, *DL));
  assert(Tys.size() == 2 && "Only two-valued landingpads are supported");

  // Mark exception register as live in.
  Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
  if (!ExceptionReg)
    return false;

  MBB.addLiveIn(ExceptionReg);
  ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);

  Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
  if (!SelectorReg)
    return false;

  MBB.addLiveIn(SelectorReg);
  Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
  MIRBuilder.buildCopy(PtrVReg, SelectorReg);
  MIRBuilder.buildCast(ResRegs[1], PtrVReg);

  return true;
}
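
// Static allocas become frame indices. Dynamic allocas compute the allocation
// size, round it up to the stack alignment, and emit G_DYN_STACKALLOC.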
bool IRTranslator::translateAlloca(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  auto &AI = cast<AllocaInst>(U);

  if (AI.isSwiftError())
    return true;

  if (AI.isStaticAlloca()) {
    Register Res = getOrCreateVReg(AI);
    int FI = getOrCreateFrameIndex(AI);
    MIRBuilder.buildFrameIndex(Res, FI);
    return true;
  }

  // FIXME: support stack probing for Windows.
  if (MF->getTarget().getTargetTriple().isOSWindows())
    return false;

  // Now we're in the harder dynamic case.
  Register NumElts = getOrCreateVReg(*AI.getArraySize());
  Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
  LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
  if (MRI->getType(NumElts) != IntPtrTy) {
    Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
    MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
    NumElts = ExtElts;
  }

  Type *Ty = AI.getAllocatedType();

  Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
  Register TySize =
      getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
  MIRBuilder.buildMul(AllocSize, NumElts, TySize);

  // Round the size of the allocation up to the stack alignment size
  // by adding SA-1 to the size. This doesn't overflow because we're computing
  // an address inside an alloca.
  Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
  auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
  auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
                                      MachineInstr::NoUWrap);
  auto AlignCst =
      MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
  auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);

  Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
  if (Alignment <= StackAlign)
    Alignment = Align(1);
  MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);

  MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
  assert(MF->getFrameInfo().hasVarSizedObjects());
  return true;
}
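
// va_arg is lowered to a G_VAARG taking the va_list pointer and the ABI
// alignment of the result type.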
bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
  // FIXME: We may need more info about the type. Because of how LLT works,
  // we're completely discarding the i64/double distinction here (amongst
  // others). Fortunately the ABIs I know of where that matters don't use va_arg
  // anyway but that's not guaranteed.
  MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
                        {getOrCreateVReg(*U.getOperand(0)),
                         DL->getABITypeAlign(U.getType()).value()});
  return true;
}

bool IRTranslator::translateUnreachable(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
  if (!MF->getTarget().Options.TrapUnreachable)
    return true;

  auto &UI = cast<UnreachableInst>(U);
  // We may be able to ignore unreachable behind a noreturn call.
  if (MF->getTarget().Options.NoTrapAfterNoreturn) {
    const BasicBlock &BB = *UI.getParent();
    if (&UI != &BB.front()) {
      BasicBlock::const_iterator PredI =
          std::prev(BasicBlock::const_iterator(UI));
      if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
        if (Call->doesNotReturn())
          return true;
      }
    }
  }

  MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
  return true;
}
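
// insertelement on a <1 x Ty> vector degenerates to a copy of the scalar;
// otherwise it becomes G_INSERT_VECTOR_ELT.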
bool IRTranslator::translateInsertElement(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
    return translateCopy(U, *U.getOperand(1), MIRBuilder);

  Register Res = getOrCreateVReg(U);
  Register Val = getOrCreateVReg(*U.getOperand(0));
  Register Elt = getOrCreateVReg(*U.getOperand(1));
  Register Idx = getOrCreateVReg(*U.getOperand(2));
  MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
  return true;
}
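
// extractelement becomes G_EXTRACT_VECTOR_ELT, with the index widened or
// truncated to the target's preferred vector index width.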
bool IRTranslator::translateExtractElement(const User &U,
                                           MachineIRBuilder &MIRBuilder) {
  // If it is a <1 x Ty> vector, use the scalar as it is
  // not a legal vector type in LLT.
  if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
    return translateCopy(U, *U.getOperand(0), MIRBuilder);

  Register Res = getOrCreateVReg(U);
  Register Val = getOrCreateVReg(*U.getOperand(0));
  const auto &TLI = *MF->getSubtarget().getTargetLowering();
  unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
  Register Idx;
  if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
    if (CI->getBitWidth() != PreferredVecIdxWidth) {
      APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
      auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
      Idx = getOrCreateVReg(*NewIdxCI);
    }
  }
  if (!Idx)
    Idx = getOrCreateVReg(*U.getOperand(1));
  if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
    const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
    Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0);
  }
  MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
  return true;
}
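
// The shuffle mask is allocated in the MachineFunction so it can be attached
// to the G_SHUFFLE_VECTOR as an operand.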
bool IRTranslator::translateShuffleVector(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  ArrayRef<int> Mask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
    Mask = SVI->getShuffleMask();
  else
    Mask = cast<ConstantExpr>(U).getShuffleMask();
  ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
  MIRBuilder
      .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
                  {getOrCreateVReg(*U.getOperand(0)),
                   getOrCreateVReg(*U.getOperand(1))})
      .addShuffleMask(MaskAlloc);
  return true;
}
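
// PHIs are translated in two steps: build empty G_PHIs here, then fill in the
// incoming values in finishPendingPhis once every predecessor block has been
// translated.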
bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
  const PHINode &PI = cast<PHINode>(U);

  SmallVector<MachineInstr *, 4> Insts;
  for (auto Reg : getOrCreateVRegs(PI)) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
    Insts.push_back(MIB.getInstr());
  }

  PendingPHIs.emplace_back(&PI, std::move(Insts));
  return true;
}
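
// cmpxchg becomes G_ATOMIC_CMPXCHG_WITH_SUCCESS; the memory operand carries
// both the success and failure orderings.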
bool IRTranslator::translateAtomicCmpXchg(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);

  auto Res = getOrCreateVRegs(I);
  Register OldValRes = Res[0];
  Register SuccessRes = Res[1];
  Register Addr = getOrCreateVReg(*I.getPointerOperand());
  Register Cmp = getOrCreateVReg(*I.getCompareOperand());
  Register NewVal = getOrCreateVReg(*I.getNewValOperand());

  MIRBuilder.buildAtomicCmpXchgWithSuccess(
      OldValRes, SuccessRes, Addr, Cmp, NewVal,
      *MF->getMachineMemOperand(
          MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
          getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),
          I.getSuccessOrdering(), I.getFailureOrdering()));
  return true;
}
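
// atomicrmw is mapped to the matching G_ATOMICRMW_* opcode, with the ordering
// and sync scope attached via the memory operand.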
bool IRTranslator::translateAtomicRMW(const User &U,
                                      MachineIRBuilder &MIRBuilder) {
  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
  auto &TLI = *MF->getSubtarget().getTargetLowering();
  auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);

  Register Res = getOrCreateVReg(I);
  Register Addr = getOrCreateVReg(*I.getPointerOperand());
  Register Val = getOrCreateVReg(*I.getValOperand());

  unsigned Opcode = 0;
  switch (I.getOperation()) {
  default:
    return false;
  case AtomicRMWInst::Xchg:
    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
    break;
  case AtomicRMWInst::Add:
    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
    break;
  case AtomicRMWInst::Sub:
    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
    break;
  case AtomicRMWInst::And:
    Opcode = TargetOpcode::G_ATOMICRMW_AND;
    break;
  case AtomicRMWInst::Nand:
    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
    break;
  case AtomicRMWInst::Or:
    Opcode = TargetOpcode::G_ATOMICRMW_OR;
    break;
  case AtomicRMWInst::Xor:
    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
    break;
  case AtomicRMWInst::Max:
    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
    break;
  case AtomicRMWInst::Min:
    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
    break;
  case AtomicRMWInst::UMax:
    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
    break;
  case AtomicRMWInst::UMin:
    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
    break;
  case AtomicRMWInst::FAdd:
    Opcode = TargetOpcode::G_ATOMICRMW_FADD;
    break;
  case AtomicRMWInst::FSub:
    Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
    break;
  case AtomicRMWInst::FMax:
    Opcode = TargetOpcode::G_ATOMICRMW_FMAX;
    break;
  case AtomicRMWInst::FMin:
    Opcode = TargetOpcode::G_ATOMICRMW_FMIN;
    break;
  case AtomicRMWInst::UIncWrap:
    Opcode = TargetOpcode::G_ATOMICRMW_UINC_WRAP;
    break;
  case AtomicRMWInst::UDecWrap:
    Opcode = TargetOpcode::G_ATOMICRMW_UDEC_WRAP;
    break;
  }

  MIRBuilder.buildAtomicRMW(
      Opcode, Res, Addr, Val,
      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
                                Flags, MRI->getType(Val), getMemOpAlign(I),
                                I.getAAMetadata(), nullptr, I.getSyncScopeID(),
                                I.getOrdering()));
  return true;
}

bool IRTranslator::translateFence(const User &U,
                                  MachineIRBuilder &MIRBuilder) {
  const FenceInst &Fence = cast<FenceInst>(U);
  MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
                        Fence.getSyncScopeID());
  return true;
}

bool IRTranslator::translateFreeze(const User &U,
                                   MachineIRBuilder &MIRBuilder) {
  const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
  const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));

  assert(DstRegs.size() == SrcRegs.size() &&
         "Freeze with different source and destination type?");

  for (unsigned I = 0; I < DstRegs.size(); ++I) {
    MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
  }
  return true;
}
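
// Second phase of PHI translation: now that every block has been translated
// and all values have vregs, add the incoming (value, predecessor MBB) pairs
// to the G_PHIs created earlier. Each machine predecessor is added at most
// once per PHI.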
void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
  DILocationVerifier Verifier;
  GISelObserverWrapper WrapperObserver(&Verifier);
  RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
#endif // ifndef NDEBUG
  for (auto &Phi : PendingPHIs) {
    const PHINode *PI = Phi.first;
    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
    EntryBuilder->setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
    Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG

    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
      auto IRPred = PI->getIncomingBlock(i);
      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
      for (auto *Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
          continue;
        SeenPreds.insert(Pred);
        for (unsigned j = 0; j < ValRegs.size(); ++j) {
          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
          MIB.addUse(ValRegs[j]);
          MIB.addMBB(Pred);
        }
      }
    }
  }
}
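
// Dispatch an IR instruction to the corresponding translate##OPCODE method,
// after giving the target a chance to bail out to SelectionDAG.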
bool IRTranslator::translate(const Instruction &Inst) {
  CurBuilder->setDebugLoc(Inst.getDebugLoc());
  CurBuilder->setPCSections(Inst.getMetadata(LLVMContext::MD_pcsections));

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  if (TLI.fallBackToDAGISel(Inst))
    return false;

  switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
  default:
    return false;
  }
}
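
// Materialize a constant into Reg in the entry block. Returns false for
// constant kinds that are not handled here.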
bool IRTranslator::translate(const Constant &C, Register Reg) {
  // We only emit constants into the entry block from here. To prevent jumpy
  // debug behaviour, drop the debug location.
  if (auto CurrInstDL = CurBuilder->getDL())
    EntryBuilder->setDebugLoc(DebugLoc());

  if (auto CI = dyn_cast<ConstantInt>(&C))
    EntryBuilder->buildConstant(Reg, *CI);
  else if (auto CF = dyn_cast<ConstantFP>(&C))
    EntryBuilder->buildFConstant(Reg, *CF);
  else if (isa<UndefValue>(C))
    EntryBuilder->buildUndef(Reg);
  else if (isa<ConstantPointerNull>(C))
    EntryBuilder->buildConstant(Reg, 0);
  else if (auto GV = dyn_cast<GlobalValue>(&C))
    EntryBuilder->buildGlobalValue(Reg, GV);
  else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
    if (!isa<FixedVectorType>(CAZ->getType()))
      return false;
    // Return the scalar if it is a <1 x Ty> vector.
    unsigned NumElts = CAZ->getElementCount().getFixedValue();
    if (NumElts == 1)
      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder);
    SmallVector<Register, 4> Ops;
    for (unsigned I = 0; I < NumElts; ++I) {
      Constant &Elt = *CAZ->getElementValue(I);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
    // Return the scalar if it is a <1 x Ty> vector.
    if (CV->getNumElements() == 1)
      return translateCopy(C, *CV->getElementAsConstant(0), *EntryBuilder);
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumElements(); ++i) {
      Constant &Elt = *CV->getElementAsConstant(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
    switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
    default:
      return false;
    }
  } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
    if (CV->getNumOperands() == 1)
      return translateCopy(C, *CV->getOperand(0), *EntryBuilder);
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
      Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
    EntryBuilder->buildBlockAddress(Reg, BA);
  } else
    return false;

  return true;
}
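
// Emit the switch-lowering artifacts that were deferred while translating the
// block (bit tests, jump tables, switch cases) and, if needed, the
// stack-protector check blocks.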
bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
                                      MachineBasicBlock &MBB) {
  for (auto &BTB : SL->BitTestCases) {
    // Emit header first, if it wasn't already emitted.
    if (!BTB.Emitted)
      emitBitTestHeader(BTB, BTB.Parent);

    BranchProbability UnhandledProb = BTB.Prob;
    for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
      UnhandledProb -= BTB.Cases[j].ExtraProb;
      // Set the current basic block to the mbb we wish to insert the code into
      MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
      // If all cases cover a contiguous range, it is not necessary to jump to
      // the default block after the last bit test fails. This is because the
      // range check during bit test header creation has guaranteed that every
      // case here doesn't go outside the range. In this case, there is no need
      // to perform the last bit test, as it will always be true. Instead, make
      // the second-to-last bit-test fall through to the target of the last bit
      // test, and delete the last bit test.

      MachineBasicBlock *NextMBB;
      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
        // Second-to-last bit-test with contiguous range: fall through to the
        // target of the final bit test.
        NextMBB = BTB.Cases[j + 1].TargetBB;
      } else if (j + 1 == ej) {
        // For the last bit test, fall through to Default.
        NextMBB = BTB.Default;
      } else {
        // Otherwise, fall through to the next bit test.
        NextMBB = BTB.Cases[j + 1].ThisBB;
      }

      emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);

      if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
        // We need to record the replacement phi edge here that normally
        // happens in emitBitTestCase before we delete the case, otherwise the
        // phi edge will be lost.
        addMachineCFGPred({BTB.Parent->getBasicBlock(),
                           BTB.Cases[ej - 1].TargetBB->getBasicBlock()},
                          MBB);
        // Since we're not going to use the final bit test, remove it.
        BTB.Cases.pop_back();
        break;
      }
    }
    // This is the default BB. There are two jumps to it: from the "header" BB
    // and from the last "case" BB, unless the latter was skipped.
    CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
                                   BTB.Default->getBasicBlock()};
    addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
    if (!BTB.ContiguousRange) {
      addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
    }
  }
  SL->BitTestCases.clear();

  for (auto &JTCase : SL->JTCases) {
    // Emit header first, if it wasn't already emitted.
    if (!JTCase.first.Emitted)
      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);

    emitJumpTable(JTCase.second, JTCase.second.MBB);
  }
  SL->JTCases.clear();

  for (auto &SwCase : SL->SwitchCases)
    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
  SL->SwitchCases.clear();

  // Check if we need to generate stack-protector guard checks.
  StackProtector &SP = getAnalysis<StackProtector>();
  if (SP.shouldEmitSDCheck(BB)) {
    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
    bool FunctionBasedInstrumentation =
        TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
    SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
  }
  // Handle stack protector.
  if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
    LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
    return false;
  } else if (SPDescriptor.shouldEmitStackProtector()) {
    MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
    MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();

    // Find the split point to split the parent mbb. At the same time copy all
    // physical registers used in the tail of parent mbb into virtual registers
    // before the split point and back into physical registers after the split
    // point. This prevents us needing to deal with Live-ins and many other
    // register allocation issues caused by us splitting the parent mbb. The
    // register allocator will clean up said virtual copies later on.
    MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
        ParentMBB, *MF->getSubtarget().getInstrInfo());

    // Splice the terminator of ParentMBB into SuccessMBB.
    SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
                       ParentMBB->end());

    // Add compare/jump on neq/jump to the parent BB.
    if (!emitSPDescriptorParent(SPDescriptor, ParentMBB))
      return false;

    // CodeGen Failure MBB if we have not codegened it yet.
    MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
    if (FailureMBB->empty()) {
      if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB))
        return false;
    }

    // Clear the Per-BB State.
    SPDescriptor.resetPerBBState();
  }
  return true;
}
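
// Emit the stack-protector comparison in the parent block: load the guard
// value and the value saved in the stack protector slot, compare them, and
// branch to the failure block if they differ.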
bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
                                          MachineBasicBlock *ParentBB) {
  CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
  // First create the loads to the guard/stack slot for the comparison.
  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
  LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));

  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
  int FI = MFI.getStackProtectorIndex();

  Register Guard;
  Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
  const Module &M = *ParentBB->getParent()->getFunction().getParent();
  Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));

  // Generate code to load the content of the guard slot.
  Register GuardVal =
      CurBuilder
          ->buildLoad(PtrMemTy, StackSlotPtr,
                      MachinePointerInfo::getFixedStack(*MF, FI), Align,
                      MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
          .getReg(0);

  if (TLI.useStackGuardXorFP()) {
    LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
    return false;
  }

  // Retrieve guard check function, nullptr if instrumentation is inlined.
  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
    // This path is currently untestable on GlobalISel, since the only platform
    // that needs this seems to be Windows, and we fall back on that currently.
    // The code still lives here in case that changes.
    // Silence warning about unused variable until the code below that uses
    // 'GuardCheckFn' is enabled.
    (void)GuardCheckFn;
    return false;
#if 0
    // The target provides a guard check function to validate the guard value.
    // Generate a call to that function with the content of the guard slot as
    // argument.
    FunctionType *FnTy = GuardCheckFn->getFunctionType();
    assert(FnTy->getNumParams() == 1 && "Invalid function signature");
    ISD::ArgFlagsTy Flags;
    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
      Flags.setInReg();
    CallLowering::ArgInfo GuardArgInfo(
        {GuardVal, FnTy->getParamType(0), {Flags}});

    CallLowering::CallLoweringInfo Info;
    Info.OrigArgs.push_back(GuardArgInfo);
    Info.CallConv = GuardCheckFn->getCallingConv();
    Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
    Info.OrigRet = {Register(), FnTy->getReturnType()};
    if (!CLI->lowerCall(MIRBuilder, Info)) {
      LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
      return false;
    }
    return true;
#endif
  }

  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
  // Otherwise, emit a volatile load to retrieve the stack guard value.
  if (TLI.useLoadStackGuardNode()) {
    Guard =
        MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
    getStackGuard(Guard, *CurBuilder);
  } else {
    // TODO: test using android subtarget when we support @llvm.thread.pointer.
    const Value *IRGuard = TLI.getSDagStackGuard(M);
    Register GuardPtr = getOrCreateVReg(*IRGuard);

    Guard = CurBuilder
                ->buildLoad(PtrMemTy, GuardPtr,
                            MachinePointerInfo::getFixedStack(*MF, FI), Align,
                            MachineMemOperand::MOLoad |
                                MachineMemOperand::MOVolatile)
                .getReg(0);
  }

  // Perform the comparison.
  auto Cmp =
      CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
  // If the guard/stackslot do not equal, branch to failure MBB.
  CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
  // Otherwise branch to success MBB.
  CurBuilder->buildBr(*SPD.getSuccessMBB());
  return true;
}
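
// Populate the stack-protector failure block with a call to the
// __stack_chk_fail libcall.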
bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
                                           MachineBasicBlock *FailureBB) {
  CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
  const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();

  const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
  const char *Name = TLI.getLibcallName(Libcall);

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
                  0};
  if (!CLI->lowerCall(*CurBuilder, Info)) {
    LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
    return false;
  }

  // On PS4/PS5, the "return address" must still be within the calling
  // function, even if it's at the very end, so emit an explicit TRAP here.
  // WebAssembly needs an unreachable instruction after a non-returning call,
  // because the function return type can be different from __stack_chk_fail's
  // return type (void).
  const TargetMachine &TM = MF->getTarget();
  if (TM.getTargetTriple().isPS() || TM.getTargetTriple().isWasm()) {
    LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
    return false;
  }
  return true;
}

void IRTranslator::finalizeFunction() {
  // Release the memory used by the different maps we
  // needed during the translation.
  PendingPHIs.clear();
  VMap.reset();
  FrameIndices.clear();
  MachinePreds.clear();
  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
  // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
  // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
  EntryBuilder.reset();
  CurBuilder.reset();
  FuncInfo.clear();
  SPDescriptor.resetPerFunctionState();
}

/// Returns true if a BasicBlock \p BB within a variadic function contains a
/// variadic musttail call.
static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
  if (!IsVarArg)
    return false;

  // Walk the block backwards, because tail calls usually only appear at the
  // end of a block.
  return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
    const auto *CI = dyn_cast<CallInst>(&I);
    return CI && CI->isMustTailCall();
  });
}
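
// Main entry point: set up the MIR builders and per-function state, lower the
// formal arguments into a dedicated entry block, translate each IR basic
// block in reverse post-order, resolve pending PHIs, and finally merge the
// argument/constant block into the IR entry block.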
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
  MF = &CurMF;
  const Function &F = MF->getFunction();
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // Set the CSEConfig and run the analysis.
  GISelCSEInfo *CSEInfo = nullptr;
  TPC = &getAnalysis<TargetPassConfig>();
  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
                       ? EnableCSEInIRTranslator
                       : TPC->isGISelCSEEnabled();

  if (EnableCSE) {
    EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
    EntryBuilder->setCSEInfo(CSEInfo);
    CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CurBuilder->setCSEInfo(CSEInfo);
  } else {
    EntryBuilder = std::make_unique<MachineIRBuilder>();
    CurBuilder = std::make_unique<MachineIRBuilder>();
  }
  CLI = MF->getSubtarget().getCallLowering();
  CurBuilder->setMF(*MF);
  EntryBuilder->setMF(*MF);
  MRI = &MF->getRegInfo();
  DL = &F.getParent()->getDataLayout();
  ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
  const TargetMachine &TM = MF->getTarget();
  TM.resetTargetOptions(F);
  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
  FuncInfo.MF = MF;
  if (EnableOpts) {
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  } else {
    AA = nullptr;
    FuncInfo.BPI = nullptr;
  }

  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
      MF->getFunction());
  LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
  SL->init(TLI, TM, *DL);

  assert(PendingPHIs.empty() && "stale PHIs");

  // Targets which want to use big endian can enable it using
  // enableBigEndian().
  if (!DL->isLittleEndian() && !CLI->enableBigEndian()) {
    // Currently we don't properly handle big endian code.
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to translate in big endian mode";
    reportTranslationError(*MF, *TPC, *ORE, R);
  }

  // Release the per-function state when we return, whether we succeeded or
  // not.
  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });

  // Set up a separate basic-block for the arguments and constants.
  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
  MF->push_back(EntryBB);
  EntryBuilder->setMBB(*EntryBB);

  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
  SwiftError.setFunction(CurMF);
  SwiftError.createEntriesInEntryBlock(DbgLoc);

  bool IsVarArg = F.isVarArg();
  bool HasMustTailInVarArgFn = false;

  // Create all blocks, in IR order, to preserve the layout.
  for (const BasicBlock &BB: F) {
    auto *&MBB = BBToMBB[&BB];

    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);

    if (BB.hasAddressTaken())
      MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB));

    if (!HasMustTailInVarArgFn)
      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
  }

  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);

  // Make our arguments/constants entry block fall through to the IR entry
  // block.
  EntryBB->addSuccessor(&getMBB(F.front()));

  if (CLI->fallBackToDAGISel(*MF)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower function: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Lower the actual args into this basic block.
  SmallVector<ArrayRef<Register>, 8> VRegArgs;
  for (const Argument &Arg: F.args()) {
    if (DL->getTypeStoreSize(Arg.getType()).isZero())
      continue; // Don't handle zero sized types.
    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
    VRegArgs.push_back(VRegs);

    if (Arg.hasSwiftErrorAttr()) {
      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
    }
  }

  if (!CLI->lowerFormalArguments(*EntryBuilder, F, VRegArgs, FuncInfo)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Need to visit defs before uses when translating instructions.
  GISelObserverWrapper WrapperObserver;
  if (EnableCSE && CSEInfo)
    WrapperObserver.addObserver(CSEInfo);
  {
    ReversePostOrderTraversal<const Function *> RPOT(&F);
#ifndef NDEBUG
    DILocationVerifier Verifier;
    WrapperObserver.addObserver(&Verifier);
#endif // ifndef NDEBUG
    RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
    RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
    for (const BasicBlock *BB : RPOT) {
      MachineBasicBlock &MBB = getMBB(*BB);
      // Set the insertion point of all the following translations to
      // the end of this basic block.
      CurBuilder->setMBB(MBB);
      HasTailCall = false;
      for (const Instruction &Inst : *BB) {
        // If we translated a tail call in the last step, then we know
        // everything after the call is either a return, or something that is
        // handled by the call itself. (E.g. a lifetime marker or assume
        // intrinsic.) In this case, we should stop translating the block and
        // move on.
        if (HasTailCall)
          break;
#ifndef NDEBUG
        Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
        if (translate(Inst))
          continue;

        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   Inst.getDebugLoc(), BB);
        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);

        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
          std::string InstStrStorage;
          raw_string_ostream InstStr(InstStrStorage);
          InstStr << Inst;

          R << ": '" << InstStr.str() << "'";
        }

        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }

      if (!finalizeBasicBlock(*BB, MBB)) {
        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   BB->getTerminator()->getDebugLoc(), BB);
        R << "unable to translate basic block";
        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }
    }
#ifndef NDEBUG
    WrapperObserver.removeObserver(&Verifier);
#endif
  }

  finishPendingPhis();

  SwiftError.propagateVRegs();

  // Merge the argument lowering and constants block with its single
  // successor, the LLVM-IR entry block. We want the basic block to
  // be maximal.
  assert(EntryBB->succ_size() == 1 &&
         "Custom BB used for lowering should have only one successor");
  // Get the successor of the current entry block.
  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
  assert(NewEntryBB.pred_size() == 1 &&
         "LLVM-IR entry block has a predecessor!?");
  // Move all the instructions from the current entry block to the
  // new entry block.
  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
                    EntryBB->end());

  // Update the live-in information for the new entry block.
  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
    NewEntryBB.addLiveIn(LiveIn);
  NewEntryBB.sortUniqueLiveIns();

  // Get rid of the now empty basic block.
  EntryBB->removeSuccessor(&NewEntryBB);
  MF->remove(EntryBB);
  MF->deleteMachineBasicBlock(EntryBB);

  assert(&MF->front() == &NewEntryBB &&
         "New entry wasn't next in the list of basic blocks!");

  // Initialize stack protector information.
  StackProtector &SP = getAnalysis<StackProtector>();
  SP.copyToMachineFrameInfo(MF->getFrameInfo());

  return false;
}