X86FrameLowering.cpp
  1. //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains the X86 implementation of TargetFrameLowering class.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "X86FrameLowering.h"
  13. #include "MCTargetDesc/X86MCTargetDesc.h"
  14. #include "X86InstrBuilder.h"
  15. #include "X86InstrInfo.h"
  16. #include "X86MachineFunctionInfo.h"
  17. #include "X86Subtarget.h"
  18. #include "X86TargetMachine.h"
  19. #include "llvm/ADT/SmallSet.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/Analysis/EHPersonalities.h"
  22. #include "llvm/CodeGen/LivePhysRegs.h"
  23. #include "llvm/CodeGen/MachineFrameInfo.h"
  24. #include "llvm/CodeGen/MachineFunction.h"
  25. #include "llvm/CodeGen/MachineInstrBuilder.h"
  26. #include "llvm/CodeGen/MachineModuleInfo.h"
  27. #include "llvm/CodeGen/MachineRegisterInfo.h"
  28. #include "llvm/CodeGen/WinEHFuncInfo.h"
  29. #include "llvm/IR/DataLayout.h"
  30. #include "llvm/IR/Function.h"
  31. #include "llvm/MC/MCAsmInfo.h"
  32. #include "llvm/MC/MCObjectFileInfo.h"
  33. #include "llvm/MC/MCSymbol.h"
  34. #include "llvm/Support/Debug.h"
  35. #include "llvm/Target/TargetOptions.h"
  36. #include <cstdlib>
  37. #define DEBUG_TYPE "x86-fl"
  38. STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
  39. STATISTIC(NumFrameExtraProbe,
  40. "Number of extra stack probes generated in prologue");
  41. using namespace llvm;
  42. X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
  43. MaybeAlign StackAlignOverride)
  44. : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
  45. STI.is64Bit() ? -8 : -4),
  46. STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  47. // Cache a bunch of frame-related predicates for this subtarget.
  48. SlotSize = TRI->getSlotSize();
  49. Is64Bit = STI.is64Bit();
  50. IsLP64 = STI.isTarget64BitLP64();
  51. // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  52. Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  53. StackPtr = TRI->getStackRegister();
  54. }
  55. bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  56. return !MF.getFrameInfo().hasVarSizedObjects() &&
  57. !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
  58. !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
  59. }
  60. /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
  61. /// call frame pseudos can be simplified. Having a FP, as in the default
  62. /// implementation, is not sufficient here since we can't always use it.
  63. /// Use a more nuanced condition.
  64. bool
  65. X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  66. return hasReservedCallFrame(MF) ||
  67. MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
  68. (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
  69. TRI->hasBasePointer(MF);
  70. }
  71. // needsFrameIndexResolution - Do we need to perform FI resolution for
  72. // this function? Normally, this is required only when the function
  73. // has any stack objects. However, FI resolution actually has another job,
  74. // not apparent from the name - it resolves call frame setup/destroy pseudos
  75. // that were not simplified earlier.
  76. // So, this is required for x86 functions that have push sequences even
  77. // when there are no stack objects.
  78. bool
  79. X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  80. return MF.getFrameInfo().hasStackObjects() ||
  81. MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
  82. }
  83. /// hasFP - Return true if the specified function should have a dedicated frame
  84. /// pointer register. This is true if the function has variable sized allocas, if
  85. /// frame pointer elimination is disabled, or if realignment/EH/patchpoints require one.
  86. bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  87. const MachineFrameInfo &MFI = MF.getFrameInfo();
  88. return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
  89. TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
  90. MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
  91. MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
  92. MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
  93. MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
  94. MFI.hasStackMap() || MFI.hasPatchPoint() ||
  95. (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
  96. }
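  // Opcode-selection helpers for stack-pointer adjustments. The *ri8 forms take a
  // sign-extended 8-bit immediate and give a shorter encoding whenever the offset
  // fits in 8 bits.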
  97. static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
  98. if (IsLP64) {
  99. if (isInt<8>(Imm))
  100. return X86::SUB64ri8;
  101. return X86::SUB64ri32;
  102. } else {
  103. if (isInt<8>(Imm))
  104. return X86::SUB32ri8;
  105. return X86::SUB32ri;
  106. }
  107. }
  108. static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
  109. if (IsLP64) {
  110. if (isInt<8>(Imm))
  111. return X86::ADD64ri8;
  112. return X86::ADD64ri32;
  113. } else {
  114. if (isInt<8>(Imm))
  115. return X86::ADD32ri8;
  116. return X86::ADD32ri;
  117. }
  118. }
  119. static unsigned getSUBrrOpcode(bool IsLP64) {
  120. return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
  121. }
  122. static unsigned getADDrrOpcode(bool IsLP64) {
  123. return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
  124. }
  125. static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  126. if (IsLP64) {
  127. if (isInt<8>(Imm))
  128. return X86::AND64ri8;
  129. return X86::AND64ri32;
  130. }
  131. if (isInt<8>(Imm))
  132. return X86::AND32ri8;
  133. return X86::AND32ri;
  134. }
  135. static unsigned getLEArOpcode(bool IsLP64) {
  136. return IsLP64 ? X86::LEA64r : X86::LEA32r;
  137. }
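  // Pick the smallest mov-immediate form: MOV32ri64 writes a 32-bit immediate and
  // relies on the implicit zero extension to 64 bits, MOV64ri32 sign-extends a
  // 32-bit immediate, and MOV64ri carries a full 64-bit immediate.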
  138. static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  139. if (Use64BitReg) {
  140. if (isUInt<32>(Imm))
  141. return X86::MOV32ri64;
  142. if (isInt<32>(Imm))
  143. return X86::MOV64ri32;
  144. return X86::MOV64ri;
  145. }
  146. return X86::MOV32ri;
  147. }
  148. static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  149. for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
  150. unsigned Reg = RegMask.PhysReg;
  151. if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
  152. Reg == X86::AH || Reg == X86::AL)
  153. return true;
  154. }
  155. return false;
  156. }
  157. /// Check if the flags need to be preserved before the terminators.
  158. /// This is the case if EFLAGS is live into the region composed of the
  159. /// terminators, or live out of that region without being defined by a
  160. /// terminator.
  161. static bool
  162. flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  163. for (const MachineInstr &MI : MBB.terminators()) {
  164. bool BreakNext = false;
  165. for (const MachineOperand &MO : MI.operands()) {
  166. if (!MO.isReg())
  167. continue;
  168. Register Reg = MO.getReg();
  169. if (Reg != X86::EFLAGS)
  170. continue;
  171. // This terminator needs an EFLAGS value that is not defined by a
  172. // previous terminator:
  173. // EFLAGS is live-in of the region composed of the terminators.
  174. if (!MO.isDef())
  175. return true;
  176. // This terminator defines the eflags, i.e., we don't need to preserve it.
  177. // However, we still need to check that this specific terminator does not
  178. // read a live-in value.
  179. BreakNext = true;
  180. }
  181. // We found a definition of the eflags, no need to preserve them.
  182. if (BreakNext)
  183. return false;
  184. }
  185. // None of the terminators use or define the eflags.
  186. // Check if they are live-out; that would imply we need to preserve them.
  187. for (const MachineBasicBlock *Succ : MBB.successors())
  188. if (Succ->isLiveIn(X86::EFLAGS))
  189. return true;
  190. return false;
  191. }
  192. /// emitSPUpdate - Emit a series of instructions to increment / decrement the
  193. /// stack pointer by a constant value.
  194. void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
  195. MachineBasicBlock::iterator &MBBI,
  196. const DebugLoc &DL,
  197. int64_t NumBytes, bool InEpilogue) const {
  198. bool isSub = NumBytes < 0;
  199. uint64_t Offset = isSub ? -NumBytes : NumBytes;
  200. MachineInstr::MIFlag Flag =
  201. isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
  202. uint64_t Chunk = (1LL << 31) - 1;
  203. MachineFunction &MF = *MBB.getParent();
  204. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  205. const X86TargetLowering &TLI = *STI.getTargetLowering();
  206. const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
  207. // It's ok to not take into account large chunks when probing, as the
  208. // allocation is split into smaller chunks anyway.
  209. if (EmitInlineStackProbe && !InEpilogue) {
  210. // This pseudo-instruction is going to be expanded, potentially using a
  211. // loop, by inlineStackProbe().
  212. BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
  213. return;
  214. } else if (Offset > Chunk) {
  215. // Rather than emit a long series of instructions for large offsets,
  216. // load the offset into a register and do one sub/add
  217. unsigned Reg = 0;
  218. unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
  219. if (isSub && !isEAXLiveIn(MBB))
  220. Reg = Rax;
  221. else
  222. Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
  223. unsigned AddSubRROpc =
  224. isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
  225. if (Reg) {
  226. BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
  227. .addImm(Offset)
  228. .setMIFlag(Flag);
  229. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
  230. .addReg(StackPtr)
  231. .addReg(Reg);
  232. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  233. return;
  234. } else if (Offset > 8 * Chunk) {
  235. // If we would need more than 8 add or sub instructions (a >16GB stack
  236. // frame), it's worth spilling RAX to materialize this immediate.
  237. // pushq %rax
  238. // movabsq +-$Offset+-SlotSize, %rax
  239. // addq %rsp, %rax
  240. // xchg %rax, (%rsp)
  241. // movq (%rsp), %rsp
  242. assert(Is64Bit && "can't have 32-bit 16GB stack frame");
  243. BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
  244. .addReg(Rax, RegState::Kill)
  245. .setMIFlag(Flag);
  246. // Subtract is not commutative, so negate the offset and always use add.
  247. // Subtract 8 less and add 8 more to account for the PUSH we just did.
  248. if (isSub)
  249. Offset = -(Offset - SlotSize);
  250. else
  251. Offset = Offset + SlotSize;
  252. BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
  253. .addImm(Offset)
  254. .setMIFlag(Flag);
  255. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
  256. .addReg(Rax)
  257. .addReg(StackPtr);
  258. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  259. // Exchange the new SP in RAX with the top of the stack.
  260. addRegOffset(
  261. BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
  262. StackPtr, false, 0);
  263. // Load new SP from the top of the stack into RSP.
  264. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
  265. StackPtr, false, 0);
  266. return;
  267. }
  268. }
  269. while (Offset) {
  270. uint64_t ThisVal = std::min(Offset, Chunk);
  271. if (ThisVal == SlotSize) {
  272. // Use push / pop for slot sized adjustments as a size optimization. We
  273. // need to find a dead register when using pop.
  274. unsigned Reg = isSub
  275. ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
  276. : TRI->findDeadCallerSavedReg(MBB, MBBI);
  277. if (Reg) {
  278. unsigned Opc = isSub
  279. ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
  280. : (Is64Bit ? X86::POP64r : X86::POP32r);
  281. BuildMI(MBB, MBBI, DL, TII.get(Opc))
  282. .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
  283. .setMIFlag(Flag);
  284. Offset -= ThisVal;
  285. continue;
  286. }
  287. }
  288. BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
  289. .setMIFlag(Flag);
  290. Offset -= ThisVal;
  291. }
  292. }
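  // Emit a single stack-pointer adjustment of Offset bytes. LEA is used when
  // EFLAGS must stay intact; otherwise an ADD/SUB is emitted and its implicit
  // EFLAGS def is marked dead.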
  293. MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
  294. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  295. const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  296. assert(Offset != 0 && "zero offset stack adjustment requested");
  297. // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  298. // is tricky.
  299. bool UseLEA;
  300. if (!InEpilogue) {
  301. // Check if inserting the prologue at the beginning
  302. // of MBB would require the use of LEA operations.
  303. // We need to use LEA operations if EFLAGS is live in, because
  304. // it means an instruction will read it before it gets defined.
  305. UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  306. } else {
  307. // If we can use LEA for SP but we shouldn't, check that none
  308. // of the terminators uses the eflags. Otherwise we will insert
  309. // an ADD that will redefine the eflags and break the condition.
  310. // Alternatively, we could move the ADD, but this may not be possible
  311. // and is an optimization anyway.
  312. UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
  313. if (UseLEA && !STI.useLeaForSP())
  314. UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
  315. // If the assert below fires, it means we do not do the right thing
  316. // in canUseAsEpilogue.
  317. assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
  318. "We shouldn't have allowed this insertion point");
  319. }
  320. MachineInstrBuilder MI;
  321. if (UseLEA) {
  322. MI = addRegOffset(BuildMI(MBB, MBBI, DL,
  323. TII.get(getLEArOpcode(Uses64BitFramePtr)),
  324. StackPtr),
  325. StackPtr, false, Offset);
  326. } else {
  327. bool IsSub = Offset < 0;
  328. uint64_t AbsOffset = IsSub ? -Offset : Offset;
  329. const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
  330. : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
  331. MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  332. .addReg(StackPtr)
  333. .addImm(AbsOffset);
  334. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  335. }
  336. return MI;
  337. }
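  // If the instruction just before (or after) MBBI is itself an SP adjustment
  // (ADD/SUB/LEA on StackPtr), remove it together with its CFA-offset CFI
  // directive and return its offset so the caller can fold it into a single
  // update; returns 0 if nothing could be merged.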
  338. int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
  339. MachineBasicBlock::iterator &MBBI,
  340. bool doMergeWithPrevious) const {
  341. if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
  342. (!doMergeWithPrevious && MBBI == MBB.end()))
  343. return 0;
  344. MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  345. PI = skipDebugInstructionsBackward(PI, MBB.begin());
  346. // It is assumed that the ADD/SUB/LEA instruction is followed by one CFI
  347. // instruction, and that there are no DBG_VALUE or other instructions between
  348. // the ADD/SUB/LEA and its corresponding CFI instruction.
  349. /* TODO: Add support for the case where there are multiple CFI instructions
  350. below the ADD/SUB/LEA, e.g.:
  351. ...
  352. add
  353. cfi_def_cfa_offset
  354. cfi_offset
  355. ...
  356. */
  357. if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
  358. PI = std::prev(PI);
  359. unsigned Opc = PI->getOpcode();
  360. int Offset = 0;
  361. if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
  362. Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
  363. PI->getOperand(0).getReg() == StackPtr){
  364. assert(PI->getOperand(1).getReg() == StackPtr);
  365. Offset = PI->getOperand(2).getImm();
  366. } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
  367. PI->getOperand(0).getReg() == StackPtr &&
  368. PI->getOperand(1).getReg() == StackPtr &&
  369. PI->getOperand(2).getImm() == 1 &&
  370. PI->getOperand(3).getReg() == X86::NoRegister &&
  371. PI->getOperand(5).getReg() == X86::NoRegister) {
  372. // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
  373. Offset = PI->getOperand(4).getImm();
  374. } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
  375. Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
  376. PI->getOperand(0).getReg() == StackPtr) {
  377. assert(PI->getOperand(1).getReg() == StackPtr);
  378. Offset = -PI->getOperand(2).getImm();
  379. } else
  380. return 0;
  381. PI = MBB.erase(PI);
  382. if (PI != MBB.end() && PI->isCFIInstruction()) {
  383. auto CIs = MBB.getParent()->getFrameInstructions();
  384. MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
  385. if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
  386. CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
  387. PI = MBB.erase(PI);
  388. }
  389. if (!doMergeWithPrevious)
  390. MBBI = skipDebugInstructionsForward(PI, MBB.end());
  391. return Offset;
  392. }
  393. void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
  394. MachineBasicBlock::iterator MBBI,
  395. const DebugLoc &DL,
  396. const MCCFIInstruction &CFIInst,
  397. MachineInstr::MIFlag Flag) const {
  398. MachineFunction &MF = *MBB.getParent();
  399. unsigned CFIIndex = MF.addFrameInst(CFIInst);
  400. BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
  401. .addCFIIndex(CFIIndex)
  402. .setMIFlag(Flag);
  403. }
  404. /// Emits Dwarf Info specifying offsets of callee saved registers and
  405. /// frame pointer. This is called only when basic block sections are enabled.
  406. void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
  407. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  408. MachineFunction &MF = *MBB.getParent();
  409. if (!hasFP(MF)) {
  410. emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
  411. return;
  412. }
  413. const MachineModuleInfo &MMI = MF.getMMI();
  414. const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  415. const Register FramePtr = TRI->getFrameRegister(MF);
  416. const Register MachineFramePtr =
  417. STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
  418. : FramePtr;
  419. unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  420. // Offset = space for return address + size of the frame pointer itself.
  421. unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  422. BuildCFI(MBB, MBBI, DebugLoc{},
  423. MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  424. emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
  425. }
  426. void X86FrameLowering::emitCalleeSavedFrameMoves(
  427. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  428. const DebugLoc &DL, bool IsPrologue) const {
  429. MachineFunction &MF = *MBB.getParent();
  430. MachineFrameInfo &MFI = MF.getFrameInfo();
  431. MachineModuleInfo &MMI = MF.getMMI();
  432. const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  433. // Add callee saved registers to move list.
  434. const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  435. // Calculate offsets.
  436. for (const CalleeSavedInfo &I : CSI) {
  437. int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
  438. Register Reg = I.getReg();
  439. unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
  440. if (IsPrologue) {
  441. BuildCFI(MBB, MBBI, DL,
  442. MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
  443. } else {
  444. BuildCFI(MBB, MBBI, DL,
  445. MCCFIInstruction::createRestore(nullptr, DwarfReg));
  446. }
  447. }
  448. }
  449. void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
  450. MachineBasicBlock &MBB) const {
  451. const MachineFunction &MF = *MBB.getParent();
  452. // Insertion point.
  453. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  454. // Fake a debug loc.
  455. DebugLoc DL;
  456. if (MBBI != MBB.end())
  457. DL = MBBI->getDebugLoc();
  458. // Zero out FP stack if referenced. Do this outside of the loop below so that
  459. // it's done only once.
  460. const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  461. for (MCRegister Reg : RegsToZero.set_bits()) {
  462. if (!X86::RFP80RegClass.contains(Reg))
  463. continue;
  464. unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
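  // LD_F0 (fldz) pushes +0.0 onto the x87 stack NumFPRegs times; the ST_FPrr
  // (fstp) loop below then pops them again, so every x87 stack slot has been
  // overwritten with zero and the stack is left empty.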
  465. for (unsigned i = 0; i != NumFPRegs; ++i)
  466. BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));
  467. for (unsigned i = 0; i != NumFPRegs; ++i)
  468. BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
  469. break;
  470. }
  471. // For GPRs, we only care to clear out the 32-bit register.
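  // On x86-64, writing a 32-bit sub-register implicitly zeroes bits 63:32, so
  // XOR32rr is sufficient even for 64-bit GPRs.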
  472. BitVector GPRsToZero(TRI->getNumRegs());
  473. for (MCRegister Reg : RegsToZero.set_bits())
  474. if (TRI->isGeneralPurposeRegister(MF, Reg)) {
  475. GPRsToZero.set(getX86SubSuperRegisterOrZero(Reg, 32));
  476. RegsToZero.reset(Reg);
  477. }
  478. for (MCRegister Reg : GPRsToZero.set_bits())
  479. BuildMI(MBB, MBBI, DL, TII.get(X86::XOR32rr), Reg)
  480. .addReg(Reg, RegState::Undef)
  481. .addReg(Reg, RegState::Undef);
  482. // Zero out registers.
  483. for (MCRegister Reg : RegsToZero.set_bits()) {
  484. if (ST.hasMMX() && X86::VR64RegClass.contains(Reg))
  485. // FIXME: Ignore MMX registers?
  486. continue;
  487. unsigned XorOp;
  488. if (X86::VR128RegClass.contains(Reg)) {
  489. // XMM#
  490. if (!ST.hasSSE1())
  491. continue;
  492. XorOp = X86::PXORrr;
  493. } else if (X86::VR256RegClass.contains(Reg)) {
  494. // YMM#
  495. if (!ST.hasAVX())
  496. continue;
  497. XorOp = X86::VPXORrr;
  498. } else if (X86::VR512RegClass.contains(Reg)) {
  499. // ZMM#
  500. if (!ST.hasAVX512())
  501. continue;
  502. XorOp = X86::VPXORYrr;
  503. } else if (X86::VK1RegClass.contains(Reg) ||
  504. X86::VK2RegClass.contains(Reg) ||
  505. X86::VK4RegClass.contains(Reg) ||
  506. X86::VK8RegClass.contains(Reg) ||
  507. X86::VK16RegClass.contains(Reg)) {
  508. if (!ST.hasVLX())
  509. continue;
  510. XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr;
  511. } else {
  512. continue;
  513. }
  514. BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg)
  515. .addReg(Reg, RegState::Undef)
  516. .addReg(Reg, RegState::Undef);
  517. }
  518. }
  519. void X86FrameLowering::emitStackProbe(
  520. MachineFunction &MF, MachineBasicBlock &MBB,
  521. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
  522. std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  523. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  524. if (STI.isTargetWindowsCoreCLR()) {
  525. if (InProlog) {
  526. BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
  527. .addImm(0 /* no explicit stack size */);
  528. } else {
  529. emitStackProbeInline(MF, MBB, MBBI, DL, false);
  530. }
  531. } else {
  532. emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  533. }
  534. }
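  // Only the 32-bit Windows probes (MSVC's _chkstk, cygwin/mingw's _alloca)
  // adjust the stack pointer themselves; the Win64 probe leaves RSP untouched
  // (see emitStackProbeCall below).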
  535. bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  536. return STI.isOSWindows() && !STI.isTargetWin64();
  537. }
  538. void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
  539. MachineBasicBlock &PrologMBB) const {
  540. auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
  541. return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  542. });
  543. if (Where != PrologMBB.end()) {
  544. DebugLoc DL = PrologMBB.findDebugLoc(Where);
  545. emitStackProbeInline(MF, PrologMBB, Where, DL, true);
  546. Where->eraseFromParent();
  547. }
  548. }
  549. void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
  550. MachineBasicBlock &MBB,
  551. MachineBasicBlock::iterator MBBI,
  552. const DebugLoc &DL,
  553. bool InProlog) const {
  554. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  555. if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
  556. emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  557. else
  558. emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
  559. }
  560. void X86FrameLowering::emitStackProbeInlineGeneric(
  561. MachineFunction &MF, MachineBasicBlock &MBB,
  562. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  563. MachineInstr &AllocWithProbe = *MBBI;
  564. uint64_t Offset = AllocWithProbe.getOperand(0).getImm();
  565. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  566. const X86TargetLowering &TLI = *STI.getTargetLowering();
  567. assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
  568. "different expansion expected for CoreCLR 64 bit");
  569. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  570. uint64_t ProbeChunk = StackProbeSize * 8;
  571. uint64_t MaxAlign =
  572. TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;
  573. // Synthesize a loop or unroll it, depending on the number of iterations.
  574. // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes are
  575. // left between the unaligned rsp and the current rsp.
  576. if (Offset > ProbeChunk) {
  577. emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
  578. MaxAlign % StackProbeSize);
  579. } else {
  580. emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
  581. MaxAlign % StackProbeSize);
  582. }
  583. }
  584. void X86FrameLowering::emitStackProbeInlineGenericBlock(
  585. MachineFunction &MF, MachineBasicBlock &MBB,
  586. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
  587. uint64_t AlignOffset) const {
  588. const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  589. const bool HasFP = hasFP(MF);
  590. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  591. const X86TargetLowering &TLI = *STI.getTargetLowering();
  592. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  593. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
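  // Unrolled variant: the caller only uses this when Offset <= 8 * StackProbeSize
  // (ProbeChunk), so at most a handful of page probes are emitted inline.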
  594. uint64_t CurrentOffset = 0;
  595. assert(AlignOffset < StackProbeSize);
  596. // If the offset is so small it fits within a page, there's nothing to do.
  597. if (StackProbeSize < Offset + AlignOffset) {
  598. uint64_t StackAdjustment = StackProbeSize - AlignOffset;
  599. BuildStackAdjustment(MBB, MBBI, DL, -StackAdjustment, /*InEpilogue=*/false)
  600. .setMIFlag(MachineInstr::FrameSetup);
  601. if (!HasFP && NeedsDwarfCFI) {
  602. BuildCFI(
  603. MBB, MBBI, DL,
  604. MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
  605. }
  606. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  607. .setMIFlag(MachineInstr::FrameSetup),
  608. StackPtr, false, 0)
  609. .addImm(0)
  610. .setMIFlag(MachineInstr::FrameSetup);
  611. NumFrameExtraProbe++;
  612. CurrentOffset = StackProbeSize - AlignOffset;
  613. }
  614. // For the next N - 1 pages, just probe. I tried to take advantage of
  615. // natural probes, but it implies much more logic and there were very few
  616. // interesting natural probes to interleave.
  617. while (CurrentOffset + StackProbeSize < Offset) {
  618. BuildStackAdjustment(MBB, MBBI, DL, -StackProbeSize, /*InEpilogue=*/false)
  619. .setMIFlag(MachineInstr::FrameSetup);
  620. if (!HasFP && NeedsDwarfCFI) {
  621. BuildCFI(
  622. MBB, MBBI, DL,
  623. MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
  624. }
  625. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  626. .setMIFlag(MachineInstr::FrameSetup),
  627. StackPtr, false, 0)
  628. .addImm(0)
  629. .setMIFlag(MachineInstr::FrameSetup);
  630. NumFrameExtraProbe++;
  631. CurrentOffset += StackProbeSize;
  632. }
  633. // No need to probe the tail; it is smaller than a page.
  634. uint64_t ChunkSize = Offset - CurrentOffset;
  635. if (ChunkSize == SlotSize) {
  636. // Use push for slot sized adjustments as a size optimization,
  637. // like emitSPUpdate does when not probing.
  638. unsigned Reg = Is64Bit ? X86::RAX : X86::EAX;
  639. unsigned Opc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
  640. BuildMI(MBB, MBBI, DL, TII.get(Opc))
  641. .addReg(Reg, RegState::Undef)
  642. .setMIFlag(MachineInstr::FrameSetup);
  643. } else {
  644. BuildStackAdjustment(MBB, MBBI, DL, -ChunkSize, /*InEpilogue=*/false)
  645. .setMIFlag(MachineInstr::FrameSetup);
  646. }
  647. // No need to adjust the DWARF CFA offset here; the final stack position has
  648. // already been defined.
  649. }
  650. void X86FrameLowering::emitStackProbeInlineGenericLoop(
  651. MachineFunction &MF, MachineBasicBlock &MBB,
  652. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
  653. uint64_t AlignOffset) const {
  654. assert(Offset && "null offset");
  655. assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
  656. MachineBasicBlock::LQR_Live &&
  657. "Inline stack probe loop will clobber live EFLAGS.");
  658. const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  659. const bool HasFP = hasFP(MF);
  660. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  661. const X86TargetLowering &TLI = *STI.getTargetLowering();
  662. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  663. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  664. if (AlignOffset) {
  665. if (AlignOffset < StackProbeSize) {
  666. // Perform a first smaller allocation followed by a probe.
  667. BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
  668. .setMIFlag(MachineInstr::FrameSetup);
  669. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
  670. .setMIFlag(MachineInstr::FrameSetup),
  671. StackPtr, false, 0)
  672. .addImm(0)
  673. .setMIFlag(MachineInstr::FrameSetup);
  674. NumFrameExtraProbe++;
  675. Offset -= AlignOffset;
  676. }
  677. }
  678. // Synthesize a loop
  679. NumFrameLoopProbe++;
  680. const BasicBlock *LLVM_BB = MBB.getBasicBlock();
  681. MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  682. MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  683. MachineFunction::iterator MBBIter = ++MBB.getIterator();
  684. MF.insert(MBBIter, testMBB);
  685. MF.insert(MBBIter, tailMBB);
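  // FinalStackProbed holds the loop bound, i.e. the SP value once the
  // page-aligned part of the allocation is done; when no FP is available it also
  // serves as the CFA register for the duration of the loop (see the CFI below).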
  686. Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
  687. : Is64Bit ? X86::R11D
  688. : X86::EAX;
  689. BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
  690. .addReg(StackPtr)
  691. .setMIFlag(MachineInstr::FrameSetup);
  692. // save loop bound
  693. {
  694. const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
  695. const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
  696. BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
  697. .addReg(FinalStackProbed)
  698. .addImm(BoundOffset)
  699. .setMIFlag(MachineInstr::FrameSetup);
  700. // while in the loop, use loop-invariant reg for CFI,
  701. // instead of the stack pointer, which changes during the loop
  702. if (!HasFP && NeedsDwarfCFI) {
  703. // x32 uses the same DWARF register numbers as x86-64, so there is no
  704. // register number for r11d; we must use r11 instead.
  705. const Register DwarfFinalStackProbed =
  706. STI.isTarget64BitILP32()
  707. ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
  708. : FinalStackProbed;
  709. BuildCFI(MBB, MBBI, DL,
  710. MCCFIInstruction::createDefCfaRegister(
  711. nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
  712. BuildCFI(MBB, MBBI, DL,
  713. MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
  714. }
  715. }
  716. // allocate a page
  717. BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
  718. /*InEpilogue=*/false)
  719. .setMIFlag(MachineInstr::FrameSetup);
  720. // touch the page
  721. addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
  722. .setMIFlag(MachineInstr::FrameSetup),
  723. StackPtr, false, 0)
  724. .addImm(0)
  725. .setMIFlag(MachineInstr::FrameSetup);
  726. // cmp with stack pointer bound
  727. BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  728. .addReg(StackPtr)
  729. .addReg(FinalStackProbed)
  730. .setMIFlag(MachineInstr::FrameSetup);
  731. // jump
  732. BuildMI(testMBB, DL, TII.get(X86::JCC_1))
  733. .addMBB(testMBB)
  734. .addImm(X86::COND_NE)
  735. .setMIFlag(MachineInstr::FrameSetup);
  736. testMBB->addSuccessor(testMBB);
  737. testMBB->addSuccessor(tailMBB);
  738. // BB management
  739. tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  740. tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  741. MBB.addSuccessor(testMBB);
  742. // handle tail
  743. const uint64_t TailOffset = Offset % StackProbeSize;
  744. MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  745. if (TailOffset) {
  746. BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
  747. /*InEpilogue=*/false)
  748. .setMIFlag(MachineInstr::FrameSetup);
  749. }
  750. // after the loop, switch back to stack pointer for CFI
  751. if (!HasFP && NeedsDwarfCFI) {
  752. // x32 uses the same DWARF register numbers as x86-64, so there is no
  753. // register number for esp; we must use rsp instead.
  754. const Register DwarfStackPtr =
  755. STI.isTarget64BitILP32()
  756. ? Register(getX86SubSuperRegister(StackPtr, 64))
  757. : Register(StackPtr);
  758. BuildCFI(*tailMBB, TailMBBIter, DL,
  759. MCCFIInstruction::createDefCfaRegister(
  760. nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  761. }
  762. // Update Live In information
  763. recomputeLiveIns(*testMBB);
  764. recomputeLiveIns(*tailMBB);
  765. }
  766. void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
  767. MachineFunction &MF, MachineBasicBlock &MBB,
  768. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  769. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  770. assert(STI.is64Bit() && "different expansion needed for 32 bit");
  771. assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  772. const TargetInstrInfo &TII = *STI.getInstrInfo();
  773. const BasicBlock *LLVM_BB = MBB.getBasicBlock();
  774. assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
  775. MachineBasicBlock::LQR_Live &&
  776. "Inline stack probe loop will clobber live EFLAGS.");
  777. // RAX contains the number of bytes of desired stack adjustment.
  778. // The handling here assumes this value has already been updated so as to
  779. // maintain stack alignment.
  780. //
  781. // We need to exit with RSP modified by this amount and execute suitable
  782. // page touches to notify the OS that we're growing the stack responsibly.
  783. // All stack probing must be done without modifying RSP.
  784. //
  785. // MBB:
  786. // SizeReg = RAX;
  787. // ZeroReg = 0
  788. // CopyReg = RSP
  789. // Flags, TestReg = CopyReg - SizeReg
  790. // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  791. // LimitReg = gs magic thread env access
  792. // if FinalReg >= LimitReg goto ContinueMBB
  793. // RoundBB:
  794. // RoundReg = page address of FinalReg
  795. // LoopMBB:
  796. // LoopReg = PHI(LimitReg,ProbeReg)
  797. // ProbeReg = LoopReg - PageSize
  798. // [ProbeReg] = 0
  799. // if (ProbeReg > RoundReg) goto LoopMBB
  800. // ContinueMBB:
  801. // RSP = RSP - RAX
  802. // [rest of original MBB]
  803. // Set up the new basic blocks
  804. MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  805. MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  806. MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  807. MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  808. MF.insert(MBBIter, RoundMBB);
  809. MF.insert(MBBIter, LoopMBB);
  810. MF.insert(MBBIter, ContinueMBB);
  811. // Split MBB and move the tail portion down to ContinueMBB.
  812. MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  813. ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  814. ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  815. // Some useful constants
  816. const int64_t ThreadEnvironmentStackLimit = 0x10;
  817. const int64_t PageSize = 0x1000;
  818. const int64_t PageMask = ~(PageSize - 1);
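  // 0x10 is the offset of StackLimit in the NT_TIB at the start of the Win64
  // TEB, which the GS segment register points to; it is loaded below with a
  // GS-relative MOV64rm.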
  819. // Registers we need. For the normal case we use virtual
  820. // registers. For the prolog expansion we use RAX, RCX and RDX.
  821. MachineRegisterInfo &MRI = MF.getRegInfo();
  822. const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  823. const Register SizeReg = InProlog ? X86::RAX
  824. : MRI.createVirtualRegister(RegClass),
  825. ZeroReg = InProlog ? X86::RCX
  826. : MRI.createVirtualRegister(RegClass),
  827. CopyReg = InProlog ? X86::RDX
  828. : MRI.createVirtualRegister(RegClass),
  829. TestReg = InProlog ? X86::RDX
  830. : MRI.createVirtualRegister(RegClass),
  831. FinalReg = InProlog ? X86::RDX
  832. : MRI.createVirtualRegister(RegClass),
  833. RoundedReg = InProlog ? X86::RDX
  834. : MRI.createVirtualRegister(RegClass),
  835. LimitReg = InProlog ? X86::RCX
  836. : MRI.createVirtualRegister(RegClass),
  837. JoinReg = InProlog ? X86::RCX
  838. : MRI.createVirtualRegister(RegClass),
  839. ProbeReg = InProlog ? X86::RCX
  840. : MRI.createVirtualRegister(RegClass);
  841. // SP-relative offsets where we can save RCX and RDX.
  842. int64_t RCXShadowSlot = 0;
  843. int64_t RDXShadowSlot = 0;
  844. // If inlining in the prolog, save RCX and RDX.
  845. if (InProlog) {
  846. // Compute the offsets. We need to account for things already
  847. // pushed onto the stack at this point: return address, frame
  848. // pointer (if used), and callee saves.
  849. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  850. const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
  851. const bool HasFP = hasFP(MF);
  852. // Check if we need to spill RCX and/or RDX.
  853. // Here we assume that no earlier prologue instruction changes RCX and/or
  854. // RDX, so checking the block live-ins is enough.
  855. const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
  856. const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
  857. int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
  858. // Assign the initial slot to both registers, then change RDX's slot if both
  859. // need to be spilled.
  860. if (IsRCXLiveIn)
  861. RCXShadowSlot = InitSlot;
  862. if (IsRDXLiveIn)
  863. RDXShadowSlot = InitSlot;
  864. if (IsRDXLiveIn && IsRCXLiveIn)
  865. RDXShadowSlot += 8;
  866. // Emit the saves if needed.
  867. if (IsRCXLiveIn)
  868. addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
  869. RCXShadowSlot)
  870. .addReg(X86::RCX);
  871. if (IsRDXLiveIn)
  872. addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
  873. RDXShadowSlot)
  874. .addReg(X86::RDX);
  875. } else {
  876. // Not in the prolog. Copy RAX to a virtual reg.
  877. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  878. }
  879. // Add code to MBB to check for overflow and set the new target stack pointer
  880. // to zero if so.
  881. BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
  882. .addReg(ZeroReg, RegState::Undef)
  883. .addReg(ZeroReg, RegState::Undef);
  884. BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  885. BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
  886. .addReg(CopyReg)
  887. .addReg(SizeReg);
  888. BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
  889. .addReg(TestReg)
  890. .addReg(ZeroReg)
  891. .addImm(X86::COND_B);
  892. // FinalReg now holds final stack pointer value, or zero if
  893. // allocation would overflow. Compare against the current stack
  894. // limit from the thread environment block. Note this limit is the
  895. // lowest touched page on the stack, not the point at which the OS
  896. // will cause an overflow exception, so this is just an optimization
  897. // to avoid unnecessarily touching pages that are below the current
  898. // SP but already committed to the stack by the OS.
  899. BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
  900. .addReg(0)
  901. .addImm(1)
  902. .addReg(0)
  903. .addImm(ThreadEnvironmentStackLimit)
  904. .addReg(X86::GS);
  905. BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  906. // Jump if the desired stack pointer is at or above the stack limit.
  907. BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
  908. // Add code to roundMBB to round the final stack pointer to a page boundary.
  909. RoundMBB->addLiveIn(FinalReg);
  910. BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
  911. .addReg(FinalReg)
  912. .addImm(PageMask);
  913. BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
  914. // LimitReg now holds the current stack limit and RoundedReg the page-rounded
  915. // final RSP value. Add code to loopMBB to walk down from LimitReg page by page
  916. // and probe until we reach RoundedReg.
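  // In the prologue expansion JoinReg, LimitReg and ProbeReg are all the
  // physical RCX, so no PHI is needed; only the virtual-register form threads
  // the loop value through a PHI.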
  917. if (!InProlog) {
  918. BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
  919. .addReg(LimitReg)
  920. .addMBB(RoundMBB)
  921. .addReg(ProbeReg)
  922. .addMBB(LoopMBB);
  923. }
  924. LoopMBB->addLiveIn(JoinReg);
  925. addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
  926. false, -PageSize);
  927. // Probe by storing a byte onto the stack.
  928. BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
  929. .addReg(ProbeReg)
  930. .addImm(1)
  931. .addReg(0)
  932. .addImm(0)
  933. .addReg(0)
  934. .addImm(0);
  935. LoopMBB->addLiveIn(RoundedReg);
  936. BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
  937. .addReg(RoundedReg)
  938. .addReg(ProbeReg);
  939. BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
  940. MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
  941. // If in prolog, restore RDX and RCX.
  942. if (InProlog) {
  943. if (RCXShadowSlot) // It means we spilled RCX in the prologue.
  944. addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
  945. TII.get(X86::MOV64rm), X86::RCX),
  946. X86::RSP, false, RCXShadowSlot);
  947. if (RDXShadowSlot) // It means we spilled RDX in the prologue.
  948. addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
  949. TII.get(X86::MOV64rm), X86::RDX),
  950. X86::RSP, false, RDXShadowSlot);
  951. }
  952. // Now that the probing is done, add code to continueMBB to update
  953. // the stack pointer for real.
  954. ContinueMBB->addLiveIn(SizeReg);
  955. BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
  956. .addReg(X86::RSP)
  957. .addReg(SizeReg);
  958. // Add the control flow edges we need.
  959. MBB.addSuccessor(ContinueMBB);
  960. MBB.addSuccessor(RoundMBB);
  961. RoundMBB->addSuccessor(LoopMBB);
  962. LoopMBB->addSuccessor(ContinueMBB);
  963. LoopMBB->addSuccessor(LoopMBB);
  964. // Mark all the instructions added to the prolog as frame setup.
  965. if (InProlog) {
  966. for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
  967. BeforeMBBI->setFlag(MachineInstr::FrameSetup);
  968. }
  969. for (MachineInstr &MI : *RoundMBB) {
  970. MI.setFlag(MachineInstr::FrameSetup);
  971. }
  972. for (MachineInstr &MI : *LoopMBB) {
  973. MI.setFlag(MachineInstr::FrameSetup);
  974. }
  975. for (MachineInstr &MI :
  976. llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
  977. MI.setFlag(MachineInstr::FrameSetup);
  978. }
  979. }
  980. }
  981. void X86FrameLowering::emitStackProbeCall(
  982. MachineFunction &MF, MachineBasicBlock &MBB,
  983. MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
  984. std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  985. bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
  986. // FIXME: Add indirect thunk support and remove this.
  987. if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
  988. report_fatal_error("Emitting stack probe calls on 64-bit with the large "
  989. "code model and indirect thunks not yet implemented.");
  990. assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
  991. MachineBasicBlock::LQR_Live &&
  992. "Stack probe calls will clobber live EFLAGS.");
  993. unsigned CallOp;
  994. if (Is64Bit)
  995. CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  996. else
  997. CallOp = X86::CALLpcrel32;
  998. StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
  999. MachineInstrBuilder CI;
  1000. MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
  1001. // All current stack probes take AX and SP as input, clobber flags, and
  1002. // preserve all registers. x86_64 probes leave RSP unmodified.
  1003. if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
  1004. // For the large code model, we have to call through a register. Use R11,
  1005. // as it is scratch in all supported calling conventions.
  1006. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
  1007. .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  1008. CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  1009. } else {
  1010. CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
  1011. .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  1012. }
  1013. unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  1014. unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  1015. CI.addReg(AX, RegState::Implicit)
  1016. .addReg(SP, RegState::Implicit)
  1017. .addReg(AX, RegState::Define | RegState::Implicit)
  1018. .addReg(SP, RegState::Define | RegState::Implicit)
  1019. .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
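  // These implicit operands describe the probe's ABI to the rest of the
  // backend: the requested size is read from [R|E]AX, the stack pointer is
  // read and may be updated, and EFLAGS are clobbered.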
  1020. MachineInstr *ModInst = CI;
  1021. if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves; they also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
  1028. ModInst =
  1029. BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
  1030. .addReg(SP)
  1031. .addReg(AX);
  1032. }
  1033. // DebugInfo variable locations -- if there's an instruction number for the
  1034. // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  1035. // modifies SP.
  1036. if (InstrNum) {
  1037. if (STI.isTargetWin64() || !STI.isOSWindows()) {
  1038. // Label destination operand of the subtract.
  1039. MF.makeDebugValueSubstitution(*InstrNum,
  1040. {ModInst->getDebugInstrNum(), 0});
  1041. } else {
      // Label the call. The SP definition is the penultimate operand
      // (zero-based).
  1044. unsigned SPDefOperand = ModInst->getNumOperands() - 2;
  1045. MF.makeDebugValueSubstitution(
  1046. *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
  1047. }
  1048. }
  1049. if (InProlog) {
  1050. // Apply the frame setup flag to all inserted instrs.
  1051. for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
  1052. ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  1053. }
  1054. }
  1055. static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  1056. // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  1057. // and might require smaller successive adjustments.
  1058. const uint64_t Win64MaxSEHOffset = 128;
  1059. uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  1060. // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
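  // Masking with -16 clears the low four bits, rounding the offset down to a
  // multiple of 16.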
  1061. return SEHFrameOffset & -16;
  1062. }
// If we're forcing a stack realignment we can't rely on just the frame
// info; we need to know the ABI stack alignment as well in case we have a
// call out. Otherwise just make sure we have some alignment - we'll go with
// the minimum SlotSize.
  1067. uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  1068. const MachineFrameInfo &MFI = MF.getFrameInfo();
  1069. Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  1070. Align StackAlign = getStackAlign();
  1071. if (MF.getFunction().hasFnAttribute("stackrealign")) {
  1072. if (MFI.hasCalls())
  1073. MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
  1074. else if (MaxAlign < SlotSize)
  1075. MaxAlign = Align(SlotSize);
  1076. }
  1077. return MaxAlign.value();
  1078. }
  1079. void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
  1080. MachineBasicBlock::iterator MBBI,
  1081. const DebugLoc &DL, unsigned Reg,
  1082. uint64_t MaxAlign) const {
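  // MaxAlign is a power of two, so -MaxAlign is a mask with the low bits
  // clear; ANDing the stack pointer with it rounds it down to the requested
  // alignment.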
  1083. uint64_t Val = -MaxAlign;
  1084. unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
  1085. MachineFunction &MF = *MBB.getParent();
  1086. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  1087. const X86TargetLowering &TLI = *STI.getTargetLowering();
  1088. const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  1089. const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);
  // We want to make sure that (in the worst case) less than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  1093. if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
  1094. {
  1095. NumFrameLoopProbe++;
  1096. MachineBasicBlock *entryMBB =
  1097. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1098. MachineBasicBlock *headMBB =
  1099. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1100. MachineBasicBlock *bodyMBB =
  1101. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1102. MachineBasicBlock *footMBB =
  1103. MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  1104. MachineFunction::iterator MBBIter = MBB.getIterator();
  1105. MF.insert(MBBIter, entryMBB);
  1106. MF.insert(MBBIter, headMBB);
  1107. MF.insert(MBBIter, bodyMBB);
  1108. MF.insert(MBBIter, footMBB);
  1109. const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  1110. Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
  1111. : Is64Bit ? X86::R11D
  1112. : X86::EAX;
  1113. // Setup entry block
  1114. {
  1115. entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
  1116. BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
  1117. .addReg(StackPtr)
  1118. .setMIFlag(MachineInstr::FrameSetup);
  1119. MachineInstr *MI =
  1120. BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
  1121. .addReg(FinalStackProbed)
  1122. .addImm(Val)
  1123. .setMIFlag(MachineInstr::FrameSetup);
  1124. // The EFLAGS implicit def is dead.
  1125. MI->getOperand(3).setIsDead();
  1126. BuildMI(entryMBB, DL,
  1127. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1128. .addReg(FinalStackProbed)
  1129. .addReg(StackPtr)
  1130. .setMIFlag(MachineInstr::FrameSetup);
  1131. BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
  1132. .addMBB(&MBB)
  1133. .addImm(X86::COND_E)
  1134. .setMIFlag(MachineInstr::FrameSetup);
  1135. entryMBB->addSuccessor(headMBB);
  1136. entryMBB->addSuccessor(&MBB);
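        // If the AND did not change the stack pointer there is nothing new to
        // probe, so jump straight back to MBB; otherwise fall through into
        // headMBB and walk the newly exposed region a page at a time.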
  1137. }
  1138. // Loop entry block
  1139. {
  1140. const unsigned SUBOpc =
  1141. getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
  1142. BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
  1143. .addReg(StackPtr)
  1144. .addImm(StackProbeSize)
  1145. .setMIFlag(MachineInstr::FrameSetup);
  1146. BuildMI(headMBB, DL,
  1147. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1148. .addReg(StackPtr)
  1149. .addReg(FinalStackProbed)
  1150. .setMIFlag(MachineInstr::FrameSetup);
  1151. // jump to the footer if StackPtr < FinalStackProbed
  1152. BuildMI(headMBB, DL, TII.get(X86::JCC_1))
  1153. .addMBB(footMBB)
  1154. .addImm(X86::COND_B)
  1155. .setMIFlag(MachineInstr::FrameSetup);
  1156. headMBB->addSuccessor(bodyMBB);
  1157. headMBB->addSuccessor(footMBB);
  1158. }
  1159. // setup loop body
  1160. {
  1161. addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
  1162. .setMIFlag(MachineInstr::FrameSetup),
  1163. StackPtr, false, 0)
  1164. .addImm(0)
  1165. .setMIFlag(MachineInstr::FrameSetup);
  1166. const unsigned SUBOpc =
  1167. getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
  1168. BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
  1169. .addReg(StackPtr)
  1170. .addImm(StackProbeSize)
  1171. .setMIFlag(MachineInstr::FrameSetup);
  1172. // cmp with stack pointer bound
  1173. BuildMI(bodyMBB, DL,
  1174. TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
  1175. .addReg(FinalStackProbed)
  1176. .addReg(StackPtr)
  1177. .setMIFlag(MachineInstr::FrameSetup);
  1178. // jump back while FinalStackProbed < StackPtr
  1179. BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
  1180. .addMBB(bodyMBB)
  1181. .addImm(X86::COND_B)
  1182. .setMIFlag(MachineInstr::FrameSetup);
  1183. bodyMBB->addSuccessor(bodyMBB);
  1184. bodyMBB->addSuccessor(footMBB);
  1185. }
  1186. // setup loop footer
  1187. {
  1188. BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
  1189. .addReg(FinalStackProbed)
  1190. .setMIFlag(MachineInstr::FrameSetup);
  1191. addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
  1192. .setMIFlag(MachineInstr::FrameSetup),
  1193. StackPtr, false, 0)
  1194. .addImm(0)
  1195. .setMIFlag(MachineInstr::FrameSetup);
  1196. footMBB->addSuccessor(&MBB);
  1197. }
  1198. recomputeLiveIns(*headMBB);
  1199. recomputeLiveIns(*bodyMBB);
  1200. recomputeLiveIns(*footMBB);
  1201. recomputeLiveIns(MBB);
  1202. }
  1203. } else {
  1204. MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
  1205. .addReg(Reg)
  1206. .addImm(Val)
  1207. .setMIFlag(MachineInstr::FrameSetup);
  1208. // The EFLAGS implicit def is dead.
  1209. MI->getOperand(3).setIsDead();
  1210. }
  1211. }
  1212. bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
  1213. // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  1214. // clobbered by any interrupt handler.
  1215. assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
  1216. "MF used frame lowering for wrong subtarget");
  1217. const Function &Fn = MF.getFunction();
  1218. const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  1219. return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
  1220. }
  1221. /// Return true if we need to use the restricted Windows x64 prologue and
  1222. /// epilogue code patterns that can be described with WinCFI (.seh_*
  1223. /// directives).
  1224. bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  1225. return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  1226. }
  1227. bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  1228. return !isWin64Prologue(MF) && MF.needsFrameMoves();
  1229. }
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.
  1234. /*
  1235. Here's a gist of what gets emitted:
  1236. ; Establish frame pointer, if needed
  1237. [if needs FP]
  1238. push %rbp
  1239. .cfi_def_cfa_offset 16
  1240. .cfi_offset %rbp, -16
.seh_pushreg %rbp
  1242. mov %rsp, %rbp
  1243. .cfi_def_cfa_register %rbp
  1244. ; Spill general-purpose registers
  1245. [for all callee-saved GPRs]
  1246. pushq %<reg>
  1247. [if not needs FP]
  1248. .cfi_def_cfa_offset (offset from RETADDR)
  1249. .seh_pushreg %<reg>
  1250. ; If the required stack alignment > default stack alignment
  1251. ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  1252. ; of unknown size in the stack frame.
  1253. [if stack needs re-alignment]
  1254. and $MASK, %rsp
  1255. ; Allocate space for locals
  1256. [if target is Windows and allocated space > 4096 bytes]
  1257. ; Windows needs special care for allocations larger
  1258. ; than one page.
  1259. mov $NNN, %rax
  1260. call ___chkstk_ms/___chkstk
  1261. sub %rax, %rsp
  1262. [else]
  1263. sub $NNN, %rsp
  1264. [if needs FP]
  1265. .seh_stackalloc (size of XMM spill slots)
  1266. .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  1267. [else]
  1268. .seh_stackalloc NNN
  1269. ; Spill XMMs
; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
; they may get spilled on any platform if the current function
; calls @llvm.eh.unwind.init
  1273. [if needs FP]
  1274. [for all callee-saved XMM registers]
  1275. movaps %<xmm reg>, -MMM(%rbp)
  1276. [for all callee-saved XMM registers]
  1277. .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
  1278. ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  1279. [else]
  1280. [for all callee-saved XMM registers]
  1281. movaps %<xmm reg>, KKK(%rsp)
  1282. [for all callee-saved XMM registers]
  1283. .seh_savexmm %<xmm reg>, KKK
  1284. .seh_endprologue
  1285. [if needs base pointer]
  1286. mov %rsp, %rbx
  1287. [if needs to restore base pointer]
  1288. mov %rsp, -MMM(%rbp)
  1289. ; Emit CFI info
  1290. [if needs FP]
  1291. [for all callee-saved registers]
  1292. .cfi_offset %<reg>, (offset from %rbp)
  1293. [else]
  1294. .cfi_def_cfa_offset (offset from RETADDR)
  1295. [for all callee-saved registers]
  1296. .cfi_offset %<reg>, (offset from %rsp)
  1297. Notes:
  1298. - .seh directives are emitted only for Windows 64 ABI
  1299. - .cv_fpo directives are emitted on win32 when emitting CodeView
  1300. - .cfi directives are emitted for all other ABIs
  1301. - for 32-bit code, substitute %e?? registers for %r??
  1302. */
  1303. void X86FrameLowering::emitPrologue(MachineFunction &MF,
  1304. MachineBasicBlock &MBB) const {
  1305. assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
  1306. "MF used frame lowering for wrong subtarget");
  1307. MachineBasicBlock::iterator MBBI = MBB.begin();
  1308. MachineFrameInfo &MFI = MF.getFrameInfo();
  1309. const Function &Fn = MF.getFunction();
  1310. MachineModuleInfo &MMI = MF.getMMI();
  1311. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  1312. uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  1313. uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
  1314. bool IsFunclet = MBB.isEHFuncletEntry();
  1315. EHPersonality Personality = EHPersonality::Unknown;
  1316. if (Fn.hasPersonalityFn())
  1317. Personality = classifyEHPersonality(Fn.getPersonalityFn());
  1318. bool FnHasClrFunclet =
  1319. MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
  1320. bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
  1321. bool HasFP = hasFP(MF);
  1322. bool IsWin64Prologue = isWin64Prologue(MF);
  1323. bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
  1324. // FIXME: Emit FPO data for EH funclets.
  1325. bool NeedsWinFPO =
  1326. !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
  1327. bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
  1328. bool NeedsDwarfCFI = needsDwarfCFI(MF);
  1329. Register FramePtr = TRI->getFrameRegister(MF);
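  // On x32 (64-bit ILP32) the frame register is the 32-bit EBP, but the
  // pushes, pops, and CFI directives below operate on the full 64-bit RBP, so
  // use the 64-bit super-register in that case.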
  1330. const Register MachineFramePtr =
  1331. STI.isTarget64BitILP32()
  1332. ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
  1333. Register BasePtr = TRI->getBaseRegister();
  1334. bool HasWinCFI = false;
  1335. // Debug location must be unknown since the first debug location is used
  1336. // to determine the end of the prologue.
  1337. DebugLoc DL;
  // Space reserved for stack-based arguments when making an (ABI-guaranteed)
  // tail call.
  1340. unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  1341. if (TailCallArgReserveSize && IsWin64Prologue)
  1342. report_fatal_error("Can't handle guaranteed tail call under win64 yet");
  1343. const bool EmitStackProbeCall =
  1344. STI.getTargetLowering()->hasStackProbeSymbol(MF);
  1345. unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
  1346. if (HasFP && X86FI->hasSwiftAsyncContext()) {
  1347. switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
  1348. case SwiftAsyncFramePointerMode::DeploymentBased:
  1349. if (STI.swiftAsyncContextIsDynamicallySet()) {
  1350. // The special symbol below is absolute and has a *value* suitable to be
  1351. // combined with the frame pointer directly.
  1352. BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
  1353. .addUse(MachineFramePtr)
  1354. .addUse(X86::RIP)
  1355. .addImm(1)
  1356. .addUse(X86::NoRegister)
  1357. .addExternalSymbol("swift_async_extendedFramePointerFlags",
  1358. X86II::MO_GOTPCREL)
  1359. .addUse(X86::NoRegister);
  1360. break;
  1361. }
  1362. [[fallthrough]];
  1363. case SwiftAsyncFramePointerMode::Always:
  1364. BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
  1365. .addUse(MachineFramePtr)
  1366. .addImm(60)
  1367. .setMIFlag(MachineInstr::FrameSetup);
  1368. break;
  1369. case SwiftAsyncFramePointerMode::Never:
  1370. break;
  1371. }
  1372. }
  1373. // Re-align the stack on 64-bit if the x86-interrupt calling convention is
  1374. // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
  1375. // stack alignment.
  1376. if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
  1377. Fn.arg_size() == 2) {
  1378. StackSize += 8;
  1379. MFI.setStackSize(StackSize);
  1380. emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
  1381. }
  // If this is x86-64, the Red Zone is not disabled, we are a leaf function,
  // we use up to 128 bytes of stack space, and we don't have a frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack pointer
  // (we fit in the Red Zone). We also check that we don't push and pop from
  // the stack.
  1387. if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
  1388. !MFI.hasVarSizedObjects() && // No dynamic alloca.
  1389. !MFI.adjustsStack() && // No calls.
  1390. !EmitStackProbeCall && // No stack probes.
  1391. !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
  1392. !MF.shouldSplitStack()) { // Regular stack
  1393. uint64_t MinSize =
  1394. X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
  1395. if (HasFP) MinSize += SlotSize;
  1396. X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
  1397. StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
  1398. MFI.setStackSize(StackSize);
  1399. }
  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  1403. if (TailCallArgReserveSize != 0) {
  1404. BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
  1405. /*InEpilogue=*/false)
  1406. .setMIFlag(MachineInstr::FrameSetup);
  1407. }
  1408. // Mapping for machine moves:
  1409. //
  1410. // DST: VirtualFP AND
  1411. // SRC: VirtualFP => DW_CFA_def_cfa_offset
  1412. // ELSE => DW_CFA_def_cfa
  1413. //
  1414. // SRC: VirtualFP AND
  1415. // DST: Register => DW_CFA_def_cfa_register
  1416. //
  1417. // ELSE
  1418. // OFFSET < 0 => DW_CFA_offset_extended_sf
  1419. // REG < 64 => DW_CFA_offset + Reg
  1420. // ELSE => DW_CFA_offset_extended
  1421. uint64_t NumBytes = 0;
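  // The stack grows towards lower addresses, so the per-push CFA adjustments
  // below are expressed as negative multiples of SlotSize.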
  1422. int stackGrowth = -SlotSize;
  1423. // Find the funclet establisher parameter
  1424. Register Establisher = X86::NoRegister;
  1425. if (IsClrFunclet)
  1426. Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
  1427. else if (IsFunclet)
  1428. Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
  1429. if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
  1430. // Immediately spill establisher into the home slot.
  1431. // The runtime cares about this.
  1432. // MOV64mr %rdx, 16(%rsp)
  1433. unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
  1434. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
  1435. .addReg(Establisher)
  1436. .setMIFlag(MachineInstr::FrameSetup);
  1437. MBB.addLiveIn(Establisher);
  1438. }
  1439. if (HasFP) {
  1440. assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
  1441. // Calculate required stack adjustment.
  1442. uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for the extra hidden slot used to stash the
    // base pointer.
  1444. if (X86FI->getRestoreBasePointer())
  1445. FrameSize += SlotSize;
  1446. NumBytes = FrameSize -
  1447. (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
  1448. // Callee-saved registers are pushed on stack before the stack is realigned.
  1449. if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
  1450. NumBytes = alignTo(NumBytes, MaxAlign);
  1451. // Save EBP/RBP into the appropriate stack slot.
  1452. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
  1453. .addReg(MachineFramePtr, RegState::Kill)
  1454. .setMIFlag(MachineInstr::FrameSetup);
  1455. if (NeedsDwarfCFI) {
  1456. // Mark the place where EBP/RBP was saved.
  1457. // Define the current CFA rule to use the provided offset.
  1458. assert(StackSize);
  1459. BuildCFI(MBB, MBBI, DL,
  1460. MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth),
  1461. MachineInstr::FrameSetup);
  1462. // Change the rule for the FramePtr to be an "offset" rule.
  1463. unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
  1464. BuildCFI(MBB, MBBI, DL,
  1465. MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
  1466. 2 * stackGrowth),
  1467. MachineInstr::FrameSetup);
  1468. }
  1469. if (NeedsWinCFI) {
  1470. HasWinCFI = true;
  1471. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
  1472. .addImm(FramePtr)
  1473. .setMIFlag(MachineInstr::FrameSetup);
  1474. }
  1475. if (!IsFunclet) {
  1476. if (X86FI->hasSwiftAsyncContext()) {
  1477. const auto &Attrs = MF.getFunction().getAttributes();
  1478. // Before we update the live frame pointer we have to ensure there's a
  1479. // valid (or null) asynchronous context in its slot just before FP in
  1480. // the frame record, so store it now.
  1481. if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
  1482. // We have an initial context in r14, store it just before the frame
  1483. // pointer.
  1484. MBB.addLiveIn(X86::R14);
  1485. BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
  1486. .addReg(X86::R14)
  1487. .setMIFlag(MachineInstr::FrameSetup);
  1488. } else {
  1489. // No initial context, store null so that there's no pointer that
  1490. // could be misused.
  1491. BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
  1492. .addImm(0)
  1493. .setMIFlag(MachineInstr::FrameSetup);
  1494. }
  1495. if (NeedsWinCFI) {
  1496. HasWinCFI = true;
  1497. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
  1498. .addImm(X86::R14)
  1499. .setMIFlag(MachineInstr::FrameSetup);
  1500. }
  1501. BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
  1502. .addUse(X86::RSP)
  1503. .addImm(1)
  1504. .addUse(X86::NoRegister)
  1505. .addImm(8)
  1506. .addUse(X86::NoRegister)
  1507. .setMIFlag(MachineInstr::FrameSetup);
  1508. BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
  1509. .addUse(X86::RSP)
  1510. .addImm(8)
  1511. .setMIFlag(MachineInstr::FrameSetup);
  1512. }
  1513. if (!IsWin64Prologue && !IsFunclet) {
  1514. // Update EBP with the new base value.
  1515. if (!X86FI->hasSwiftAsyncContext())
  1516. BuildMI(MBB, MBBI, DL,
  1517. TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
  1518. FramePtr)
  1519. .addReg(StackPtr)
  1520. .setMIFlag(MachineInstr::FrameSetup);
  1521. if (NeedsDwarfCFI) {
  1522. // Mark effective beginning of when frame pointer becomes valid.
  1523. // Define the current CFA to use the EBP/RBP register.
  1524. unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
  1525. BuildCFI(
  1526. MBB, MBBI, DL,
  1527. MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
  1528. MachineInstr::FrameSetup);
  1529. }
  1530. if (NeedsWinFPO) {
  1531. // .cv_fpo_setframe $FramePtr
  1532. HasWinCFI = true;
  1533. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
  1534. .addImm(FramePtr)
  1535. .addImm(0)
  1536. .setMIFlag(MachineInstr::FrameSetup);
  1537. }
  1538. }
  1539. }
  1540. } else {
  1541. assert(!IsFunclet && "funclets without FPs not yet implemented");
  1542. NumBytes = StackSize -
  1543. (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
  1544. }
  1545. // Update the offset adjustment, which is mainly used by codeview to translate
  1546. // from ESP to VFRAME relative local variable offsets.
  1547. if (!IsFunclet) {
  1548. if (HasFP && TRI->hasStackRealignment(MF))
  1549. MFI.setOffsetAdjustment(-NumBytes);
  1550. else
  1551. MFI.setOffsetAdjustment(-StackSize);
  1552. }
  1553. // For EH funclets, only allocate enough space for outgoing calls. Save the
  1554. // NumBytes value that we would've used for the parent frame.
  1555. unsigned ParentFrameNumBytes = NumBytes;
  1556. if (IsFunclet)
  1557. NumBytes = getWinEHFuncletFrameSize(MF);
  1558. // Skip the callee-saved push instructions.
  1559. bool PushedRegs = false;
  1560. int StackOffset = 2 * stackGrowth;
  1561. while (MBBI != MBB.end() &&
  1562. MBBI->getFlag(MachineInstr::FrameSetup) &&
  1563. (MBBI->getOpcode() == X86::PUSH32r ||
  1564. MBBI->getOpcode() == X86::PUSH64r)) {
  1565. PushedRegs = true;
  1566. Register Reg = MBBI->getOperand(0).getReg();
  1567. ++MBBI;
  1568. if (!HasFP && NeedsDwarfCFI) {
  1569. // Mark callee-saved push instruction.
  1570. // Define the current CFA rule to use the provided offset.
  1571. assert(StackSize);
  1572. BuildCFI(MBB, MBBI, DL,
  1573. MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
  1574. MachineInstr::FrameSetup);
  1575. StackOffset += stackGrowth;
  1576. }
  1577. if (NeedsWinCFI) {
  1578. HasWinCFI = true;
  1579. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
  1580. .addImm(Reg)
  1581. .setMIFlag(MachineInstr::FrameSetup);
  1582. }
  1583. }
  // Realign the stack after we pushed callee-saved registers (so that we'll
  // be able to calculate their offsets from the frame pointer). Don't do this
  // for Win64; it needs to realign the stack after the prologue.
  1587. if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
  1588. assert(HasFP && "There should be a frame pointer if stack is realigned.");
  1589. BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
  1590. if (NeedsWinCFI) {
  1591. HasWinCFI = true;
  1592. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
  1593. .addImm(MaxAlign)
  1594. .setMIFlag(MachineInstr::FrameSetup);
  1595. }
  1596. }
  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
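  // mergeSPUpdates removes that adjacent adjustment and returns its value so
  // the single allocation emitted below can account for it.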
  1600. NumBytes -= mergeSPUpdates(MBB, MBBI, true);
  1601. // Adjust stack pointer: ESP -= numbytes.
  1602. // Windows and cygwin/mingw require a prologue helper routine when allocating
  1603. // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
  1604. // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
  1605. // stack and adjust the stack pointer in one go. The 64-bit version of
  1606. // __chkstk is only responsible for probing the stack. The 64-bit prologue is
  1607. // responsible for adjusting the stack pointer. Touching the stack at 4K
  1608. // increments is necessary to ensure that the guard pages used by the OS
  1609. // virtual memory manager are allocated in correct sequence.
  1610. uint64_t AlignedNumBytes = NumBytes;
  1611. if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
  1612. AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
  1613. if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
  1614. assert(!X86FI->getUsesRedZone() &&
  1615. "The Red Zone is not accounted for in stack probes");
  1616. // Check whether EAX is livein for this block.
  1617. bool isEAXAlive = isEAXLiveIn(MBB);
  1618. if (isEAXAlive) {
  1619. if (Is64Bit) {
  1620. // Save RAX
  1621. BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
  1622. .addReg(X86::RAX, RegState::Kill)
  1623. .setMIFlag(MachineInstr::FrameSetup);
  1624. } else {
  1625. // Save EAX
  1626. BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
  1627. .addReg(X86::EAX, RegState::Kill)
  1628. .setMIFlag(MachineInstr::FrameSetup);
  1629. }
  1630. }
  1631. if (Is64Bit) {
  1632. // Handle the 64-bit Windows ABI case where we need to call __chkstk.
  1633. // Function prologue is responsible for adjusting the stack pointer.
  1634. int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
  1635. BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
  1636. .addImm(Alloc)
  1637. .setMIFlag(MachineInstr::FrameSetup);
  1638. } else {
  1639. // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
  1640. // We'll also use 4 already allocated bytes for EAX.
  1641. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
  1642. .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
  1643. .setMIFlag(MachineInstr::FrameSetup);
  1644. }
  1645. // Call __chkstk, __chkstk_ms, or __alloca.
  1646. emitStackProbe(MF, MBB, MBBI, DL, true);
  1647. if (isEAXAlive) {
  1648. // Restore RAX/EAX
  1649. MachineInstr *MI;
  1650. if (Is64Bit)
  1651. MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
  1652. StackPtr, false, NumBytes - 8);
  1653. else
  1654. MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
  1655. StackPtr, false, NumBytes - 4);
  1656. MI->setFlag(MachineInstr::FrameSetup);
  1657. MBB.insert(MBBI, MI);
  1658. }
  1659. } else if (NumBytes) {
  1660. emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
  1661. }
  1662. if (NeedsWinCFI && NumBytes) {
  1663. HasWinCFI = true;
  1664. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
  1665. .addImm(NumBytes)
  1666. .setMIFlag(MachineInstr::FrameSetup);
  1667. }
  1668. int SEHFrameOffset = 0;
  1669. unsigned SPOrEstablisher;
  1670. if (IsFunclet) {
  1671. if (IsClrFunclet) {
  1672. // The establisher parameter passed to a CLR funclet is actually a pointer
  1673. // to the (mostly empty) frame of its nearest enclosing funclet; we have
  1674. // to find the root function establisher frame by loading the PSPSym from
  1675. // the intermediate frame.
  1676. unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
  1677. MachinePointerInfo NoInfo;
  1678. MBB.addLiveIn(Establisher);
  1679. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
  1680. Establisher, false, PSPSlotOffset)
  1681. .addMemOperand(MF.getMachineMemOperand(
  1682. NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
  1684. // Save the root establisher back into the current funclet's (mostly
  1685. // empty) frame, in case a sub-funclet or the GC needs it.
  1686. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
  1687. false, PSPSlotOffset)
  1688. .addReg(Establisher)
  1689. .addMemOperand(MF.getMachineMemOperand(
  1690. NoInfo,
  1691. MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
  1692. SlotSize, Align(SlotSize)));
  1693. }
  1694. SPOrEstablisher = Establisher;
  1695. } else {
  1696. SPOrEstablisher = StackPtr;
  1697. }
  1698. if (IsWin64Prologue && HasFP) {
  1699. // Set RBP to a small fixed offset from RSP. In the funclet case, we base
  1700. // this calculation on the incoming establisher, which holds the value of
  1701. // RSP from the parent frame at the end of the prologue.
  1702. SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
  1703. if (SEHFrameOffset)
  1704. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
  1705. SPOrEstablisher, false, SEHFrameOffset);
  1706. else
  1707. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
  1708. .addReg(SPOrEstablisher);
  1709. // If this is not a funclet, emit the CFI describing our frame pointer.
  1710. if (NeedsWinCFI && !IsFunclet) {
  1711. assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
  1712. HasWinCFI = true;
  1713. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
  1714. .addImm(FramePtr)
  1715. .addImm(SEHFrameOffset)
  1716. .setMIFlag(MachineInstr::FrameSetup);
  1717. if (isAsynchronousEHPersonality(Personality))
  1718. MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
  1719. }
  1720. } else if (IsFunclet && STI.is32Bit()) {
  1721. // Reset EBP / ESI to something good for funclets.
  1722. MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
  1723. // If we're a catch funclet, we can be returned to via catchret. Save ESP
  1724. // into the registration node so that the runtime will restore it for us.
  1725. if (!MBB.isCleanupFuncletEntry()) {
  1726. assert(Personality == EHPersonality::MSVC_CXX);
  1727. Register FrameReg;
  1728. int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
  1729. int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
  1730. // ESP is the first field, so no extra displacement is needed.
  1731. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
  1732. false, EHRegOffset)
  1733. .addReg(X86::ESP);
  1734. }
  1735. }
  1736. while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
  1737. const MachineInstr &FrameInstr = *MBBI;
  1738. ++MBBI;
  1739. if (NeedsWinCFI) {
  1740. int FI;
  1741. if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
  1742. if (X86::FR64RegClass.contains(Reg)) {
  1743. int Offset;
  1744. Register IgnoredFrameReg;
  1745. if (IsWin64Prologue && IsFunclet)
  1746. Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
  1747. else
  1748. Offset =
  1749. getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
  1750. SEHFrameOffset;
  1751. HasWinCFI = true;
  1752. assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
  1753. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
  1754. .addImm(Reg)
  1755. .addImm(Offset)
  1756. .setMIFlag(MachineInstr::FrameSetup);
  1757. }
  1758. }
  1759. }
  1760. }
  1761. if (NeedsWinCFI && HasWinCFI)
  1762. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
  1763. .setMIFlag(MachineInstr::FrameSetup);
  1764. if (FnHasClrFunclet && !IsFunclet) {
  1765. // Save the so-called Initial-SP (i.e. the value of the stack pointer
  1766. // immediately after the prolog) into the PSPSlot so that funclets
  1767. // and the GC can recover it.
  1768. unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
  1769. auto PSPInfo = MachinePointerInfo::getFixedStack(
  1770. MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
  1771. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
  1772. PSPSlotOffset)
  1773. .addReg(StackPtr)
  1774. .addMemOperand(MF.getMachineMemOperand(
  1775. PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
  1776. SlotSize, Align(SlotSize)));
  1777. }
  1778. // Realign stack after we spilled callee-saved registers (so that we'll be
  1779. // able to calculate their offsets from the frame pointer).
  1780. // Win64 requires aligning the stack after the prologue.
  1781. if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
  1782. assert(HasFP && "There should be a frame pointer if stack is realigned.");
  1783. BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
  1784. }
  1785. // We already dealt with stack realignment and funclets above.
  1786. if (IsFunclet && STI.is32Bit())
  1787. return;
  1788. // If we need a base pointer, set it up here. It's whatever the value
  1789. // of the stack pointer is at this point. Any variable size objects
  1790. // will be allocated after this, so we can still use the base pointer
  1791. // to reference locals.
  1792. if (TRI->hasBasePointer(MF)) {
  1793. // Update the base pointer with the current stack pointer.
  1794. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
  1795. BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
  1796. .addReg(SPOrEstablisher)
  1797. .setMIFlag(MachineInstr::FrameSetup);
  1798. if (X86FI->getRestoreBasePointer()) {
  1799. // Stash value of base pointer. Saving RSP instead of EBP shortens
  1800. // dependence chain. Used by SjLj EH.
  1801. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
  1802. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
  1803. FramePtr, true, X86FI->getRestoreBasePointerOffset())
  1804. .addReg(SPOrEstablisher)
  1805. .setMIFlag(MachineInstr::FrameSetup);
  1806. }
  1807. if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
  1808. // Stash the value of the frame pointer relative to the base pointer for
  1809. // Win32 EH. This supports Win32 EH, which does the inverse of the above:
  1810. // it recovers the frame pointer from the base pointer rather than the
  1811. // other way around.
  1812. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
  1813. Register UsedReg;
  1814. int Offset =
  1815. getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
  1816. .getFixed();
  1817. assert(UsedReg == BasePtr);
  1818. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
  1819. .addReg(FramePtr)
  1820. .setMIFlag(MachineInstr::FrameSetup);
  1821. }
  1822. }
  1823. if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
  1824. // Mark end of stack pointer adjustment.
  1825. if (!HasFP && NumBytes) {
  1826. // Define the current CFA rule to use the provided offset.
  1827. assert(StackSize);
  1828. BuildCFI(
  1829. MBB, MBBI, DL,
  1830. MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
  1831. MachineInstr::FrameSetup);
  1832. }
  1833. // Emit DWARF info specifying the offsets of the callee-saved registers.
  1834. emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
  1835. }
  1836. // X86 Interrupt handling function cannot assume anything about the direction
  1837. // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
  1838. // in each prologue of interrupt handler function.
  1839. //
  1840. // FIXME: Create "cld" instruction only in these cases:
  1841. // 1. The interrupt handling function uses any of the "rep" instructions.
  1842. // 2. Interrupt handling function calls another function.
  1843. //
  1844. if (Fn.getCallingConv() == CallingConv::X86_INTR)
  1845. BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
  1846. .setMIFlag(MachineInstr::FrameSetup);
  1847. // At this point we know if the function has WinCFI or not.
  1848. MF.setHasWinCFI(HasWinCFI);
  1849. }
  1850. bool X86FrameLowering::canUseLEAForSPInEpilogue(
  1851. const MachineFunction &MF) const {
  1852. // We can't use LEA instructions for adjusting the stack pointer if we don't
  1853. // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
  1854. // to deallocate the stack.
  1855. // This means that we can use LEA for SP in two situations:
  1856. // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  1857. // 2. We *have* a frame pointer which means we are permitted to use LEA.
  1858. return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
  1859. }
  1860. static bool isFuncletReturnInstr(MachineInstr &MI) {
  1861. switch (MI.getOpcode()) {
  1862. case X86::CATCHRET:
  1863. case X86::CLEANUPRET:
  1864. return true;
  1865. default:
  1866. return false;
  1867. }
  1868. llvm_unreachable("impossible");
  1869. }
  1870. // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
  1871. // stack. It holds a pointer to the bottom of the root function frame. The
  1872. // establisher frame pointer passed to a nested funclet may point to the
  1873. // (mostly empty) frame of its parent funclet, but it will need to find
  1874. // the frame of the root function to access locals. To facilitate this,
  1875. // every funclet copies the pointer to the bottom of the root function
  1876. // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
  1877. // same offset for the PSPSym in the root function frame that's used in the
  1878. // funclets' frames allows each funclet to dynamically accept any ancestor
  1879. // frame as its establisher argument (the runtime doesn't guarantee the
  1880. // immediate parent for some reason lost to history), and also allows the GC,
  1881. // which uses the PSPSym for some bookkeeping, to find it in any funclet's
  1882. // frame with only a single offset reported for the entire method.
  1883. unsigned
  1884. X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
  1885. const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
  1886. Register SPReg;
  1887. int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
  1888. /*IgnoreSPUpdates*/ true)
  1889. .getFixed();
  1890. assert(Offset >= 0 && SPReg == TRI->getStackRegister());
  1891. return static_cast<unsigned>(Offset);
  1892. }
  1893. unsigned
  1894. X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
  1895. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  1896. // This is the size of the pushed CSRs.
  1897. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  1898. // This is the size of callee saved XMMs.
  1899. const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  1900. unsigned XMMSize = WinEHXMMSlotInfo.size() *
  1901. TRI->getSpillSize(X86::VR128RegClass);
  1902. // This is the amount of stack a funclet needs to allocate.
  1903. unsigned UsedSize;
  1904. EHPersonality Personality =
  1905. classifyEHPersonality(MF.getFunction().getPersonalityFn());
  1906. if (Personality == EHPersonality::CoreCLR) {
  1907. // CLR funclets need to hold enough space to include the PSPSym, at the
  1908. // same offset from the stack pointer (immediately after the prolog) as it
  1909. // resides at in the main function.
  1910. UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
  1911. } else {
  1912. // Other funclets just need enough stack for outgoing call arguments.
  1913. UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
  1914. }
  1915. // RBP is not included in the callee saved register block. After pushing RBP,
  1916. // everything is 16 byte aligned. Everything we allocate before an outgoing
  1917. // call must also be 16 byte aligned.
  1918. unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
  1919. // Subtract out the size of the callee saved registers. This is how much stack
  1920. // each funclet will allocate.
  1921. return FrameSizeMinusRBP + XMMSize - CSSize;
  1922. }
  1923. static bool isTailCallOpcode(unsigned Opc) {
  1924. return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
  1925. Opc == X86::TCRETURNmi ||
  1926. Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
  1927. Opc == X86::TCRETURNmi64;
  1928. }
  1929. void X86FrameLowering::emitEpilogue(MachineFunction &MF,
  1930. MachineBasicBlock &MBB) const {
  1931. const MachineFrameInfo &MFI = MF.getFrameInfo();
  1932. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  1933. MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
  1934. MachineBasicBlock::iterator MBBI = Terminator;
  1935. DebugLoc DL;
  1936. if (MBBI != MBB.end())
  1937. DL = MBBI->getDebugLoc();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit
  // ones.
  1939. const bool Is64BitILP32 = STI.isTarget64BitILP32();
  1940. Register FramePtr = TRI->getFrameRegister(MF);
  1941. Register MachineFramePtr =
  1942. Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
  1943. bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  1944. bool NeedsWin64CFI =
  1945. IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
  1946. bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
  1947. // Get the number of bytes to allocate from the FrameInfo.
  1948. uint64_t StackSize = MFI.getStackSize();
  1949. uint64_t MaxAlign = calculateMaxStackAlign(MF);
  1950. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  1951. unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  1952. bool HasFP = hasFP(MF);
  1953. uint64_t NumBytes = 0;
  1954. bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
  1955. !MF.getTarget().getTargetTriple().isOSWindows()) &&
  1956. MF.needsFrameMoves();
  1957. if (IsFunclet) {
  1958. assert(HasFP && "EH funclets without FP not yet implemented");
  1959. NumBytes = getWinEHFuncletFrameSize(MF);
  1960. } else if (HasFP) {
  1961. // Calculate required stack adjustment.
  1962. uint64_t FrameSize = StackSize - SlotSize;
  1963. NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
  1964. // Callee-saved registers were pushed on stack before the stack was
  1965. // realigned.
  1966. if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
  1967. NumBytes = alignTo(FrameSize, MaxAlign);
  1968. } else {
  1969. NumBytes = StackSize - CSSize - TailCallArgReserveSize;
  1970. }
  1971. uint64_t SEHStackAllocAmt = NumBytes;
  1972. // AfterPop is the position to insert .cfi_restore.
  1973. MachineBasicBlock::iterator AfterPop = MBBI;
  1974. if (HasFP) {
  1975. if (X86FI->hasSwiftAsyncContext()) {
  1976. // Discard the context.
  1977. int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
  1978. emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
  1979. }
  1980. // Pop EBP.
  1981. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
  1982. MachineFramePtr)
  1983. .setMIFlag(MachineInstr::FrameDestroy);
  1984. // We need to reset FP to its untagged state on return. Bit 60 is currently
  1985. // used to show the presence of an extended frame.
  1986. if (X86FI->hasSwiftAsyncContext()) {
  1987. BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
  1988. MachineFramePtr)
  1989. .addUse(MachineFramePtr)
  1990. .addImm(60)
  1991. .setMIFlag(MachineInstr::FrameDestroy);
  1992. }
  1993. if (NeedsDwarfCFI) {
  1994. unsigned DwarfStackPtr =
  1995. TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
  1996. BuildCFI(MBB, MBBI, DL,
  1997. MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
  1998. MachineInstr::FrameDestroy);
  1999. if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
  2000. unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
  2001. BuildCFI(MBB, AfterPop, DL,
  2002. MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
  2003. MachineInstr::FrameDestroy);
  2004. --MBBI;
  2005. --AfterPop;
  2006. }
  2007. --MBBI;
  2008. }
  2009. }
  2010. MachineBasicBlock::iterator FirstCSPop = MBBI;
  2011. // Skip the callee-saved pop instructions.
  2012. while (MBBI != MBB.begin()) {
  2013. MachineBasicBlock::iterator PI = std::prev(MBBI);
  2014. unsigned Opc = PI->getOpcode();
  2015. if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
  2016. if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  2017. (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  2018. (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
  2019. (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
  2020. break;
  2021. FirstCSPop = PI;
  2022. }
  2023. --MBBI;
  2024. }
  2025. MBBI = FirstCSPop;
  2026. if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
  2027. emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
  2028. if (MBBI != MBB.end())
  2029. DL = MBBI->getDebugLoc();
  2030. // If there is an ADD32ri or SUB32ri of ESP immediately before this
  2031. // instruction, merge the two instructions.
  2032. if (NumBytes || MFI.hasVarSizedObjects())
  2033. NumBytes += mergeSPUpdates(MBB, MBBI, true);
  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! The same applies when the stack was
  // realigned. Don't do this if this was a funclet epilogue, since the funclets
  // will not do realignment or dynamic stack allocation.
  2038. if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
  2039. !IsFunclet) {
  2040. if (TRI->hasStackRealignment(MF))
  2041. MBBI = FirstCSPop;
  2042. unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
  2043. uint64_t LEAAmount =
  2044. IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
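    // With a Win64 prologue the frame pointer was established SEHFrameOffset
    // bytes above the allocated area, so FramePtr + (SEHStackAllocAmt -
    // SEHFrameOffset) recovers the SP value from before the allocation; without
    // a Win64 prologue, FramePtr - CSSize points just below the pushed
    // callee-saved registers, ready for the pops.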
  2045. if (X86FI->hasSwiftAsyncContext())
  2046. LEAAmount -= 16;
  2047. // There are only two legal forms of epilogue:
  2048. // - add SEHAllocationSize, %rsp
  2049. // - lea SEHAllocationSize(%FramePtr), %rsp
  2050. //
  2051. // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
  2052. // However, we may use this sequence if we have a frame pointer because the
  2053. // effects of the prologue can safely be undone.
  2054. if (LEAAmount != 0) {
  2055. unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
  2056. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
  2057. FramePtr, false, LEAAmount);
  2058. --MBBI;
  2059. } else {
  2060. unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
  2061. BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
  2062. .addReg(FramePtr);
  2063. --MBBI;
  2064. }
  2065. } else if (NumBytes) {
  2066. // Adjust stack pointer back: ESP += numbytes.
  2067. emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
  2068. if (!HasFP && NeedsDwarfCFI) {
  2069. // Define the current CFA rule to use the provided offset.
  2070. BuildCFI(MBB, MBBI, DL,
  2071. MCCFIInstruction::cfiDefCfaOffset(
  2072. nullptr, CSSize + TailCallArgReserveSize + SlotSize),
  2073. MachineInstr::FrameDestroy);
  2074. }
  2075. --MBBI;
  2076. }
  // The Windows unwinder will not invoke a function's exception handler if IP
  // is either in the prologue or in the epilogue. This behavior causes a
  // problem when a call immediately precedes an epilogue, because the return
  // address points into the epilogue. To cope with that, we insert an epilogue
  // marker here, then replace it with a 'nop' if it ends up immediately after
  // a CALL in the final emitted code.
  2083. if (NeedsWin64CFI && MF.hasWinCFI())
  2084. BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
  2085. if (!HasFP && NeedsDwarfCFI) {
  2086. MBBI = FirstCSPop;
  2087. int64_t Offset = -CSSize - SlotSize;
  2088. // Mark callee-saved pop instruction.
  2089. // Define the current CFA rule to use the provided offset.
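    // Each pop shrinks the distance from SP to the CFA by SlotSize, so
    // re-state the CFA offset after every restored register.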
  2090. while (MBBI != MBB.end()) {
  2091. MachineBasicBlock::iterator PI = MBBI;
  2092. unsigned Opc = PI->getOpcode();
  2093. ++MBBI;
  2094. if (Opc == X86::POP32r || Opc == X86::POP64r) {
  2095. Offset += SlotSize;
  2096. BuildCFI(MBB, MBBI, DL,
  2097. MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
  2098. MachineInstr::FrameDestroy);
  2099. }
  2100. }
  2101. }
  2102. // Emit DWARF info specifying the restores of the callee-saved registers.
  // For an epilogue that contains the return, or any other block without
  // successors, there is no need to generate .cfi_restore for callee-saved
  // registers.
  2105. if (NeedsDwarfCFI && !MBB.succ_empty())
  2106. emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
  2107. if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
  2108. // Add the return addr area delta back since we are not tail calling.
  2109. int Offset = -1 * X86FI->getTCReturnAddrDelta();
  2110. assert(Offset >= 0 && "TCDelta should never be positive");
  2111. if (Offset) {
  2112. // Check for possible merge with preceding ADD instruction.
  2113. Offset += mergeSPUpdates(MBB, Terminator, true);
  2114. emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
  2115. }
  2116. }
  2117. // Emit tilerelease for AMX kernel.
  2118. if (X86FI->hasVirtualTileReg())
  2119. BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
  2120. }
  2121. StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
  2122. int FI,
  2123. Register &FrameReg) const {
  2124. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2125. bool IsFixed = MFI.isFixedObjectIndex(FI);
  2126. // We can't calculate offset from frame pointer if the stack is realigned,
  2127. // so enforce usage of stack/base pointer. The base pointer is used when we
  2128. // have dynamic allocas in addition to dynamic realignment.
  2129. if (TRI->hasBasePointer(MF))
  2130. FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
  2131. else if (TRI->hasStackRealignment(MF))
  2132. FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
  2133. else
  2134. FrameReg = TRI->getFrameRegister(MF);
  2135. // Offset will hold the offset from the stack pointer at function entry to the
  2136. // object.
  2137. // We need to factor in additional offsets applied during the prologue to the
  2138. // frame, base, and stack pointer depending on which is used.
  2139. int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
  2140. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2141. unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  2142. uint64_t StackSize = MFI.getStackSize();
  2143. bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  2144. int64_t FPDelta = 0;
  2145. // In an x86 interrupt, remove the offset we added to account for the return
  2146. // address from any stack object allocated in the caller's frame. Interrupts
  2147. // do not have a standard return address. Fixed objects in the current frame,
  2148. // such as SSE register spills, should not get this treatment.
  2149. if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
  2150. Offset >= 0) {
  2151. Offset += getOffsetOfLocalArea();
  2152. }
  2153. if (IsWin64Prologue) {
  2154. assert(!MFI.hasCalls() || (StackSize % 16) == 8);
  2155. // Calculate required stack adjustment.
  2156. uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for the extra hidden slot used to stash the
    // base pointer.
  2158. if (X86FI->getRestoreBasePointer())
  2159. FrameSize += SlotSize;
  2160. uint64_t NumBytes = FrameSize - CSSize;
  2161. uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
  2162. if (FI && FI == X86FI->getFAIndex())
  2163. return StackOffset::getFixed(-SEHFrameOffset);
  2164. // FPDelta is the offset between the "traditional" FP location (the old base
  2165. // pointer followed by the return address) and the location required by the
  2166. // restricted Win64 prologue.
  2167. // Add FPDelta to all offsets below that go through the frame pointer.
  2168. FPDelta = FrameSize - SEHFrameOffset;
  2169. assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
  2170. "FPDelta isn't aligned per the Win64 ABI!");
  2171. }
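// Editor's note: illustrative arithmetic only, with assumed values. Suppose
// StackSize = 88 and SlotSize = 8, so FrameSize = 80; with CSSize = 16,
// NumBytes = 64. If calculateSetFPREG were to return 64 for that value,
// FPDelta would be 80 - 64 = 16, which satisfies the 16-byte alignment check
// above. The numbers are not taken from this file; they only show how the
// quantities relate.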
  2172. if (FrameReg == TRI->getFramePtr()) {
  2173. // Skip saved EBP/RBP
  2174. Offset += SlotSize;
  2175. // Account for restricted Windows prologue.
  2176. Offset += FPDelta;
  2177. // Skip the RETADDR move area
  2178. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  2179. if (TailCallReturnAddrDelta < 0)
  2180. Offset -= TailCallReturnAddrDelta;
  2181. return StackOffset::getFixed(Offset);
  2182. }
  2183. // FrameReg is either the stack pointer or a base pointer. But the base is
  2184. // located at the end of the statically known StackSize so the distinction
  2185. // doesn't really matter.
  2186. if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
  2187. assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
  2188. return StackOffset::getFixed(Offset + StackSize);
  2189. }
  2190. int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
  2191. Register &FrameReg) const {
  2192. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2193. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2194. const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  2195. const auto it = WinEHXMMSlotInfo.find(FI);
  2196. if (it == WinEHXMMSlotInfo.end())
  2197. return getFrameIndexReference(MF, FI, FrameReg).getFixed();
  2198. FrameReg = TRI->getStackRegister();
  2199. return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
  2200. it->second;
  2201. }
  2202. StackOffset
  2203. X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
  2204. Register &FrameReg,
  2205. int Adjustment) const {
  2206. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2207. FrameReg = TRI->getStackRegister();
  2208. return StackOffset::getFixed(MFI.getObjectOffset(FI) -
  2209. getOffsetOfLocalArea() + Adjustment);
  2210. }
  2211. StackOffset
  2212. X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
  2213. int FI, Register &FrameReg,
  2214. bool IgnoreSPUpdates) const {
  2215. const MachineFrameInfo &MFI = MF.getFrameInfo();
  2216. // Does not include any dynamic realign.
  2217. const uint64_t StackSize = MFI.getStackSize();
  2218. // LLVM arranges the stack as follows:
  2219. // ...
  2220. // ARG2
  2221. // ARG1
  2222. // RETADDR
  2223. // PUSH RBP <-- RBP points here
  2224. // PUSH CSRs
  2225. // ~~~~~~~ <-- possible stack realignment (non-win64)
  2226. // ...
  2227. // STACK OBJECTS
  2228. // ... <-- RSP after prologue points here
  2229. // ~~~~~~~ <-- possible stack realignment (win64)
  2230. //
  2231. // if (hasVarSizedObjects()):
  2232. // ... <-- "base pointer" (ESI/RBX) points here
  2233. // DYNAMIC ALLOCAS
  2234. // ... <-- RSP points here
  2235. //
  2236. // Case 1: In the simple case of no stack realignment and no dynamic
  2237. // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
  2238. // with fixed offsets from RSP.
  2239. //
  2240. // Case 2: In the case of stack realignment with no dynamic allocas, fixed
  2241. // stack objects are addressed with RBP and regular stack objects with RSP.
  2242. //
  2243. // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
  2244. // to address stack arguments for outgoing calls and nothing else. The "base
  2245. // pointer" points to local variables, and RBP points to fixed objects.
  2246. //
  2247. // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
  2248. // answer we give is relative to the SP after the prologue, and not the
  2249. // SP in the middle of the function.
  2250. if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
  2251. !STI.isTargetWin64())
  2252. return getFrameIndexReference(MF, FI, FrameReg);
  2253. // If !hasReservedCallFrame, the function might have SP adjustments in the
  2254. // body. So, even though the offset is statically known, it depends on where
  2255. // we are in the function.
  2256. if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
  2257. return getFrameIndexReference(MF, FI, FrameReg);
  2258. // We don't handle tail calls, and shouldn't be seeing them either.
  2259. assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
  2260. "we don't handle this case!");
  2261. // This is how the math works out:
  2262. //
  2263. // %rsp grows (i.e. gets lower) left to right. Each box below is
  2264. // one word (eight bytes). Obj0 is the stack slot we're trying to
  2265. // get to.
  2266. //
  2267. // ----------------------------------
  2268. // | BP | Obj0 | Obj1 | ... | ObjN |
  2269. // ----------------------------------
  2270. // ^ ^ ^ ^
  2271. // A B C E
  2272. //
  2273. // A is the incoming stack pointer.
  2274. // (B - A) is the local area offset (-8 for x86-64) [1]
  2275. // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
  2276. //
  2277. // |(E - B)| is the StackSize (absolute value, positive). For a
  2278. // stack that grows down, this works out to be (B - E). [3]
  2279. //
  2280. // E is also the value of %rsp after stack has been set up, and we
  2281. // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
  2282. // (C - E) == (C - A) - (B - A) + (B - E)
  2283. // { Using [1], [2] and [3] above }
  2284. // == getObjectOffset - LocalAreaOffset + StackSize
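  // Editor's note: a worked example with assumed numbers. If the local area
  // offset is -8, MFI.getObjectOffset(Obj0) is -16, and StackSize is 40, then
  // (C - E) = -16 - (-8) + 40 = 32, i.e. Obj0 would live 32 bytes above the
  // post-prologue %rsp. The values are illustrative, not taken from any real
  // frame.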
  2285. return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
  2286. }
  2287. bool X86FrameLowering::assignCalleeSavedSpillSlots(
  2288. MachineFunction &MF, const TargetRegisterInfo *TRI,
  2289. std::vector<CalleeSavedInfo> &CSI) const {
  2290. MachineFrameInfo &MFI = MF.getFrameInfo();
  2291. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2292. unsigned CalleeSavedFrameSize = 0;
  2293. unsigned XMMCalleeSavedFrameSize = 0;
  2294. auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  2295. int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
  2296. int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  2297. if (TailCallReturnAddrDelta < 0) {
  2298. // create RETURNADDR area
  2299. // arg
  2300. // arg
  2301. // RETADDR
  2302. // { ...
  2303. // RETADDR area
  2304. // ...
  2305. // }
  2306. // [EBP]
  2307. MFI.CreateFixedObject(-TailCallReturnAddrDelta,
  2308. TailCallReturnAddrDelta - SlotSize, true);
  2309. }
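  // Editor's note: for illustration with assumed values, if
  // TailCallReturnAddrDelta were -8 and SlotSize 8, this would create an
  // 8-byte fixed object at offset -16, i.e. one slot below the normal return
  // address slot.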
  2310. // Spill the BasePtr if it's used.
  2311. if (this->TRI->hasBasePointer(MF)) {
  2312. // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
  2313. if (MF.hasEHFunclets()) {
  2314. int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
  2315. X86FI->setHasSEHFramePtrSave(true);
  2316. X86FI->setSEHFramePtrSaveIndex(FI);
  2317. }
  2318. }
  2319. if (hasFP(MF)) {
  2320. // emitPrologue always spills the frame register first thing.
  2321. SpillSlotOffset -= SlotSize;
  2322. MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2323. // The async context lives directly before the frame pointer, and we
  2324. // allocate a second slot to preserve stack alignment.
  2325. if (X86FI->hasSwiftAsyncContext()) {
  2326. SpillSlotOffset -= SlotSize;
  2327. MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2328. SpillSlotOffset -= SlotSize;
  2329. }
  2330. // Since emitPrologue and emitEpilogue will handle spilling and restoring of
  2331. // the frame register, we can delete it from the CSI list and not have to
  2332. // worry about avoiding it later.
  2333. Register FPReg = TRI->getFrameRegister(MF);
  2334. for (unsigned i = 0; i < CSI.size(); ++i) {
  2335. if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
  2336. CSI.erase(CSI.begin() + i);
  2337. break;
  2338. }
  2339. }
  2340. }
  2341. // Assign slots for GPRs. It increases frame size.
  2342. for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2343. Register Reg = I.getReg();
  2344. if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
  2345. continue;
  2346. SpillSlotOffset -= SlotSize;
  2347. CalleeSavedFrameSize += SlotSize;
  2348. int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
  2349. I.setFrameIdx(SlotIndex);
  2350. }
  2351. X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
  2352. MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
  2353. // Assign slots for XMMs.
  2354. for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2355. Register Reg = I.getReg();
  2356. if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
  2357. continue;
  2358. // If this is a k-register, make sure we look it up via the largest legal type.
  2359. MVT VT = MVT::Other;
  2360. if (X86::VK16RegClass.contains(Reg))
  2361. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2362. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2363. unsigned Size = TRI->getSpillSize(*RC);
  2364. Align Alignment = TRI->getSpillAlign(*RC);
  2365. // ensure alignment
  2366. assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
  2367. SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);
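  // Editor's note: e.g. a SpillSlotOffset of -40 with a 16-byte spill
  // alignment becomes -alignTo(40, 16) = -48 (values assumed for
  // illustration).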
  2368. // spill into slot
  2369. SpillSlotOffset -= Size;
  2370. int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
  2371. I.setFrameIdx(SlotIndex);
  2372. MFI.ensureMaxAlignment(Alignment);
  2373. // Save the start offset and size of XMM in stack frame for funclets.
  2374. if (X86::VR128RegClass.contains(Reg)) {
  2375. WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
  2376. XMMCalleeSavedFrameSize += Size;
  2377. }
  2378. }
  2379. return true;
  2380. }
  2381. bool X86FrameLowering::spillCalleeSavedRegisters(
  2382. MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
  2383. ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  2384. DebugLoc DL = MBB.findDebugLoc(MI);
  2385. // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
  2386. // for us, and there are no XMM CSRs on Win32.
  2387. if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
  2388. return true;
  2389. // Push GPRs. It increases frame size.
  2390. const MachineFunction &MF = *MBB.getParent();
  2391. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  2392. for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2393. Register Reg = I.getReg();
  2394. if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
  2395. continue;
  2396. const MachineRegisterInfo &MRI = MF.getRegInfo();
  2397. bool isLiveIn = MRI.isLiveIn(Reg);
  2398. if (!isLiveIn)
  2399. MBB.addLiveIn(Reg);
  2400. // Decide whether we can add a kill flag to the use.
  2401. bool CanKill = !isLiveIn;
  2402. // Check if any subregister is live-in
  2403. if (CanKill) {
  2404. for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
  2405. if (MRI.isLiveIn(*AReg)) {
  2406. CanKill = false;
  2407. break;
  2408. }
  2409. }
  2410. }
  2411. // Do not set a kill flag on values that are also marked as live-in. This
  2412. // happens with the @llvm.returnaddress intrinsic and with arguments
  2413. // passed in callee saved registers.
  2414. // Omitting the kill flags is conservatively correct even if the live-in
  2415. // is not used after all.
  2416. BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
  2417. .setMIFlag(MachineInstr::FrameSetup);
  2418. }
  2419. // Spill the XMM registers. X86 has no push/pop instructions for XMM
  2420. // registers, so store them to their stack-frame slots instead.
  2421. for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
  2422. Register Reg = I.getReg();
  2423. if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
  2424. continue;
  2425. // If this is a k-register, make sure we look it up via the largest legal type.
  2426. MVT VT = MVT::Other;
  2427. if (X86::VK16RegClass.contains(Reg))
  2428. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2429. // Add the callee-saved register as live-in. It's killed at the spill.
  2430. MBB.addLiveIn(Reg);
  2431. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2432. TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
  2433. Register());
  2434. --MI;
  2435. MI->setFlag(MachineInstr::FrameSetup);
  2436. ++MI;
  2437. }
  2438. return true;
  2439. }
  2440. void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
  2441. MachineBasicBlock::iterator MBBI,
  2442. MachineInstr *CatchRet) const {
  2443. // SEH shouldn't use catchret.
  2444. assert(!isAsynchronousEHPersonality(classifyEHPersonality(
  2445. MBB.getParent()->getFunction().getPersonalityFn())) &&
  2446. "SEH should not use CATCHRET");
  2447. const DebugLoc &DL = CatchRet->getDebugLoc();
  2448. MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
  2449. // Fill EAX/RAX with the address of the target block.
  2450. if (STI.is64Bit()) {
  2451. // LEA64r CatchRetTarget(%rip), %rax
  2452. BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
  2453. .addReg(X86::RIP)
  2454. .addImm(0)
  2455. .addReg(0)
  2456. .addMBB(CatchRetTarget)
  2457. .addReg(0);
  2458. } else {
  2459. // MOV32ri $CatchRetTarget, %eax
  2460. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
  2461. .addMBB(CatchRetTarget);
  2462. }
  2463. // Record that we've taken the address of CatchRetTarget and no longer just
  2464. // reference it in a terminator.
  2465. CatchRetTarget->setMachineBlockAddressTaken();
  2466. }
  2467. bool X86FrameLowering::restoreCalleeSavedRegisters(
  2468. MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
  2469. MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  2470. if (CSI.empty())
  2471. return false;
  2472. if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
  2473. // Don't restore CSRs in 32-bit EH funclets. Matches
  2474. // spillCalleeSavedRegisters.
  2475. if (STI.is32Bit())
  2476. return true;
  2477. // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
  2478. // funclets. emitEpilogue transforms these to normal jumps.
  2479. if (MI->getOpcode() == X86::CATCHRET) {
  2480. const Function &F = MBB.getParent()->getFunction();
  2481. bool IsSEH = isAsynchronousEHPersonality(
  2482. classifyEHPersonality(F.getPersonalityFn()));
  2483. if (IsSEH)
  2484. return true;
  2485. }
  2486. }
  2487. DebugLoc DL = MBB.findDebugLoc(MI);
  2488. // Reload XMMs from stack frame.
  2489. for (const CalleeSavedInfo &I : CSI) {
  2490. Register Reg = I.getReg();
  2491. if (X86::GR64RegClass.contains(Reg) ||
  2492. X86::GR32RegClass.contains(Reg))
  2493. continue;
  2494. // If this is a k-register, make sure we look it up via the largest legal type.
  2495. MVT VT = MVT::Other;
  2496. if (X86::VK16RegClass.contains(Reg))
  2497. VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
  2498. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
  2499. TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
  2500. Register());
  2501. }
  2502. // POP GPRs.
  2503. unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  2504. for (const CalleeSavedInfo &I : CSI) {
  2505. Register Reg = I.getReg();
  2506. if (!X86::GR64RegClass.contains(Reg) &&
  2507. !X86::GR32RegClass.contains(Reg))
  2508. continue;
  2509. BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
  2510. .setMIFlag(MachineInstr::FrameDestroy);
  2511. }
  2512. return true;
  2513. }
  2514. void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
  2515. BitVector &SavedRegs,
  2516. RegScavenger *RS) const {
  2517. TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  2518. // Spill the BasePtr if it's used.
  2519. if (TRI->hasBasePointer(MF)){
  2520. Register BasePtr = TRI->getBaseRegister();
  2521. if (STI.isTarget64BitILP32())
  2522. BasePtr = getX86SubSuperRegister(BasePtr, 64);
  2523. SavedRegs.set(BasePtr);
  2524. }
  2525. }
  2526. static bool
  2527. HasNestArgument(const MachineFunction *MF) {
  2528. const Function &F = MF->getFunction();
  2529. for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
  2530. I != E; I++) {
  2531. if (I->hasNestAttr() && !I->use_empty())
  2532. return true;
  2533. }
  2534. return false;
  2535. }
  2536. /// GetScratchRegister - Get a temp register for performing work in the
  2537. /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
  2538. /// and the properties of the function either one or two registers will be
  2539. /// needed. Set primary to true for the first register, false for the second.
  2540. static unsigned
  2541. GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
  2542. CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
  2543. // Erlang stuff.
  2544. if (CallingConvention == CallingConv::HiPE) {
  2545. if (Is64Bit)
  2546. return Primary ? X86::R14 : X86::R13;
  2547. else
  2548. return Primary ? X86::EBX : X86::EDI;
  2549. }
  2550. if (Is64Bit) {
  2551. if (IsLP64)
  2552. return Primary ? X86::R11 : X86::R12;
  2553. else
  2554. return Primary ? X86::R11D : X86::R12D;
  2555. }
  2556. bool IsNested = HasNestArgument(&MF);
  2557. if (CallingConvention == CallingConv::X86_FastCall ||
  2558. CallingConvention == CallingConv::Fast ||
  2559. CallingConvention == CallingConv::Tail) {
  2560. if (IsNested)
  2561. report_fatal_error("Segmented stacks do not support fastcall with "
  2562. "nested functions.");
  2563. return Primary ? X86::EAX : X86::ECX;
  2564. }
  2565. if (IsNested)
  2566. return Primary ? X86::EDX : X86::EAX;
  2567. return Primary ? X86::ECX : X86::EAX;
  2568. }
  2569. // The stack limit in the TCB is set to this many bytes above the actual stack
  2570. // limit.
  2571. static const uint64_t kSplitStackAvailable = 256;
  2572. void X86FrameLowering::adjustForSegmentedStacks(
  2573. MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  2574. MachineFrameInfo &MFI = MF.getFrameInfo();
  2575. uint64_t StackSize;
  2576. unsigned TlsReg, TlsOffset;
  2577. DebugLoc DL;
  2578. // To support shrink-wrapping we would need to insert the new blocks
  2579. // at the right place and update the branches to PrologueMBB.
  2580. assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
  2581. unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2582. assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
  2583. "Scratch register is live-in");
  2584. if (MF.getFunction().isVarArg())
  2585. report_fatal_error("Segmented stacks do not support vararg functions.");
  2586. if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
  2587. !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
  2588. !STI.isTargetDragonFly())
  2589. report_fatal_error("Segmented stacks not supported on this platform.");
  2590. // Eventually StackSize will be calculated by a link-time pass, which will
  2591. // also decide whether checking code needs to be injected into this particular
  2592. // prologue.
  2593. StackSize = MFI.getStackSize();
  2594. if (!MFI.needsSplitStackProlog())
  2595. return;
  2596. MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  2597. MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  2598. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  2599. bool IsNested = false;
  2600. // We only need to know whether the function has a nest argument in 64-bit mode.
  2601. if (Is64Bit)
  2602. IsNested = HasNestArgument(&MF);
  2603. // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  2604. // allocMBB needs to be the last (terminating) instruction.
  2605. for (const auto &LI : PrologueMBB.liveins()) {
  2606. allocMBB->addLiveIn(LI);
  2607. checkMBB->addLiveIn(LI);
  2608. }
  2609. if (IsNested)
  2610. allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
  2611. MF.push_front(allocMBB);
  2612. MF.push_front(checkMBB);
  2613. // When the frame size is less than 256 we just compare the stack
  2614. // boundary directly to the value of the stack pointer, per gcc.
  2615. bool CompareStackPointer = StackSize < kSplitStackAvailable;
  2616. // Read the limit of the current stacklet from the stack_guard location.
  2617. if (Is64Bit) {
  2618. if (STI.isTargetLinux()) {
  2619. TlsReg = X86::FS;
  2620. TlsOffset = IsLP64 ? 0x70 : 0x40;
  2621. } else if (STI.isTargetDarwin()) {
  2622. TlsReg = X86::GS;
  2623. TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
  2624. } else if (STI.isTargetWin64()) {
  2625. TlsReg = X86::GS;
  2626. TlsOffset = 0x28; // pvArbitrary, reserved for application use
  2627. } else if (STI.isTargetFreeBSD()) {
  2628. TlsReg = X86::FS;
  2629. TlsOffset = 0x18;
  2630. } else if (STI.isTargetDragonFly()) {
  2631. TlsReg = X86::FS;
  2632. TlsOffset = 0x20; // use tls_tcb.tcb_segstack
  2633. } else {
  2634. report_fatal_error("Segmented stacks not supported on this platform.");
  2635. }
  2636. if (CompareStackPointer)
  2637. ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
  2638. else
  2639. BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
  2640. .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
  2641. BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
  2642. .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
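  // Editor's note: the five memory operands above follow the usual X86 MI
  // order (base, scale, index, displacement, segment), so this compares the
  // scratch register against the segment-relative location TlsReg:TlsOffset.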
  2643. } else {
  2644. if (STI.isTargetLinux()) {
  2645. TlsReg = X86::GS;
  2646. TlsOffset = 0x30;
  2647. } else if (STI.isTargetDarwin()) {
  2648. TlsReg = X86::GS;
  2649. TlsOffset = 0x48 + 90*4;
  2650. } else if (STI.isTargetWin32()) {
  2651. TlsReg = X86::FS;
  2652. TlsOffset = 0x14; // pvArbitrary, reserved for application use
  2653. } else if (STI.isTargetDragonFly()) {
  2654. TlsReg = X86::FS;
  2655. TlsOffset = 0x10; // use tls_tcb.tcb_segstack
  2656. } else if (STI.isTargetFreeBSD()) {
  2657. report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
  2658. } else {
  2659. report_fatal_error("Segmented stacks not supported on this platform.");
  2660. }
  2661. if (CompareStackPointer)
  2662. ScratchReg = X86::ESP;
  2663. else
  2664. BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
  2665. .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
  2666. if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
  2667. STI.isTargetDragonFly()) {
  2668. BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
  2669. .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  2670. } else if (STI.isTargetDarwin()) {
  2671. // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
  2672. unsigned ScratchReg2;
  2673. bool SaveScratch2;
  2674. if (CompareStackPointer) {
  2675. // The primary scratch register is available for holding the TLS offset.
  2676. ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2677. SaveScratch2 = false;
  2678. } else {
  2679. // Need to use a second register to hold the TLS offset
  2680. ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
  2681. // Unfortunately, with fastcc the second scratch register may hold an
  2682. // argument.
  2683. SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
  2684. }
  2685. // If Scratch2 is live-in then it needs to be saved.
  2686. assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
  2687. "Scratch register is live-in and not saved");
  2688. if (SaveScratch2)
  2689. BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
  2690. .addReg(ScratchReg2, RegState::Kill);
  2691. BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
  2692. .addImm(TlsOffset);
  2693. BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
  2694. .addReg(ScratchReg)
  2695. .addReg(ScratchReg2).addImm(1).addReg(0)
  2696. .addImm(0)
  2697. .addReg(TlsReg);
  2698. if (SaveScratch2)
  2699. BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
  2700. }
  2701. }
  2702. // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  2703. // It jumps to normal execution of the function body.
  2704. BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
  2705. // On 32 bit we first push the arguments size and then the frame size. On 64
  2706. // bit, we pass the stack frame size in r10 and the argument size in r11.
  2707. if (Is64Bit) {
  2708. // Functions with nested arguments use R10, so it needs to be saved across
  2709. // the call to _morestack
  2710. const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
  2711. const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
  2712. const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
  2713. const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
  2714. if (IsNested)
  2715. BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
  2716. BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
  2717. .addImm(StackSize);
  2718. BuildMI(allocMBB, DL,
  2719. TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
  2720. Reg11)
  2721. .addImm(X86FI->getArgumentStackSize());
  2722. } else {
  2723. BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
  2724. .addImm(X86FI->getArgumentStackSize());
  2725. BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
  2726. .addImm(StackSize);
  2727. }
  2728. // __morestack is in libgcc
  2729. if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
  2730. // Under the large code model, we cannot assume that __morestack lives
  2731. // within 2^31 bytes of the call site, so we cannot use pc-relative
  2732. // addressing. We cannot perform the call via a temporary register,
  2733. // as the rax register may be used to store the static chain, and all
  2734. // other suitable registers may be either callee-save or used for
  2735. // parameter passing. We cannot use the stack at this point either
  2736. // because __morestack manipulates the stack directly.
  2737. //
  2738. // To avoid these issues, perform an indirect call via a read-only memory
  2739. // location containing the address.
  2740. //
  2741. // This solution is not perfect, as it assumes that the .rodata section
  2742. // is laid out within 2^31 bytes of each function body, but this seems
  2743. // to be sufficient for JIT.
  2744. // FIXME: Add retpoline support and remove the error here.
  2745. if (STI.useIndirectThunkCalls())
  2746. report_fatal_error("Emitting morestack calls on 64-bit with the large "
  2747. "code model and thunks not yet implemented.");
  2748. BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
  2749. .addReg(X86::RIP)
  2750. .addImm(0)
  2751. .addReg(0)
  2752. .addExternalSymbol("__morestack_addr")
  2753. .addReg(0);
  2754. } else {
  2755. if (Is64Bit)
  2756. BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
  2757. .addExternalSymbol("__morestack");
  2758. else
  2759. BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
  2760. .addExternalSymbol("__morestack");
  2761. }
  2762. if (IsNested)
  2763. BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  2764. else
  2765. BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
  2766. allocMBB->addSuccessor(&PrologueMBB);
  2767. checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
  2768. checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
  2769. #ifdef EXPENSIVE_CHECKS
  2770. MF.verify();
  2771. #endif
  2772. }
  2773. /// Lookup an ERTS parameter in the !hipe.literals named metadata node.
  2774. /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
  2775. /// to fields it needs, through a named metadata node "hipe.literals" containing
  2776. /// name-value pairs.
  2777. static unsigned getHiPELiteral(
  2778. NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
  2779. for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
  2780. MDNode *Node = HiPELiteralsMD->getOperand(i);
  2781. if (Node->getNumOperands() != 2) continue;
  2782. MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
  2783. ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
  2784. if (!NodeName || !NodeVal) continue;
  2785. ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
  2786. if (ValConst && NodeName->getString() == LiteralName) {
  2787. return ValConst->getZExtValue();
  2788. }
  2789. }
  2790. report_fatal_error("HiPE literal " + LiteralName
  2791. + " required but not provided");
  2792. }
  2793. // Return true if there are no non-ehpad successors to MBB and there are no
  2794. // non-meta instructions between MBBI and MBB.end().
  2795. static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
  2796. MachineBasicBlock::const_iterator MBBI) {
  2797. return llvm::all_of(
  2798. MBB.successors(),
  2799. [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
  2800. std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
  2801. return MI.isMetaInstruction();
  2802. });
  2803. }
  2804. /// Erlang programs may need a special prologue to handle the stack size they
  2805. /// might need at runtime. That is because Erlang/OTP does not implement a C
  2806. /// stack but uses a custom implementation of a hybrid stack/heap architecture.
  2807. /// (for more information see Eric Stenman's Ph.D. thesis:
  2808. /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
  2809. ///
  2810. /// CheckStack:
  2811. /// temp0 = sp - MaxStack
  2812. /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
  2813. /// OldStart:
  2814. /// ...
  2815. /// IncStack:
  2816. /// call inc_stack # doubles the stack space
  2817. /// temp0 = sp - MaxStack
  2818. /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
  2819. void X86FrameLowering::adjustForHiPEPrologue(
  2820. MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  2821. MachineFrameInfo &MFI = MF.getFrameInfo();
  2822. DebugLoc DL;
  2823. // To support shrink-wrapping we would need to insert the new blocks
  2824. // at the right place and update the branches to PrologueMBB.
  2825. assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
  2826. // HiPE-specific values
  2827. NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
  2828. ->getNamedMetadata("hipe.literals");
  2829. if (!HiPELiteralsMD)
  2830. report_fatal_error(
  2831. "Can't generate HiPE prologue without runtime parameters");
  2832. const unsigned HipeLeafWords
  2833. = getHiPELiteral(HiPELiteralsMD,
  2834. Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
  2835. const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  2836. const unsigned Guaranteed = HipeLeafWords * SlotSize;
  2837. unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
  2838. MF.getFunction().arg_size() - CCRegisteredArgs : 0;
  2839. unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
  2840. assert(STI.isTargetLinux() &&
  2841. "HiPE prologue is only supported on Linux operating systems.");
  2842. // Compute the largest caller's frame that is needed to fit the callees'
  2843. // frames. This 'MaxStack' is computed from:
  2844. //
  2845. // a) the fixed frame size, which is the space needed for all spilled temps,
  2846. // b) outgoing on-stack parameter areas, and
  2847. // c) the minimum stack space this function needs to make available for the
  2848. // functions it calls (a tunable ABI property).
  2849. if (MFI.hasCalls()) {
  2850. unsigned MoreStackForCalls = 0;
  2851. for (auto &MBB : MF) {
  2852. for (auto &MI : MBB) {
  2853. if (!MI.isCall())
  2854. continue;
  2855. // Get callee operand.
  2856. const MachineOperand &MO = MI.getOperand(0);
  2857. // Only take into account global function calls (no closures, etc.).
  2858. if (!MO.isGlobal())
  2859. continue;
  2860. const Function *F = dyn_cast<Function>(MO.getGlobal());
  2861. if (!F)
  2862. continue;
  2863. // Do not update 'MaxStack' for primitive and built-in functions
  2864. // (encoded with names either starting with "erlang."/"bif_" or not
  2865. // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
  2866. // "_", such as the BIF "suspend_0") as they are executed on another
  2867. // stack.
  2868. if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
  2869. F->getName().find_first_of("._") == StringRef::npos)
  2870. continue;
  2871. unsigned CalleeStkArity =
  2872. F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
  2873. if (HipeLeafWords - 1 > CalleeStkArity)
  2874. MoreStackForCalls = std::max(MoreStackForCalls,
  2875. (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
  2876. }
  2877. }
  2878. MaxStack += MoreStackForCalls;
  2879. }
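  // Editor's note: a numeric sketch with assumed literals. On x86-64
  // (SlotSize = 8), if AMD64_LEAF_WORDS were 24, Guaranteed would be 192
  // bytes; a function with StackSize = 40 and 8 arguments (so CallerStkArity
  // = 2) starts from MaxStack = 40 + 2*8 + 8 = 64, which stays under the
  // guarantee unless the loop above raises it. The literal values come from
  // module metadata, not from this example.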
  2880. // If the required stack frame is larger than the guaranteed amount, runtime
  2881. // checks and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue.
  2882. if (MaxStack > Guaranteed) {
  2883. MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
  2884. MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
  2885. for (const auto &LI : PrologueMBB.liveins()) {
  2886. stackCheckMBB->addLiveIn(LI);
  2887. incStackMBB->addLiveIn(LI);
  2888. }
  2889. MF.push_front(incStackMBB);
  2890. MF.push_front(stackCheckMBB);
  2891. unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
  2892. unsigned LEAop, CMPop, CALLop;
  2893. SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
  2894. if (Is64Bit) {
  2895. SPReg = X86::RSP;
  2896. PReg = X86::RBP;
  2897. LEAop = X86::LEA64r;
  2898. CMPop = X86::CMP64rm;
  2899. CALLop = X86::CALL64pcrel32;
  2900. } else {
  2901. SPReg = X86::ESP;
  2902. PReg = X86::EBP;
  2903. LEAop = X86::LEA32r;
  2904. CMPop = X86::CMP32rm;
  2905. CALLop = X86::CALLpcrel32;
  2906. }
  2907. ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  2908. assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
  2909. "HiPE prologue scratch register is live-in");
  2910. // Create new MBB for StackCheck:
  2911. addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
  2912. SPReg, false, -MaxStack);
  2913. // SPLimitOffset is in a fixed heap location (pointed by BP).
  2914. addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
  2915. .addReg(ScratchReg), PReg, false, SPLimitOffset);
  2916. BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
  2917. // Create new MBB for IncStack:
  2918. BuildMI(incStackMBB, DL, TII.get(CALLop)).
  2919. addExternalSymbol("inc_stack_0");
  2920. addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
  2921. SPReg, false, -MaxStack);
  2922. addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
  2923. .addReg(ScratchReg), PReg, false, SPLimitOffset);
  2924. BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
  2925. stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
  2926. stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
  2927. incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
  2928. incStackMBB->addSuccessor(incStackMBB, {1, 100});
  2929. }
  2930. #ifdef EXPENSIVE_CHECKS
  2931. MF.verify();
  2932. #endif
  2933. }
  2934. bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
  2935. MachineBasicBlock::iterator MBBI,
  2936. const DebugLoc &DL,
  2937. int Offset) const {
  2938. if (Offset <= 0)
  2939. return false;
  2940. if (Offset % SlotSize)
  2941. return false;
  2942. int NumPops = Offset / SlotSize;
  2943. // This is only worth it if we have at most 2 pops.
  2944. if (NumPops != 1 && NumPops != 2)
  2945. return false;
  2946. // Handle only the trivial case where the adjustment directly follows
  2947. // a call. This is the most common one, anyway.
  2948. if (MBBI == MBB.begin())
  2949. return false;
  2950. MachineBasicBlock::iterator Prev = std::prev(MBBI);
  2951. if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
  2952. return false;
  2953. unsigned Regs[2];
  2954. unsigned FoundRegs = 0;
  2955. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  2956. const MachineOperand &RegMask = Prev->getOperand(1);
  2957. auto &RegClass =
  2958. Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
  2959. // Try to find up to NumPops free registers.
  2960. for (auto Candidate : RegClass) {
  2961. // Poor man's liveness:
  2962. // Since we're immediately after a call, any register that is clobbered
  2963. // by the call and not defined by it can be considered dead.
  2964. if (!RegMask.clobbersPhysReg(Candidate))
  2965. continue;
  2966. // Don't clobber reserved registers
  2967. if (MRI.isReserved(Candidate))
  2968. continue;
  2969. bool IsDef = false;
  2970. for (const MachineOperand &MO : Prev->implicit_operands()) {
  2971. if (MO.isReg() && MO.isDef() &&
  2972. TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
  2973. IsDef = true;
  2974. break;
  2975. }
  2976. }
  2977. if (IsDef)
  2978. continue;
  2979. Regs[FoundRegs++] = Candidate;
  2980. if (FoundRegs == (unsigned)NumPops)
  2981. break;
  2982. }
  2983. if (FoundRegs == 0)
  2984. return false;
  2985. // If we found only one free register, but need two, reuse the same one twice.
  2986. while (FoundRegs < (unsigned)NumPops)
  2987. Regs[FoundRegs++] = Regs[0];
  2988. for (int i = 0; i < NumPops; ++i)
  2989. BuildMI(MBB, MBBI, DL,
  2990. TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
  2991. return true;
  2992. }
  2993. MachineBasicBlock::iterator X86FrameLowering::
  2994. eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
  2995. MachineBasicBlock::iterator I) const {
  2996. bool reserveCallFrame = hasReservedCallFrame(MF);
  2997. unsigned Opcode = I->getOpcode();
  2998. bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  2999. DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
  3000. uint64_t Amount = TII.getFrameSize(*I);
  3001. uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
  3002. I = MBB.erase(I);
  3003. auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
  3004. // Try to avoid emitting dead SP adjustments if the block end is unreachable,
  3005. // typically because the function is marked noreturn (abort, throw,
  3006. // assert_fail, etc).
  3007. if (isDestroy && blockEndIsUnreachable(MBB, I))
  3008. return I;
  3009. if (!reserveCallFrame) {
  3010. // If the stack pointer can be changed after prologue, turn the
  3011. // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
  3012. // adjcallstackup instruction into an 'add ESP, <amt>'.
  3013. // We need to keep the stack aligned properly. To do this, we round the
  3014. // amount of space needed for the outgoing arguments up to the next
  3015. // alignment boundary.
  3016. Amount = alignTo(Amount, getStackAlign());
  3017. const Function &F = MF.getFunction();
  3018. bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  3019. bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
  3020. // If we have any exception handlers in this function, and we adjust
  3021. // the SP before calls, we may need to indicate this to the unwinder
  3022. // using GNU_ARGS_SIZE. Note that this may be necessary even when
  3023. // Amount == 0, because the preceding function may have set a non-0
  3024. // GNU_ARGS_SIZE.
  3025. // TODO: We don't need to reset this between subsequent functions,
  3026. // if it didn't change.
  3027. bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
  3028. if (HasDwarfEHHandlers && !isDestroy &&
  3029. MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
  3030. BuildCFI(MBB, InsertPos, DL,
  3031. MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
  3032. if (Amount == 0)
  3033. return I;
  3034. // Factor out the amount that gets handled inside the sequence
  3035. // (Pushes of argument for frame setup, callee pops for frame destroy)
  3036. Amount -= InternalAmt;
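  // Editor's note: illustrative only. For a 32-bit callee-pop convention
  // (e.g. stdcall) where the callee pops 12 bytes, the frame-destroy pseudo
  // would carry InternalAmt = 12, leaving Amount - 12 for the explicit SP
  // adjustment emitted below.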
  3037. // TODO: This is needed only if we require precise CFA.
  3038. // If this is a callee-pop calling convention, emit a CFA adjust for
  3039. // the amount the callee popped.
  3040. if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
  3041. BuildCFI(MBB, InsertPos, DL,
  3042. MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
  3043. // Add Amount to SP to destroy a frame, or subtract to setup.
  3044. int64_t StackAdjustment = isDestroy ? Amount : -Amount;
  3045. if (StackAdjustment) {
  3046. // Merge with any previous or following adjustment instruction. Note: the
  3047. // instructions merged with here do not have CFI, so their stack
  3048. // adjustments do not feed into CfaAdjustment.
  3049. StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
  3050. StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
  3051. if (StackAdjustment) {
  3052. if (!(F.hasMinSize() &&
  3053. adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
  3054. BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
  3055. /*InEpilogue=*/false);
  3056. }
  3057. }
  3058. if (DwarfCFI && !hasFP(MF)) {
  3059. // If we don't have FP, but need to generate unwind information,
  3060. // we need to set the correct CFA offset after the stack adjustment.
  3061. // How much we adjust the CFA offset depends on whether we're emitting
  3062. // CFI only for EH purposes or for debugging. EH only requires the CFA
  3063. // offset to be correct at each call site, while for debugging we want
  3064. // it to be more precise.
  3065. int64_t CfaAdjustment = -StackAdjustment;
  3066. // TODO: When not using precise CFA, we also need to adjust for the
  3067. // InternalAmt here.
  3068. if (CfaAdjustment) {
  3069. BuildCFI(MBB, InsertPos, DL,
  3070. MCCFIInstruction::createAdjustCfaOffset(nullptr,
  3071. CfaAdjustment));
  3072. }
  3073. }
  3074. return I;
  3075. }
  3076. if (InternalAmt) {
  3077. MachineBasicBlock::iterator CI = I;
  3078. MachineBasicBlock::iterator B = MBB.begin();
  3079. while (CI != B && !std::prev(CI)->isCall())
  3080. --CI;
  3081. BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
  3082. }
  3083. return I;
  3084. }
  3085. bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  3086. assert(MBB.getParent() && "Block is not attached to a function!");
  3087. const MachineFunction &MF = *MBB.getParent();
  3088. if (!MBB.isLiveIn(X86::EFLAGS))
  3089. return true;
  3090. // If stack probes have to loop inline or call, that will clobber EFLAGS.
  3091. // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
  3092. const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  3093. const X86TargetLowering &TLI = *STI.getTargetLowering();
  3094. if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
  3095. return false;
  3096. const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  3097. return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
  3098. }
  3099. bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  3100. assert(MBB.getParent() && "Block is not attached to a function!");
  3101. // Win64 has strict requirements for epilogues, and we are
  3102. // not taking a chance at messing with them.
  3103. // I.e., unless this block is already an exit block, we can't use
  3104. // it as an epilogue.
  3105. if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
  3106. return false;
  3107. // Swift async context epilogue has a BTR instruction that clobbers parts of
  3108. // EFLAGS.
  3109. const MachineFunction &MF = *MBB.getParent();
  3110. if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
  3111. return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
  3112. if (canUseLEAForSPInEpilogue(*MBB.getParent()))
  3113. return true;
  3114. // If we cannot use LEA to adjust SP, we may need to use ADD, which
  3115. // clobbers EFLAGS. Check that we do not need to preserve the flags;
  3116. // otherwise, conservatively assume it is not
  3117. // safe to insert the epilogue here.
  3118. return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
  3119. }
  3120. bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  3121. // If we may need to emit frameless compact unwind information, give
  3122. // up as this is currently broken: PR25614.
  3123. bool CompactUnwind =
  3124. MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
  3125. nullptr;
  3126. return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
  3127. !CompactUnwind) &&
  3128. // The lowering of segmented stack and HiPE only support entry
  3129. // blocks as prologue blocks: PR26107. This limitation may be
  3130. // lifted if we fix:
  3131. // - adjustForSegmentedStacks
  3132. // - adjustForHiPEPrologue
  3133. MF.getFunction().getCallingConv() != CallingConv::HiPE &&
  3134. !MF.shouldSplitStack();
  3135. }
  3136. MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
  3137. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  3138. const DebugLoc &DL, bool RestoreSP) const {
  3139. assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
  3140. assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
  3141. assert(STI.is32Bit() && !Uses64BitFramePtr &&
  3142. "restoring EBP/ESI on non-32-bit target");
  3143. MachineFunction &MF = *MBB.getParent();
  3144. Register FramePtr = TRI->getFrameRegister(MF);
  3145. Register BasePtr = TRI->getBaseRegister();
  3146. WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
  3147. X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  3148. MachineFrameInfo &MFI = MF.getFrameInfo();
  3149. // FIXME: Don't set FrameSetup flag in catchret case.
  3150. int FI = FuncInfo.EHRegNodeFrameIndex;
  3151. int EHRegSize = MFI.getObjectSize(FI);
  3152. if (RestoreSP) {
  3153. // MOV32rm -EHRegSize(%ebp), %esp
  3154. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
  3155. X86::EBP, true, -EHRegSize)
  3156. .setMIFlag(MachineInstr::FrameSetup);
  3157. }
  3158. Register UsedReg;
  3159. int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
  3160. int EndOffset = -EHRegOffset - EHRegSize;
  3161. FuncInfo.EHRegNodeEndOffset = EndOffset;
  3162. if (UsedReg == FramePtr) {
  3163. // ADD $offset, %ebp
  3164. unsigned ADDri = getADDriOpcode(false, EndOffset);
  3165. BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
  3166. .addReg(FramePtr)
  3167. .addImm(EndOffset)
  3168. .setMIFlag(MachineInstr::FrameSetup)
  3169. ->getOperand(3)
  3170. .setIsDead();
  3171. assert(EndOffset >= 0 &&
  3172. "end of registration object above normal EBP position!");
  3173. } else if (UsedReg == BasePtr) {
  3174. // LEA offset(%ebp), %esi
  3175. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
  3176. FramePtr, false, EndOffset)
  3177. .setMIFlag(MachineInstr::FrameSetup);
  3178. // MOV32rm SavedEBPOffset(%esi), %ebp
  3179. assert(X86FI->getHasSEHFramePtrSave());
  3180. int Offset =
  3181. getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
  3182. .getFixed();
  3183. assert(UsedReg == BasePtr);
  3184. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
  3185. UsedReg, true, Offset)
  3186. .setMIFlag(MachineInstr::FrameSetup);
  3187. } else {
  3188. llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
  3189. }
  3190. return MBBI;
  3191. }
  3192. int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  3193. return TRI->getSlotSize();
  3194. }
  3195. Register
  3196. X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
  3197. return TRI->getDwarfRegNum(StackPtr, true);
  3198. }
  3199. namespace {
  3200. // Struct used by orderFrameObjects to help sort the stack objects.
  3201. struct X86FrameSortingObject {
  3202. bool IsValid = false; // true if we care about this Object.
  3203. unsigned ObjectIndex = 0; // Index of Object into MFI list.
  3204. unsigned ObjectSize = 0; // Size of Object in bytes.
  3205. Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
  3206. unsigned ObjectNumUses = 0; // Object static number of uses.
  3207. };
  3208. // The comparison function we use for std::sort to order our local
  3209. // stack symbols. The current algorithm is to use an estimated
  3210. // "density". This takes into consideration the size and number of
  3211. // uses each object has in order to roughly minimize code size.
  3212. // So, for example, an object of size 16B that is referenced 5 times
  3213. // will get higher priority than 4 4B objects referenced 1 time each.
  3214. // It's not perfect, and we may be able to squeeze a few more bytes out of
  3215. // it (for example: 0(esp) requires fewer bytes; symbols allocated at the
  3216. // fringe could be given special consideration, since their size is less
  3217. // important, etc.), but the algorithmic complexity grows too much to be
  3218. // worth the extra gains we get. This gets us pretty close.
  3219. // The final order leaves us with objects with highest priority going
  3220. // at the end of our list.
  3221. struct X86FrameSortingComparator {
  3222. inline bool operator()(const X86FrameSortingObject &A,
  3223. const X86FrameSortingObject &B) const {
  3224. uint64_t DensityAScaled, DensityBScaled;
  3225. // For consistency in our comparison, all invalid objects are placed
  3226. // at the end. This also allows us to stop walking when we hit the
  3227. // first invalid item after it's all sorted.
  3228. if (!A.IsValid)
  3229. return false;
  3230. if (!B.IsValid)
  3231. return true;
  3232. // The density is calculated by doing :
  3233. // (double)DensityA = A.ObjectNumUses / A.ObjectSize
  3234. // (double)DensityB = B.ObjectNumUses / B.ObjectSize
  3235. // Since this approach may cause inconsistencies in
  3236. // the floating point <, >, == comparisons, depending on the floating
  3237. // point model with which the compiler was built, we're going
  3238. // to scale both sides by multiplying with
  3239. // A.ObjectSize * B.ObjectSize. This ends up factoring away
  3240. // the division and, with it, the need for any floating point
  3241. // arithmetic.
  3242. DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
  3243. static_cast<uint64_t>(B.ObjectSize);
  3244. DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
  3245. static_cast<uint64_t>(A.ObjectSize);
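  // Editor's note: with assumed numbers, an object A used 5 times with size
  // 16 and an object B used once with size 4 give DensityAScaled = 5 * 4 = 20
  // and DensityBScaled = 1 * 16 = 16, so A compares "denser" and is placed
  // later in the sorted list (i.e. gets higher priority).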
  3246. // If the two densities are equal, prioritize highest alignment
  3247. // objects. This allows for similar alignment objects
  3248. // to be packed together (given the same density).
  3249. // There's room for improvement here, also, since we can pack
  3250. // similar alignment (different density) objects next to each
  3251. // other to save padding. This will also require further
  3252. // complexity/iterations, and the overall gain isn't worth it,
  3253. // in general. Something to keep in mind, though.
  3254. if (DensityAScaled == DensityBScaled)
  3255. return A.ObjectAlignment < B.ObjectAlignment;
  3256. return DensityAScaled < DensityBScaled;
  3257. }
  3258. };
  3259. } // namespace
  3260. // Order the symbols in the local stack.
  3261. // We want to place the local stack objects in some sort of sensible order.
  3262. // The heuristic we use is to try and pack them according to static number
  3263. // of uses and size of object in order to minimize code size.
  3264. void X86FrameLowering::orderFrameObjects(
  3265. const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  3266. const MachineFrameInfo &MFI = MF.getFrameInfo();
  3267. // Don't waste time if there's nothing to do.
  3268. if (ObjectsToAllocate.empty())
  3269. return;
  3270. // Create an array of all MFI objects. We won't need all of these
  3271. // objects, but we're going to create a full array of them to make
  3272. // it easier to index into when we're counting "uses" down below.
  3273. // We want to be able to easily/cheaply access an object by simply
  3274. // indexing into it, instead of having to search for it every time.
  3275. std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
  3276. // Walk the objects we care about and mark them as such in our working
  3277. // struct.
  3278. for (auto &Obj : ObjectsToAllocate) {
  3279. SortingObjects[Obj].IsValid = true;
  3280. SortingObjects[Obj].ObjectIndex = Obj;
  3281. SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
  3282. // Set the size.
  3283. int ObjectSize = MFI.getObjectSize(Obj);
  3284. if (ObjectSize == 0)
  3285. // Variable size. Just use 4.
  3286. SortingObjects[Obj].ObjectSize = 4;
  3287. else
  3288. SortingObjects[Obj].ObjectSize = ObjectSize;
  3289. }
  3290. // Count the number of uses for each object.
  3291. for (auto &MBB : MF) {
  3292. for (auto &MI : MBB) {
  3293. if (MI.isDebugInstr())
  3294. continue;
  3295. for (const MachineOperand &MO : MI.operands()) {
  3296. // Check to see if it's a local stack symbol.
  3297. if (!MO.isFI())
  3298. continue;
  3299. int Index = MO.getIndex();
  3300. // Check to see if it falls within our range, and is tagged
  3301. // to require ordering.
  3302. if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
  3303. SortingObjects[Index].IsValid)
  3304. SortingObjects[Index].ObjectNumUses++;
  3305. }
  3306. }
  3307. }
  3308. // Sort the objects using X86FrameSortingComparator (see its comment for
  3309. // more info).
  3310. llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
  3311. // Now modify the original list to represent the final order that
  3312. // we want. The order will depend on whether we're going to access them
  3313. // from the stack pointer or the frame pointer. For SP, the list should
  3314. // end up with the objects we want at smaller offsets placed at the END.
  3315. // For FP, it should be flipped.
  3316. int i = 0;
  3317. for (auto &Obj : SortingObjects) {
  3318. // All invalid items are sorted at the end, so it's safe to stop.
  3319. if (!Obj.IsValid)
  3320. break;
  3321. ObjectsToAllocate[i++] = Obj.ObjectIndex;
  3322. }
  3323. // Flip it if we're accessing off of the FP.
  3324. if (!TRI->hasStackRealignment(MF) && hasFP(MF))
  3325. std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
  3326. }
  3327. unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
  3328. // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
  3329. unsigned Offset = 16;
  3330. // RBP is immediately pushed.
  3331. Offset += SlotSize;
  3332. // All callee-saved registers are then pushed.
  3333. Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
  3334. // Every funclet allocates enough stack space for the largest outgoing call.
  3335. Offset += getWinEHFuncletFrameSize(MF);
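  // Editor's note: with assumed numbers, two pushed CSRs (16 bytes) and a
  // 32-byte funclet frame give 16 + 8 + 16 + 32 = 72 bytes between the
  // funclet's RSP and the parent-frame-pointer home slot.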
  3336. return Offset;
  3337. }
  3338. void X86FrameLowering::processFunctionBeforeFrameFinalized(
  3339. MachineFunction &MF, RegScavenger *RS) const {
  3340. // Mark the function as not having WinCFI. We will set it back to true in
  3341. // emitPrologue if it gets called and emits CFI.
  3342. MF.setHasWinCFI(false);
  3343. // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
  3344. // aligned. The format doesn't support misaligned stack adjustments.
  3345. if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
  3346. MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));
  3347. // If this function isn't doing Win64-style C++ EH, we don't need to do
  3348. // anything.
  3349. if (STI.is64Bit() && MF.hasEHFunclets() &&
  3350. classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
  3351. EHPersonality::MSVC_CXX) {
  3352. adjustFrameForMsvcCxxEh(MF);
  3353. }
  3354. }
  3355. void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
  3356. // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
  3357. // relative to RSP after the prologue. Find the offset of the last fixed
  3358. // object, so that we can allocate a slot immediately following it. If there
  3359. // were no fixed objects, use offset -SlotSize, which is immediately after the
  3360. // return address. Fixed objects have negative frame indices.
  3361. MachineFrameInfo &MFI = MF.getFrameInfo();
  3362. WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
  3363. int64_t MinFixedObjOffset = -SlotSize;
  3364. for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
  3365. MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
  3366. for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
  3367. for (WinEHHandlerType &H : TBME.HandlerArray) {
  3368. int FrameIndex = H.CatchObj.FrameIndex;
  3369. if (FrameIndex != INT_MAX) {
  3370. // Ensure alignment.
  3371. unsigned Align = MFI.getObjectAlign(FrameIndex).value();
  3372. MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
  3373. MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
  3374. MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
  3375. }
  3376. }
  3377. }
  3378. // Ensure alignment.
  3379. MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
  3380. int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
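  // Editor's note: for illustration, a MinFixedObjOffset of -44 would first be
  // rounded down to -48 above and UnwindHelp would then land at offset -56
  // (assuming SlotSize = 8); the actual values depend on the function's fixed
  // objects.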
  3381. int UnwindHelpFI =
  3382. MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
  3383. EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
  3384. // Store -2 into UnwindHelp on function entry. We have to scan forwards past
  3385. // other frame setup instructions.
  3386. MachineBasicBlock &MBB = MF.front();
  3387. auto MBBI = MBB.begin();
  3388. while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
  3389. ++MBBI;
  3390. DebugLoc DL = MBB.findDebugLoc(MBBI);
  3391. addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
  3392. UnwindHelpFI)
  3393. .addImm(-2);
  3394. }
  3395. void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
  3396. MachineFunction &MF, RegScavenger *RS) const {
  3397. if (STI.is32Bit() && MF.hasEHFunclets())
  3398. restoreWinEHStackPointersInParent(MF);
  3399. }
  3400. void X86FrameLowering::restoreWinEHStackPointersInParent(
  3401. MachineFunction &MF) const {
  3402. // 32-bit functions have to restore stack pointers when control is transferred
  3403. // back to the parent function. These blocks are identified as eh pads that
  3404. // are not funclet entries.
  3405. bool IsSEH = isAsynchronousEHPersonality(
  3406. classifyEHPersonality(MF.getFunction().getPersonalityFn()));
  3407. for (MachineBasicBlock &MBB : MF) {
  3408. bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
  3409. if (NeedsRestore)
  3410. restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
  3411. /*RestoreSP=*/IsSEH);
  3412. }
  3413. }